Beispiel #1
0
 def test_plot_barplot(self):
     """Smoke-test plot_barplot with a stacked 3x4 matrix and a PNG path."""
     class_labels = ["ref_hom", "het", "alt_hom", "missing"]
     counts = numpy.array([[1, 2, 3, 4], [2, 1, 4, 3], [4, 3, 2, 1]])
     with NamedTemporaryFile(suffix=".png") as tmp_file:
         png_path = tmp_file.name
         # Only the temporary name is needed; the handle is closed before
         # plot_barplot is given the path.
         tmp_file.close()
         plot_barplot(counts, class_labels, png_path, stacked=True, figsize=(10, 10))
         # Remove whatever plot_barplot left at the path.
         os.remove(png_path)
Beispiel #2
0
 def test_plot_barplot(self):
     """Smoke-test plot_barplot with a stacked 3x4 matrix and a PNG path."""
     class_labels = ['ref_hom', 'het', 'alt_hom', 'missing']
     counts = numpy.array([[1, 2, 3, 4], [2, 1, 4, 3], [4, 3, 2, 1]])
     with NamedTemporaryFile(suffix='.png') as tmp_file:
         png_path = tmp_file.name
         # Only the temporary name is needed; the handle is closed before
         # plot_barplot is given the path.
         tmp_file.close()
         plot_barplot(counts, class_labels, png_path,
                      stacked=True, figsize=(10, 10))
         # Remove whatever plot_barplot left at the path.
         os.remove(png_path)
def plot_gt_stats_per_sample(variations, data_dir, chunk_size=SNPS_PER_CHUNK):
    """Write per-sample genotype bar plots as PNG files into data_dir.

    Four figures are produced through plot_barplot:
    genotype_counts_per_sample.png, missing_per_sample.png,
    het_per_sample.png and gt_perc_per_sample.png.

    Args:
        variations: object indexable by GT_FIELD (the second dimension of
            that array is used as the figure width) and exposing a
            ``samples`` attribute, used as x tick labels when not None.
        data_dir: directory in which the PNG files are created.
        chunk_size: forwarded unchanged to calc_gt_type_stats.
    """

    def call_spec(*args):
        # Entry format expected in mpl_params; presumably plot_barplot applies
        # each entry as a matplotlib Axes method call -- confirm there.
        return {'args': list(args), 'kwargs': {}}

    gt_stats = calc_gt_type_stats(variations, chunk_size=chunk_size)
    # After transposing, columns follow the label order used below:
    # ref hom, het, alt hom, missing (rows are presumably samples).
    gt_stats = gt_stats.transpose()
    figsize = (variations[GT_FIELD].shape[1], 7)

    # All genotypes classes per sample
    fpath = join(data_dir, 'genotype_counts_per_sample.png')
    mpl_params = {'set_xlabel': call_spec('Samples'),
                  'set_ylabel': call_spec('Number of GTs'),
                  'set_title': call_spec('Genotypes counts per sample')}
    samples = variations.samples
    if samples is not None:
        mpl_params['set_xticklabels'] = call_spec(samples)
    plot_barplot(gt_stats,
                 ['Ref Homozygous', 'Heterozygous', 'Alt Homozygous',
                  'Missing GT'],
                 mpl_params=mpl_params,
                 color=['darkslategrey', 'c', 'paleturquoise', 'cadetblue'],
                 fpath=fpath, stacked=True, figsize=figsize)

    # Missing per sample (x label and tick labels are reused from above)
    fpath = join(data_dir, 'missing_per_sample.png')
    mpl_params['set_ylabel'] = call_spec('Missing Genotypes Number')
    mpl_params['set_title'] = call_spec('Missing genotypes counts per sample')
    plot_barplot(gt_stats[:, -1], ['Missing GT'], mpl_params=mpl_params,
                 fpath=fpath, stacked=True, figsize=figsize)

    # Heterozygous per sample
    fpath = join(data_dir, 'het_per_sample.png')
    mpl_params['set_ylabel'] = call_spec('Heterozygous Number')
    mpl_params['set_title'] = call_spec('Heterozygous counts per sample')
    plot_barplot(gt_stats[:, 1], ['Heterozygous'], mpl_params=mpl_params,
                 fpath=fpath, stacked=True, figsize=figsize)

    # GT percentage without missing values
    fpath = join(data_dir, 'gt_perc_per_sample.png')
    mpl_params['set_ylabel'] = call_spec('% Genotypes')
    mpl_params['set_title'] = call_spec('Genotypes percentage per sample')
    called = gt_stats[:, :-1]
    totals = called.sum(axis=1, keepdims=True)
    # A row with no called genotypes would compute 0/0 (NaN plus a numpy
    # RuntimeWarning). Its counts are all zero, so dividing by 1 reports 0 %.
    totals[totals == 0] = 1
    gt_perc = called / totals
    gt_perc *= 100
    plot_barplot(gt_perc, ['Ref Homozygous', 'Heterozygous', 'Alt Homozygous'],
                 mpl_params=mpl_params, fpath=fpath, figsize=figsize)
Beispiel #4
0
def plot_gt_stats_per_sample(variations, data_dir, chunk_size=SNPS_PER_CHUNK):
    """Write per-sample genotype bar plots as PNG files into data_dir.

    Four figures are produced through plot_barplot:
    genotype_counts_per_sample.png, missing_per_sample.png,
    het_per_sample.png and gt_perc_per_sample.png.

    Args:
        variations: object indexable by GT_FIELD (the second dimension of
            that array is used as the figure width) and exposing a
            ``samples`` attribute, used as x tick labels when not None.
        data_dir: directory in which the PNG files are created.
        chunk_size: forwarded unchanged to calc_gt_type_stats.
    """

    def call_spec(*args):
        # Entry format expected in mpl_params; presumably plot_barplot applies
        # each entry as a matplotlib Axes method call -- confirm there.
        return {'args': list(args), 'kwargs': {}}

    gt_stats = calc_gt_type_stats(variations, chunk_size=chunk_size)
    # After transposing, columns follow the label order used below:
    # ref hom, het, alt hom, missing (rows are presumably samples).
    gt_stats = gt_stats.transpose()
    figsize = (variations[GT_FIELD].shape[1], 7)

    # All genotypes classes per sample
    fpath = join(data_dir, 'genotype_counts_per_sample.png')
    mpl_params = {
        'set_xlabel': call_spec('Samples'),
        'set_ylabel': call_spec('Number of GTs'),
        'set_title': call_spec('Genotypes counts per sample'),
    }
    samples = variations.samples
    if samples is not None:
        mpl_params['set_xticklabels'] = call_spec(samples)
    plot_barplot(
        gt_stats,
        ['Ref Homozygous', 'Heterozygous', 'Alt Homozygous', 'Missing GT'],
        mpl_params=mpl_params,
        color=['darkslategrey', 'c', 'paleturquoise', 'cadetblue'],
        fpath=fpath,
        stacked=True,
        figsize=figsize)

    # Missing per sample (x label and tick labels are reused from above)
    fpath = join(data_dir, 'missing_per_sample.png')
    mpl_params['set_ylabel'] = call_spec('Missing Genotypes Number')
    mpl_params['set_title'] = call_spec('Missing genotypes counts per sample')
    plot_barplot(gt_stats[:, -1], ['Missing GT'],
                 mpl_params=mpl_params,
                 fpath=fpath,
                 stacked=True,
                 figsize=figsize)

    # Heterozygous per sample
    fpath = join(data_dir, 'het_per_sample.png')
    mpl_params['set_ylabel'] = call_spec('Heterozygous Number')
    mpl_params['set_title'] = call_spec('Heterozygous counts per sample')
    plot_barplot(gt_stats[:, 1], ['Heterozygous'],
                 mpl_params=mpl_params,
                 fpath=fpath,
                 stacked=True,
                 figsize=figsize)

    # GT percentage without missing values
    fpath = join(data_dir, 'gt_perc_per_sample.png')
    mpl_params['set_ylabel'] = call_spec('% Genotypes')
    mpl_params['set_title'] = call_spec('Genotypes percentage per sample')
    called = gt_stats[:, :-1]
    totals = called.sum(axis=1, keepdims=True)
    # A row with no called genotypes would compute 0/0 (NaN plus a numpy
    # RuntimeWarning). Its counts are all zero, so dividing by 1 reports 0 %.
    totals[totals == 0] = 1
    gt_perc = called / totals
    gt_perc *= 100
    plot_barplot(gt_perc, ['Ref Homozygous', 'Heterozygous', 'Alt Homozygous'],
                 mpl_params=mpl_params,
                 fpath=fpath,
                 figsize=figsize)