Example #1
0
def plot_missing_gt_rate_per_snp(variations,
                                 data_dir,
                                 chunk_size=SNPS_PER_CHUNK):
    """Plot the missing genotype rate distribution to a PNG file.

    The histogram is computed by ``histogram_for_chunks`` from
    ``calc_missing_gt(rates=True, axis=1)`` using 20 bins over the range
    (0, 1) and is drawn by ``plot_distrib`` into
    ``<data_dir>/missing_gt_rate.png``.

    Args:
        variations: Variations object handed to ``histogram_for_chunks``.
        data_dir: Directory in which the image file is created.
        chunk_size: Chunk size forwarded to ``histogram_for_chunks``.
    """
    _calc_missing_gt = partial(calc_missing_gt, rates=True, axis=1)
    distrib, bins = histogram_for_chunks(variations,
                                         calc_funct=_calc_missing_gt,
                                         range_=(0, 1),
                                         n_bins=20,
                                         chunk_size=chunk_size)

    fpath = join(data_dir, 'missing_gt_rate.png')
    title = 'Missing Genotype rates per SNP distribution'
    mpl_params = {
        'set_xlabel': {
            'args': ['Missing GT rate'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['SNP number'],
            'kwargs': {}
        },
        'set_title': {
            'args': [title],
            'kwargs': {}
        }
    }
    # PNG output is binary data, so the file is opened in 'wb'; the with
    # block makes sure the handle is closed once the plot has been written.
    with open(fpath, 'wb') as fhand:
        plot_distrib(distrib,
                     bins,
                     fhand=fhand,
                     color='c',
                     mpl_params=mpl_params)
    def test_calc_maf_distrib_by_chunk(self):
        """Compare the 10-bin calc_maf histogram of ril.hdf5 with fixed values."""
        expected_edges = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
                          0.95, 1.]
        expected_counts = [53, 75, 74, 70, 69, 129, 73, 74, 49, 277]

        hdf5_path = join(TEST_DATA_DIR, 'ril.hdf5')
        variations = VariationsH5(hdf5_path, mode='r')
        # chunk_size=None is fixed here; histogram_for_chunks supplies the
        # remaining arguments when it invokes the callable.
        maf_calculator = partial(calc_maf,
                                 min_num_genotypes=1,
                                 chunk_size=None)

        counts, edges = histogram_for_chunks(variations,
                                             maf_calculator,
                                             n_bins=10)

        assert numpy.allclose(edges, expected_edges)
        assert numpy.allclose(counts, expected_counts)
Example #3
0
    def test_calc_maf_distrib_by_chunk(self):
        """Compare the 10-bin calc_maf histogram of ril.hdf5 with fixed values."""
        expected_counts = [53, 72, 77, 66, 73, 129, 74, 73, 49, 277]
        expected_edges = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
                          0.95, 1.]

        variations = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        # Bind the fixed keyword arguments once; histogram_for_chunks
        # supplies whatever else the callable needs.
        maf_calculator = partial(calc_maf,
                                 min_num_genotypes=1,
                                 chunk_size=None)
        histogram_result = histogram_for_chunks(variations,
                                                maf_calculator,
                                                n_bins=10)
        counts, edges = histogram_result

        assert numpy.allclose(edges, expected_edges)
        assert numpy.allclose(counts, expected_counts)
def plot_missing_gt_rate_per_snp(variations, data_dir,
                                 chunk_size=SNPS_PER_CHUNK):
    """Write the missing genotype rate histogram as a PNG image.

    ``histogram_for_chunks`` builds a 20-bin histogram over (0, 1) from
    ``calc_missing_gt(rates=True, axis=1)``; ``plot_distrib`` then draws it
    into ``<data_dir>/missing_gt_rate.png``.

    Args:
        variations: Variations object handed to ``histogram_for_chunks``.
        data_dir: Directory in which the image file is created.
        chunk_size: Chunk size forwarded to ``histogram_for_chunks``.
    """
    _calc_missing_gt = partial(calc_missing_gt, rates=True, axis=1)
    distrib, bins = histogram_for_chunks(variations,
                                         calc_funct=_calc_missing_gt,
                                         range_=(0, 1), n_bins=20,
                                         chunk_size=chunk_size)

    fpath = join(data_dir, 'missing_gt_rate.png')
    title = 'Missing Genotype rates per SNP distribution'
    mpl_params = {'set_xlabel': {'args': ['Missing GT rate'], 'kwargs': {}},
                  'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                  'set_title': {'args': [title], 'kwargs': {}}}
    # The image is binary, hence 'wb'; the context manager closes the handle
    # that the original code left open.
    with open(fpath, 'wb') as fhand:
        plot_distrib(distrib, bins, fhand=fhand, color='c',
                     mpl_params=mpl_params)
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot observed heterozygosity distributions by SNP and by sample.

    Two histograms (25 bins over the range (0, 1)) are drawn as stacked
    subplots of a single 10x10 figure and saved to
    ``<data_dir>/obs_het.png``. The upper subplot (``calc_obs_het`` values
    accumulated by ``histogram_for_chunks``) uses a logarithmic y scale; the
    lower one shows the values returned by ``calc_obs_het_by_sample``.

    Args:
        variations: Variations object handed to the calculation functions.
        data_dir: Directory in which the image file is created.
        chunk_size: Chunk size forwarded to the chunked calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het``.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations, calc_funct=_calc_obs_het_by_var,
                                   n_bins=25, range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    # A single binary handle replaces the three text-mode handles that were
    # opened on the same path and never closed.
    # NOTE(review): assumes plot_distrib does not render to fhand itself when
    # an axes object is supplied - confirm against plot_distrib.
    with open(fpath, 'wb') as fhand:
        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        plot_distrib(obs_het_var_distrib, bins=bins1, fhand=fhand,
                     mpl_params={'set_xlabel': {'args': ['Heterozygosity'],
                                                'kwargs': {}},
                                 'set_ylabel': {'args': ['SNP number'],
                                                'kwargs': {}},
                                 'set_title': {'args': [title], 'kwargs': {}},
                                 'set_yscale': {'args': ['log'],
                                                'kwargs': {}}},
                     axes=axes, color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        plot_distrib(obs_het_sample_distrib, bins=bins2, fhand=fhand,
                     mpl_params={'set_xlabel': {'args': ['Heterozygosity'],
                                                'kwargs': {}},
                                 'set_ylabel': {'args': ['Sample number'],
                                                'kwargs': {}},
                                 'set_title': {'args': [title], 'kwargs': {}}},
                     axes=axes, color='c')

        # Render the whole figure (both subplots) once, at the end.
        canvas.print_figure(fhand)
Example #6
0
def plot_obs_het(variations,
                 data_dir,
                 chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Save the observed heterozygosity histograms (by SNP and by sample).

    Both histograms use 25 bins over the range (0, 1). They are drawn as two
    stacked subplots of one 10x10 figure, which is written to
    ``<data_dir>/obs_het.png``. The upper subplot holds the ``calc_obs_het``
    histogram built by ``histogram_for_chunks`` and uses a log y scale; the
    lower subplot holds the histogram of ``calc_obs_het_by_sample`` values.

    Args:
        variations: Variations object handed to the calculation functions.
        data_dir: Directory in which the image file is created.
        chunk_size: Chunk size forwarded to the chunked calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het``.
    """

    def _single_arg_call(value):
        # Entry format used in mpl_params: one positional arg, no kwargs.
        return {'args': [value], 'kwargs': {}}

    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations,
                                   calc_funct=_calc_obs_het_by_var,
                                   n_bins=25,
                                   range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample,
                                              n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    # One binary handle, closed by the with block, replaces the three
    # text-mode handles that were opened on the same path and never closed.
    # NOTE(review): assumes plot_distrib does not render to fhand itself when
    # an axes object is supplied - confirm against plot_distrib.
    with open(fpath, 'wb') as fhand:
        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        plot_distrib(obs_het_var_distrib,
                     bins=bins1,
                     fhand=fhand,
                     mpl_params={
                         'set_xlabel': _single_arg_call('Heterozygosity'),
                         'set_ylabel': _single_arg_call('SNP number'),
                         'set_title': _single_arg_call(title),
                         'set_yscale': _single_arg_call('log')
                     },
                     axes=axes,
                     color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        plot_distrib(obs_het_sample_distrib,
                     bins=bins2,
                     fhand=fhand,
                     mpl_params={
                         'set_xlabel': _single_arg_call('Heterozygosity'),
                         'set_ylabel': _single_arg_call('Sample number'),
                         'set_title': _single_arg_call(title)
                     },
                     axes=axes,
                     color='c')

        # Render the complete figure (both subplots) once, at the end.
        canvas.print_figure(fhand)