def plot_missing_gt_rate_per_snp(variations, data_dir, chunk_size=SNPS_PER_CHUNK):
    """Plot the missing-genotype-rate distribution and save it as a PNG.

    The values are produced by ``calc_missing_gt(rates=True, axis=1)`` and
    binned through ``histogram_for_chunks`` into 20 bins over the range
    (0, 1). The resulting histogram is written to ``missing_gt_rate.png``
    inside ``data_dir``.

    Args:
        variations: Variation data handed to ``histogram_for_chunks``.
        data_dir: Directory in which the output image is created.
        chunk_size: Chunk size forwarded to ``histogram_for_chunks``.
    """
    _calc_missing_gt = partial(calc_missing_gt, rates=True, axis=1)
    distrib, bins = histogram_for_chunks(variations,
                                         calc_funct=_calc_missing_gt,
                                         range_=(0, 1), n_bins=20,
                                         chunk_size=chunk_size)

    fpath = join(data_dir, 'missing_gt_rate.png')
    title = 'Missing Genotype rates per SNP distribution'
    mpl_params = {
        'set_xlabel': {'args': ['Missing GT rate'], 'kwargs': {}},
        'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
        'set_title': {'args': [title], 'kwargs': {}},
    }

    # A PNG is binary data, so the file is opened in binary mode, and the
    # context manager guarantees the handle is closed even if plotting fails
    # (the previous inline open() was never closed).
    with open(fpath, 'wb') as fhand:
        plot_distrib(distrib, bins, fhand=fhand, color='c',
                     mpl_params=mpl_params)
def test_calc_maf_distrib_by_chunk(self):
    """Check the ``histogram_for_chunks`` result for ``calc_maf`` on ril.hdf5.

    The histogram is requested with 10 bins; both the bin edges and the
    per-bin counts must match the stored expectations.
    """
    expected_counts = [53, 75, 74, 70, 69, 129, 73, 74, 49, 277]
    expected_edges = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75,
                      0.8, 0.85, 0.9, 0.95, 1.]

    h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
    snps = VariationsH5(h5_path, mode='r')

    # Pre-bind the calc_maf keyword options before handing the callable to
    # histogram_for_chunks.
    maf_calculator = partial(calc_maf, min_num_genotypes=1, chunk_size=None)
    result = histogram_for_chunks(snps, maf_calculator, n_bins=10)
    counts, edges = result

    assert numpy.allclose(edges, expected_edges)
    assert numpy.allclose(counts, expected_counts)
def test_calc_maf_distrib_by_chunk(self):
    """Check the ``histogram_for_chunks`` result for ``calc_maf`` on ril.hdf5.

    The histogram is requested with 10 bins; both the bin edges and the
    per-bin counts must match the stored expectations.
    """
    expected_counts = [53, 72, 77, 66, 73, 129, 74, 73, 49, 277]
    expected_edges = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75,
                      0.8, 0.85, 0.9, 0.95, 1.]

    h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
    snps = VariationsH5(h5_path, mode='r')

    # Pre-bind the calc_maf keyword options before handing the callable to
    # histogram_for_chunks.
    maf_calculator = partial(calc_maf, min_num_genotypes=1, chunk_size=None)
    result = histogram_for_chunks(snps, maf_calculator, n_bins=10)
    counts, edges = result

    assert numpy.allclose(edges, expected_edges)
    assert numpy.allclose(counts, expected_counts)
def plot_missing_gt_rate_per_snp(variations, data_dir, chunk_size=SNPS_PER_CHUNK):
    """Plot the missing-genotype-rate distribution and save it as a PNG.

    The values are produced by ``calc_missing_gt(rates=True, axis=1)`` and
    binned through ``histogram_for_chunks`` into 20 bins over the range
    (0, 1). The resulting histogram is written to ``missing_gt_rate.png``
    inside ``data_dir``.

    Args:
        variations: Variation data handed to ``histogram_for_chunks``.
        data_dir: Directory in which the output image is created.
        chunk_size: Chunk size forwarded to ``histogram_for_chunks``.
    """
    _calc_missing_gt = partial(calc_missing_gt, rates=True, axis=1)
    distrib, bins = histogram_for_chunks(variations,
                                         calc_funct=_calc_missing_gt,
                                         range_=(0, 1), n_bins=20,
                                         chunk_size=chunk_size)

    fpath = join(data_dir, 'missing_gt_rate.png')
    title = 'Missing Genotype rates per SNP distribution'
    mpl_params = {
        'set_xlabel': {'args': ['Missing GT rate'], 'kwargs': {}},
        'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
        'set_title': {'args': [title], 'kwargs': {}},
    }

    # A PNG is binary data, so the file is opened in binary mode, and the
    # context manager guarantees the handle is closed even if plotting fails
    # (the previous inline open() was never closed).
    with open(fpath, 'wb') as fhand:
        plot_distrib(distrib, bins, fhand=fhand, color='c',
                     mpl_params=mpl_params)
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot observed-heterozygosity distributions by SNP and by sample.

    Two histograms (25 bins over the range (0, 1)) are drawn as stacked
    subplots of a single 10x10 figure, which is saved as ``obs_het.png``
    inside ``data_dir``. The upper (per-SNP) subplot uses a log y-scale.

    Args:
        variations: Variation data handed to ``histogram_for_chunks`` and
            ``calc_obs_het_by_sample``.
        data_dir: Directory in which the output image is created.
        chunk_size: Chunk size forwarded to both calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het``.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    obs_het_var_distrib, bins1 = histogram_for_chunks(
        variations, calc_funct=_calc_obs_het_by_var, n_bins=25,
        range_=(0, 1), chunk_size=chunk_size)

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    # The output file used to be opened three times in text mode (once here
    # and once per plot_distrib call) and never closed. A single binary handle
    # managed by ``with`` is now shared and reliably closed.
    # NOTE(review): assumes plot_distrib only draws on the supplied axes and
    # leaves writing the file to canvas.print_figure below - confirm.
    with open(fpath, 'wb') as fhand:
        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        snp_mpl_params = {
            'set_xlabel': {'args': ['Heterozygosity'], 'kwargs': {}},
            'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
            'set_yscale': {'args': ['log'], 'kwargs': {}},
        }
        plot_distrib(obs_het_var_distrib, bins=bins1, fhand=fhand,
                     mpl_params=snp_mpl_params, axes=axes, color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        sample_mpl_params = {
            'set_xlabel': {'args': ['Heterozygosity'], 'kwargs': {}},
            'set_ylabel': {'args': ['Sample number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
        }
        plot_distrib(obs_het_sample_distrib, bins=bins2, fhand=fhand,
                     mpl_params=sample_mpl_params, axes=axes, color='c')

        canvas.print_figure(fhand)
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot observed-heterozygosity distributions by SNP and by sample.

    Two histograms (25 bins over the range (0, 1)) are drawn as stacked
    subplots of a single 10x10 figure, which is saved as ``obs_het.png``
    inside ``data_dir``. The upper (per-SNP) subplot uses a log y-scale.

    Args:
        variations: Variation data handed to ``histogram_for_chunks`` and
            ``calc_obs_het_by_sample``.
        data_dir: Directory in which the output image is created.
        chunk_size: Chunk size forwarded to both calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het``.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    obs_het_var_distrib, bins1 = histogram_for_chunks(
        variations, calc_funct=_calc_obs_het_by_var, n_bins=25,
        range_=(0, 1), chunk_size=chunk_size)

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    # The output file used to be opened three times in text mode (once here
    # and once per plot_distrib call) and never closed. A single binary handle
    # managed by ``with`` is now shared and reliably closed.
    # NOTE(review): assumes plot_distrib only draws on the supplied axes and
    # leaves writing the file to canvas.print_figure below - confirm.
    with open(fpath, 'wb') as fhand:
        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        snp_mpl_params = {
            'set_xlabel': {'args': ['Heterozygosity'], 'kwargs': {}},
            'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
            'set_yscale': {'args': ['log'], 'kwargs': {}},
        }
        plot_distrib(obs_het_var_distrib, bins=bins1, fhand=fhand,
                     mpl_params=snp_mpl_params, axes=axes, color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        sample_mpl_params = {
            'set_xlabel': {'args': ['Heterozygosity'], 'kwargs': {}},
            'set_ylabel': {'args': ['Sample number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
        }
        plot_distrib(obs_het_sample_distrib, bins=bins2, fhand=fhand,
                     mpl_params=sample_mpl_params, axes=axes, color='c')

        canvas.print_figure(fhand)