Ejemplo n.º 1
0
def get_sample_cdf(samples):
    """Pool SNP values over all samples and build per-genome CDF summaries.

    For every value in ``samples`` the ``mt`` and ``nuc`` attributes are
    queried (in that order) with the ``'tot'`` data type. ``min_freq`` and
    ``min_cov`` are not parameters; they are looked up in the enclosing
    module scope.

    Args:
        samples: Mapping whose values expose ``mt`` and ``nuc`` objects, each
            providing ``get_data(dtype)`` and ``get_snps(min_freq, min_cov,
            dtype)``.

    Returns:
        A ``(mito, nuc)`` tuple of dicts with the keys ``'x'``, ``'cdf'`` and
        ``'frac'``. ``'x'`` and ``'cdf'`` are the two outputs of
        ``cs.cdf`` on the pooled SNP counter; ``'frac'`` is the second output
        of ``cs.cdf(..., norm=False)`` divided by the number of positions
        whose coverage is at least ``min_cov``, summed over all samples.
    """
    dtype = 'tot'
    genomes = ('mt', 'nuc')
    pooled_snps = {name: Counter() for name in genomes}
    covered_sites = dict.fromkeys(genomes, 0)

    for sample in samples.values():
        for name in genomes:
            genome = getattr(sample, name)
            # Only the coverage part of the returned pair is needed here.
            coverage, _ = genome.get_data(dtype)
            covered_sites[name] += np.count_nonzero(coverage >= min_cov)
            snps = genome.get_snps(min_freq, min_cov, dtype)
            # Tally only the non-zero entries of the SNP array.
            pooled_snps[name] += Counter(snps[np.nonzero(snps)[0]])

    # Evaluate all four CDFs first (default call, then norm=False, per
    # genome) and only afterwards scale by the covered-site totals.
    curves = {
        name: (cs.cdf(pooled_snps[name]),
               cs.cdf(pooled_snps[name], norm=False))
        for name in genomes
    }

    def _summary(name):
        """Assemble the result dict for one genome."""
        (x_vals, cdf_vals), (_, cumulative) = curves[name]
        return {
            'x': x_vals,
            'cdf': cdf_vals,
            'frac': cumulative / covered_sites[name],
        }

    return _summary('mt'), _summary('nuc')
Ejemplo n.º 2
0
 def snp_cdf(self, min_freq, min_cov, frac=True, norm=False):
     """Return the CDF of this object's non-zero SNP values.

     Args:
         min_freq: Forwarded to ``self.get_snps``.
         min_cov: Forwarded to ``self.get_snps``; also the threshold used to
             count entries of ``self.coverage`` that are ``>= min_cov``.
         frac: When true, divide the second output of ``cs.cdf`` by that
             count of covered entries.
         norm: Forwarded to ``cs.cdf`` as its ``norm`` keyword.

     Returns:
         The ``(x, c)`` pair produced by ``cs.cdf``, with ``c`` rescaled as
         described above when ``frac`` is true.
     """
     n_covered = np.count_nonzero(self.coverage >= min_cov)
     snp_values = self.get_snps(min_freq, min_cov)
     # Keep only the non-zero entries before tallying them.
     nonzero_idx = np.nonzero(snp_values)[0]
     tally = Counter(snp_values[nonzero_idx])
     values, cumulative = cs.cdf(tally, norm=norm)
     if not frac:
         return values, cumulative
     return values, cumulative / n_covered
Ejemplo n.º 3
0
 def snp_cdf(self, min_freq, min_cov, dtype='tot', frac=True, norm=False):
     """Return the CDF of the SNP values reported for one data type.

     Args:
         min_freq: Forwarded to ``self.get_snps``.
         min_cov: Forwarded to ``self.get_snps``; also the threshold used to
             count entries of ``self.coverage[dtype]`` that are
             ``>= min_cov``.
         dtype: Key into ``self.coverage``; also passed to ``self.get_snps``.
         frac: When true, divide the second output of ``cs.cdf`` by that
             count of covered entries.
         norm: Forwarded to ``cs.cdf`` as its ``norm`` keyword.

     Returns:
         The ``(x, c)`` pair produced by ``cs.cdf``, with ``c`` rescaled as
         described above when ``frac`` is true.
     """
     n_covered = np.count_nonzero(self.coverage[dtype] >= min_cov)
     # get_snps returns a pair here; only its first element is used.
     snp_values, _ = self.get_snps(min_freq, min_cov, dtype=dtype)
     values, cumulative = cs.cdf(Counter(snp_values), norm=norm)
     if not frac:
         return values, cumulative
     return values, cumulative / n_covered
Ejemplo n.º 4
0
# Global Matplotlib settings: LaTeX text rendering, the Computer Modern Roman
# serif font at size 14, and figure.autolayout switched on.
mpl.rcParams.update({
    'text.usetex': True,
    'font.family': 'serif',
    'font.serif': 'Computer Modern Roman',
    'font.size': 14,
    'figure.autolayout': True
})

# Neither of these names is used in the lines shown here.
# NOTE(review): presumably `fnames` collects output file names and `savefigs`
# toggles writing figures to disk further down -- confirm.
fnames = []
savefigs = True

# Load the alignment archive. The trailing comment on the call and the
# commented-out `data12` line keep paths of other datasets that are
# currently disabled.
data11 = np.load('Desai/alignment_srr5406290.npz'
                 )  #Yeast_Nanopore_Aug30_barcode07/alignment_barcode07.npz')
#data12 = np.load('Yeast_Nanopore_Aug30_barcode09/alignment_barcode09.npz')

# Each stored entry is indexed with an empty tuple before being passed to
# cs.cdf, whose two outputs are used below as x values and CDF values.
# NOTE(review): `[()]` presumably unwraps a 0-d object array -- confirm.
x11_nuc, cdf11_nuc = cs.cdf(data11['nuc_len'][()])
x11_mt, cdf11_mt = cs.cdf(data11['mt_len'][()])
#x12_nuc, cdf12_nuc = cs.cdf(data12['nuc_len'][()])
#x12_mt, cdf12_mt = cs.cdf(data12['mt_len'][()])
#
# Plot 1 - CDF for both read-length sets on the same axes; the title labels
# this quantity as Prob(read length > r).
fig_len = plt.figure()
fig_len.gca().plot(x11_mt, 1 - cdf11_mt, 'C0-', label='mitochondria')
fig_len.gca().plot(x11_nuc, 1 - cdf11_nuc, 'C1-', label='nuclear')
#fig_len.gca().semilogy(x12_mt * 1e-3, 1 - cdf12_mt, 'C0--')
#fig_len.gca().semilogy(x12_nuc * 1e-3, 1 - cdf12_nuc, 'C1--')
fig_len.gca().set_xlabel(r'$r$ (bp)')
##fig_len.gca().set_title('1 - CDF of read length')
fig_len.gca().set_title(r'$\mathrm{Prob}(\mathrm{read\; length} > r)$',
                        fontsize=14)
# Collect the existing legend entries and create a data-less solid black line.
# NOTE(review): presumably both feed a custom legend built after this
# excerpt -- confirm.
handles, labels = fig_len.gca().get_legend_handles_labels()
line11 = mlines.Line2D([], [], color='k', linestyle='-')