'A': 'EVD68_SWE_039_160831-1_NFLG', 'B': 'EVD68_SWE_039_160831-2_NFLG' } } days = { 'SWE_012': '2 days', 'SWE_021': '7 days', 'SWE_024': '1 day', 'SWE_037': '1 day', 'SWE_039': '0 days' } for pt in samps: for key in samps[pt]: ac, ins = load_allele_counts(sample_location + samps[pt][key]) sample = pt + '-' + key cov[sample] = coverage(ac[0][1]) freqs[sample] = trim_ac(ac, n_states=5) major_freqs[sample] = { ref: np.max(x, axis=0) for ref, x in freqs[sample].items() } ################################## ################################## #positions not to plot #These positions are just before CDS, and show up in #4/5 of these samples.... exclude = [690, 694]
from create_allele_counts import load_allele_counts, nuc_alpha
from coverage_consensus_diversity import coverage, consensus, get_fragment_boundaries
from minor_variant import trim_ac
from helpers import name_translations

# Plot the coverage curve of a single sample and draw one grey bar per
# entry returned by get_fragment_boundaries for the chosen reference.
if __name__ == '__main__':
    # Earlier sample selection through the name translation table:
    #ntt = name_translations('name_translation_table.tsv')
    #labid = '16CA403716'
    #name = ntt[labid]
    sample_location = 'samples_by_strain/'
    name = "EVD68_SWE_029_160904_NFLG"
    fs = 16  # axis-label font size; tick labels use 0.8 * fs

    ac, ins = load_allele_counts(sample_location + name + "/")
    primer_boundaries = get_fragment_boundaries('primers.csv', ac)
    # NOTE(review): ac[0][1] is presumably the count array of the first
    # reference in ac -- confirm against load_allele_counts.
    cov = coverage(ac[0][1])
    ref = 'KX675261.1'

    plt.figure(figsize=(8, 4))
    plt.plot(cov, lw=3)

    # The parity of int(p[1]) selects one of two bar heights (50 or 70).
    for p in primer_boundaries[ref]:
        if int(p[1]) % 2:
            y = 50
        else:
            y = 70
        fragment = primer_boundaries[ref][p]
        plt.plot([fragment['start'], fragment['end']], [y, y],
                 lw=7, c=(0.7, 0.7, 0.7))

    plt.xlabel('position in genome', fontsize=fs)
    plt.ylabel('coverage', fontsize=fs)
    plt.ylim(30, 10000)
    plt.yscale('log')
    plt.tick_params(labelsize=0.8 * fs)
import matplotlib.pyplot as plt
#import seaborn as sns
from create_allele_counts import load_allele_counts
from coverage_consensus_diversity import coverage, consensus
from minor_variant import trim_ac
from helpers import add_panel_label

plt.ion()

if __name__ == '__main__':
    # Per-sample containers, keyed by sample name.
    freqs, major_freqs, cov = {}, {}, {}

    for sample in ('JA-A', 'JA-B', 'QC_JA-A', 'QC_JA-B'):
        ac, ins = load_allele_counts('mapped_data/' + sample)
        cov[sample] = coverage(ac[0][1])
        trimmed = trim_ac(ac, n_states=5)
        freqs[sample] = trimmed
        # Largest value along axis 0 for every reference in the sample.
        major_freqs[sample] = {
            ref: np.max(x, axis=0) for ref, x in trimmed.items()
        }

    fs = 24
    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
    # Black diagonal from (0.001, 0.001) to (1.0, 1.0) in the left panel.
    axs[0].plot([0.001, 1.0], [0.001, 1.0], c='k')

    min_cov = 2000
    for s in 'AB':
        for ref in major_freqs['JA-' + s]:
            # Keep only positions where both the JA-<s> and the QC_JA-<s>
            # coverage exceed min_cov.
            good_ind = ((cov['JA-' + s] > min_cov)
                        & (cov['QC_JA-' + s] > min_cov))
# Script
if __name__ == '__main__':
    # Parse input args
    parser = argparse.ArgumentParser(
        description='plot coverage, diversity and output consensus sequence',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Options are registered in this order, which is also the order in
    # which argparse lists them in --help.
    cli_options = (
        ('--sample', dict(required=True, type=str,
                          help='the sample to analyze')),
        ('--out_dir', dict(required=True, type=str,
                           help='directory to output')),
        ('--min_cov', dict(type=int, default=1000,
                           help='minimal coverage to call consensus')),
        ('--primers', dict(type=str,
                           help='file with primers to mask in diversity calculation')),
        ('--min_freq', dict(type=float, default=0.05,
                            help='minimal frequency to accept minor variant')),
    )
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    stats = {}
    ac, ins = load_allele_counts(args.sample)
    # primer_masks is only bound when a primer file was supplied.
    if args.primers:
        primer_masks = get_primer_mask(args.primers, ac)
    freqs = trim_ac(ac)
    # Sample name = last '/'-separated component of the --sample argument.
    sample = args.sample.rsplit('/', 1)[-1]
    major_freq = {ref: np.max(x, axis=0) for ref, x in freqs.items()}

    minor_seqs = {}
    any_minors = False
    seqs = []

    from Bio import SeqIO, SeqRecord, Seq
    for ref, counts in ac:
        print("ref", ref)
from create_allele_counts import load_allele_counts, nuc_alpha
from coverage_consensus_diversity import coverage, consensus, get_fragment_boundaries
from minor_variant import trim_ac
from helpers import add_panel_label, name_translations

if __name__ == '__main__':
    # Per-sample containers, keyed by sample name.
    freqs, major_freqs, major_seqs, cov = {}, {}, {}, {}

    snames = ['16CA514285', '16CA403717', '14CA515617']
    ntt = name_translations('name_translation_table.tsv')

    for sname in snames:
        ac, ins = load_allele_counts('mapped_data/' + sname)
        primer_boundaries = get_fragment_boundaries('primers.csv', ac)
        cov[sname] = coverage(ac[0][1])

        trimmed = trim_ac(ac, n_states=5)
        freqs[sname] = trimmed
        # Per reference: the largest value along axis 0 ...
        major_freqs[sname] = {
            ref: np.max(x, axis=0) for ref, x in trimmed.items()
        }
        # ... and the nuc_alpha entry at the index of that largest value.
        major_seqs[sname] = {
            ref: nuc_alpha[np.argmax(x, axis=0)] for ref, x in trimmed.items()
        }

    min_cov = 1000
    ref = 'KX675261.1'
    # Labels keyed 0..3 (presumably codon positions, 0 = non coding -- confirm).
    cp_labels = dict(enumerate(('non coding', '1st', '2nd', '3rd')))
from helpers import add_panel_label, name_translations

if __name__ == '__main__':
    sample_location = "samples_by_strain2/"
    # Per-sample containers, keyed by sample name.
    freqs, major_freqs, major_seqs, cov = {}, {}, {}, {}

    # Alternative sample selections that were used previously:
    #snames = ['16CA514285', '16CA403717', '14CA515617']
    #snames = ["EVD68_SWE_045_160831_NFLG", "EVD68_SWE_046_160904_NFLG", "EVD68_SWE_007_140908_NFLG"]
    #ntt = name_translations('name_translation_table.tsv')
    snames = ["EVD68_BEL_009_18XXXX_NFLG"]

    for sname in snames:
        ac, ins = load_allele_counts(sample_location + sname)
        primer_boundaries = get_fragment_boundaries('primers.csv', ac)
        cov[sname] = coverage(ac[0][1])

        trimmed = trim_ac(ac, n_states=5)
        freqs[sname] = trimmed
        # Per reference: the largest value along axis 0 ...
        major_freqs[sname] = {
            ref: np.max(x, axis=0) for ref, x in trimmed.items()
        }
        # ... and the nuc_alpha entry at the index of that largest value.
        major_seqs[sname] = {
            ref: nuc_alpha[np.argmax(x, axis=0)] for ref, x in trimmed.items()
        }

    min_cov = 1000
    ref = 'KX675261.1'
    # Labels keyed 0..3 (presumably codon positions, 0 = non coding -- confirm).
    cp_labels = dict(enumerate(('non coding', '1st', '2nd', '3rd')))
from create_allele_counts import load_allele_counts
from coverage_consensus_diversity import coverage, consensus
from minor_variant import trim_ac
from helpers import add_panel_label

plt.ion()

if __name__ == '__main__':
    # Per-sample containers, keyed by sample name.
    freqs, major_freqs, cov = {}, {}, {}

    samples = glob.glob('mapped_data/1*')
    for sample in samples:
        ac, ins = load_allele_counts(sample)
        # Sample name = last path component, ignoring trailing slashes.
        sname = sample.rstrip('/').rsplit('/', 1)[-1]
        cov[sname] = coverage(ac[0][1])
        trimmed = trim_ac(ac, n_states=5)
        freqs[sname] = trimmed
        # Largest value along axis 0 for every reference in the sample.
        major_freqs[sname] = {
            ref: np.max(x, axis=0) for ref, x in trimmed.items()
        }

    #cutoffs = [0.01, 0.03, 0.1]
    cutoffs = [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.03, 0.05, 0.1]
    display_cutoffs = [0.01, 0.03, 0.1]
    min_cov = 1000
    n_minor = []
    # Iterate over the samples in sorted name order.
    snames = sorted(major_freqs)
    ref = 'KX675261.1'
    for sname in snames:
        # Boolean selection of positions whose coverage exceeds min_cov.
        good_ind = cov[sname] > min_cov
        tmp = []
type=str, help='file with primers to mask in diversity calculation') parser.add_argument('--min_cov', type=int, default=100, help='minimal coverage to call consensus') parser.add_argument( '--all_counts', action="store_true", default=False, help="plot coverage/diversity for all count files found") args = parser.parse_args() stats = {} ac, ins = load_allele_counts(args.sample, allCounts=args.all_counts) primer_masks = get_primer_mask(args.primers, ac) primer_boundaries = get_fragment_boundaries(args.primers, ac) sample = args.sample.split('/')[-1] stats = plot_coverage_concatenated(sample, ac, args.out_dir + '/figures/coverage.png', primer_boundaries=primer_boundaries) div = plot_diversity(sample, ac, args.out_dir + "/figures/diversity.png", primer_masks, primer_boundaries=primer_boundaries) for k, v in list(div.items()): stats[k].update(v)
import matplotlib.pyplot as plt #import seaborn as sns from create_allele_counts import load_allele_counts, nuc_alpha from coverage_consensus_diversity import coverage, consensus, get_fragment_boundaries from minor_variant import trim_ac from helpers import name_translations if __name__ == '__main__': ntt = name_translations('name_translation_table.tsv') labid = ### redacted sample name name = ntt[labid] fs=16 ac, ins = load_allele_counts('mapped_data/'+labid) primer_boundaries = get_fragment_boundaries('primers.csv', ac) cov = coverage(ac[0][1]) ref='KX675261.1' plt.figure() plt.plot(cov, lw=3) for p in primer_boundaries[ref]: y = 50 if int(p[1])%2 else 70 plt.plot([primer_boundaries[ref][p]['start'], primer_boundaries[ref][p]['end']],[y,y], lw=7, c=(0.7, 0.7, 0.7)) plt.xlabel('position in genome', fontsize=fs) plt.ylabel('coverage', fontsize=fs) plt.ylim(30,10000) plt.yscale('log') plt.tick_params(labelsize=0.8*fs)