from coverage_consensus_diversity import coverage, consensus, get_fragment_boundaries
from minor_variant import trim_ac
from helpers import name_translations

if __name__ == '__main__':
    # Draw the coverage profile of a single sample on a log-scaled axis and
    # mark the extent of every primer fragment of the reference as a grey bar.
    # NOTE(review): `load_allele_counts` and `plt` are not imported in the
    # visible part of this file -- confirm they are brought into scope elsewhere.

    # Disabled alternative: resolve the sample name from a lab id through the
    # name translation table instead of hard-coding it below.
    #ntt = name_translations('name_translation_table.tsv')
    #labid = '16CA403716'
    #name = ntt[labid]
    sample_location = 'samples_by_strain/'
    name = "EVD68_SWE_029_160904_NFLG"
    fs = 16  # base font size; tick labels use 0.8 * fs
    ref = 'KX675261.1'

    # Load the allele counts (and insertions) stored in the sample directory.
    sample_dir = sample_location + name + "/"
    ac, ins = load_allele_counts(sample_dir)
    primer_boundaries = get_fragment_boundaries('primers.csv', ac)

    # Coverage is computed from the second element of the first `ac` entry.
    # NOTE(review): presumably `ac[0]` is a (reference, counts) pair for
    # `ref` -- confirm against load_allele_counts.
    cov = coverage(ac[0][1])

    plt.figure(figsize=(8, 4))
    plt.plot(cov, lw=3)

    # One horizontal bar per primer fragment, spanning its start..end positions.
    fragments = primer_boundaries[ref]
    bar_color = (0.7, 0.7, 0.7)
    for p in fragments:
        # The parity of the second character of the fragment key selects one
        # of two bar heights, so bars of consecutive keys sit on different rows.
        if int(p[1]) % 2:
            y = 50
        else:
            y = 70
        span = fragments[p]
        plt.plot([span['start'], span['end']], [y, y], lw=7, c=bar_color)

    plt.xlabel('position in genome', fontsize=fs)
    plt.ylabel('coverage', fontsize=fs)
    # Limits are set before switching to the log scale, as in the original order.
    plt.ylim(30, 10000)
    plt.yscale('log')
    plt.tick_params(labelsize=0.8 * fs)
    plt.title(name, fontsize=fs)
    plt.tight_layout()
primer_masks = get_primer_mask(args.primers, ac) freqs = trim_ac(ac) sample = args.sample.split('/')[-1] major_freq = {ref:np.max(x, axis=0) for ref, x in freqs.items()} minor_seqs = {} any_minors = False seqs = [] from Bio import SeqIO, SeqRecord, Seq for ref, counts in ac: print("ref", ref) consensus_seq = consensus(counts, min_cov=args.min_cov) cov = coverage(counts) div_pos = np.where((major_freq[ref]<1.0-args.min_freq)&(cov>args.min_cov))[0] alterations = [] insertions_to_include = [] for pos in div_pos: tmp_freqs = freqs[ref][:, pos] if sorted(tmp_freqs)[-2]>args.min_freq: ii = np.argsort(tmp_freqs)[-2] alterations.append([pos, nuc_alpha[ii], tmp_freqs[ii]]) if alterations: print(sample, ref, 'minor variants', alterations) consensus_seq[[p for p,n,f in alterations]] = [n for p,n,f in alterations] any_minors = True for pos in ins[ref]:
} } days = { 'SWE_012': '2 days', 'SWE_021': '7 days', 'SWE_024': '1 day', 'SWE_037': '1 day', 'SWE_039': '0 days' } for pt in samps: for key in samps[pt]: ac, ins = load_allele_counts(sample_location + samps[pt][key]) sample = pt + '-' + key cov[sample] = coverage(ac[0][1]) freqs[sample] = trim_ac(ac, n_states=5) major_freqs[sample] = { ref: np.max(x, axis=0) for ref, x in freqs[sample].items() } ################################## ################################## #positions not to plot #These positions are just before CDS, and show up in #4/5 of these samples.... exclude = [690, 694] fs = 12
from minor_variant import trim_ac from helpers import add_panel_label, name_translations if __name__ == '__main__': freqs = {} major_freqs = {} major_seqs = {} cov = {} snames = ['16CA514285', '16CA403717', '14CA515617'] ntt = name_translations('name_translation_table.tsv') for sname in snames: ac, ins = load_allele_counts('mapped_data/' + sname) primer_boundaries = get_fragment_boundaries('primers.csv', ac) cov[sname] = coverage(ac[0][1]) freqs[sname] = trim_ac(ac, n_states=5) major_freqs[sname] = { ref: np.max(x, axis=0) for ref, x in freqs[sname].items() } major_seqs[sname] = { ref: nuc_alpha[np.argmax(x, axis=0)] for ref, x in freqs[sname].items() } min_cov = 1000 ref = 'KX675261.1' cp_labels = {0: 'non coding', 1: '1st', 2: '2nd', 3: '3rd'} variable_sites = {} fig, axs = plt.subplots(len(snames),