def plot_minor_allele_frequency_filtered(data_folder, adaID, fragments,
                                         VERBOSE=0, savefig=False):
    '''Plot minor allele frequency along the genome.

    The merged allele frequencies are loaded from the file named by
    get_merged_allele_frequencies_filename. For every index along the last
    axis, the second-largest frequency is taken as the minor allele
    frequency and plotted on a logarithmic y axis.

    Parameters:
       data_folder: forwarded to the filename helpers
       adaID: adapter ID; concatenated into the title, so it must be a string
       fragments: sequence of fragment names, joined with '-' in the title
       VERBOSE: accepted for interface compatibility, not used in this function
       savefig: if true, save the figure to the file named by
                get_minor_allele_frequency_merged_figure_filename and close
                it; otherwise switch to interactive mode and show it
    '''
    # NOTE(review): the merge script stores these arrays with ndarray.dump
    # (pickle); recent NumPy versions refuse to unpickle unless
    # allow_pickle=True is passed -- confirm against the NumPy in use.
    nus = np.load(get_merged_allele_frequencies_filename(data_folder, adaID,
                                                         fragments))

    # Everything starts masked; only positions that can be evaluated are filled
    nu_min = np.ma.masked_all(nus.shape[-1])
    # presumably nus is (alleles, positions), so nus.T yields one column of
    # allele frequencies per position -- TODO confirm
    for pos, nutmp in enumerate(nus.T):
        # Positions with any masked entry stay masked in the output
        if np.ma.is_masked(nutmp):
            continue
        try:
            # Second-largest value of the column
            nu_min[pos] = np.sort(nutmp)[-2]
        except ValueError:
            # Report the offending position and leave it masked. The handler
            # used to open an ipdb session here, which blocks batch runs.
            print('%s %s' % (pos, np.ma.is_masked(nutmp)))

    # Imported lazily so that loading the module does not pull in matplotlib
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    ax.plot(nu_min, lw=1.5, c='k')
    ax.scatter(np.arange(len(nu_min)), nu_min, s=30, c='k')
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel(r'$\nu$', fontsize=20)
    ax.set_title('adaID ' + adaID + ', ' + '-'.join(fragments))
    # Leave a margin of 100 positions on both sides of the data
    ax.set_xlim(-100, len(nu_min) + 100)

    plt.tight_layout()

    if savefig:
        from hivwholeseq.sequencing.filenames import (
            get_minor_allele_frequency_merged_figure_filename as gff)
        outputfile = gff(data_folder, adaID, fragments)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()
def plot_minor_allele_frequency_filtered(data_folder, adaID, fragments,
                                         VERBOSE=0, savefig=False):
    '''Plot minor allele frequency along the genome.

    The merged allele frequencies are loaded from the file named by
    get_merged_allele_frequencies_filename. For every index along the last
    axis, the second-largest frequency is taken as the minor allele
    frequency and plotted on a logarithmic y axis.

    Parameters:
       data_folder: forwarded to the filename helpers
       adaID: adapter ID; concatenated into the title, so it must be a string
       fragments: sequence of fragment names, joined with '-' in the title
       VERBOSE: accepted for interface compatibility, not used in this function
       savefig: if true, save the figure to the file named by
                get_minor_allele_frequency_merged_figure_filename and close
                it; otherwise switch to interactive mode and show it
    '''
    # NOTE(review): the merge script stores these arrays with ndarray.dump
    # (pickle); recent NumPy versions refuse to unpickle unless
    # allow_pickle=True is passed -- confirm against the NumPy in use.
    nus = np.load(get_merged_allele_frequencies_filename(data_folder, adaID,
                                                         fragments))

    # Everything starts masked; only positions that can be evaluated are filled
    nu_min = np.ma.masked_all(nus.shape[-1])
    # presumably nus is (alleles, positions), so nus.T yields one column of
    # allele frequencies per position -- TODO confirm
    for pos, nutmp in enumerate(nus.T):
        # Positions with any masked entry stay masked in the output
        if np.ma.is_masked(nutmp):
            continue
        try:
            # Second-largest value of the column
            nu_min[pos] = np.sort(nutmp)[-2]
        except ValueError:
            # Report the offending position and leave it masked. The handler
            # used to open an ipdb session here, which blocks batch runs.
            print('%s %s' % (pos, np.ma.is_masked(nutmp)))

    # Imported lazily so that loading the module does not pull in matplotlib
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    ax.plot(nu_min, lw=1.5, c='k')
    ax.scatter(np.arange(len(nu_min)), nu_min, s=30, c='k')
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel(r'$\nu$', fontsize=20)
    ax.set_title('adaID ' + adaID + ', ' + '-'.join(fragments))
    # Leave a margin of 100 positions on both sides of the data
    ax.set_xlim(-100, len(nu_min) + 100)

    plt.tight_layout()

    if savefig:
        from hivwholeseq.sequencing.filenames import (
            get_minor_allele_frequency_merged_figure_filename as gff)
        outputfile = gff(data_folder, adaID, fragments)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()
# Start from every sample in the dataset and, when specific adapter IDs were
# requested, keep only the rows whose adapter is among them
samples = dataset.samples
if adaIDs is not None:
    is_requested = samples.adapter.isin(adaIDs)
    samples = samples.loc[is_requested]

if VERBOSE >= 3:
    print('%s %s' % ('adaIDs', samples.adapter))

for samplename, sample in samples.iterrows():
    sample = SampleSeq(sample)
    adaID = sample.adapter
    if VERBOSE >= 1:
        print('%s %s' % (adaID, samplename))

    # Each complete region is cut down to its first two items
    fragments = [fr[:2] for fr in sample.regions_complete]
    if (VERBOSE >= 1) and (len(fragments) != 6):
        print('WARNING: only '+str(len(fragments))+' regions found!')

    # merge_consensi yields (fragments, consensus) pairs; each consensus is
    # written to its own FASTA file
    consensus = merge_consensi(data_folder, adaID, fragments,
                               VERBOSE=VERBOSE)
    for frags, cons in consensus:
        output_filename = get_merged_consensus_filename(data_folder, adaID,
                                                        frags)
        SeqIO.write(cons, output_filename, 'fasta')

    # Optionally do the same for the allele frequencies, dumping each merged
    # array to its own file
    if do_nus:
        nu = merge_allele_frequencies(data_folder, adaID, fragments,
                                      VERBOSE=VERBOSE)
        for frags, nuf in nu:
            output_filename = get_merged_allele_frequencies_filename(
                data_folder, adaID, frags)
            nuf.dump(output_filename)