def plot_cumulative_histogram(data_folder, adaID, fragment, insert_sizes,
                              title=None, ax=None, show=False, savefig=False,
                              **kwargs):
    '''Plot cumulative histogram of insert sizes

    Parameters:
       data_folder, adaID, fragment: only used to build the figure folder and
          the output filename when savefig is true.
       insert_sizes: x values of the curve. They are plotted against an evenly
          spaced ramp from 0 to 1, so they are assumed to be already sorted
          (not enforced here -- TODO confirm with callers).
       title: axes title, skipped if None.
       ax: axes to draw into; a new figure with one axes is made if None.
       show: if true, turn on interactive mode and show the figure.
       savefig: if true, save the figure that owns the axes.
       **kwargs: forwarded to ax.plot.
    '''
    import matplotlib.pyplot as plt

    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        # The figure is needed below for layout and saving also when the
        # caller supplies the axes
        fig = ax.figure

    ax.plot(insert_sizes, np.linspace(0, 1, len(insert_sizes)), **kwargs)
    ax.set_xlabel('Insert size')
    ax.set_ylabel('Cumulative fraction')
    ax.set_xlim(-1, 1000)
    ax.set_ylim(-0.02, 1.02)

    if title is not None:
        ax.set_title(title)

    # Lay out the figure we drew into, not whatever pyplot considers current
    fig.tight_layout()

    if show:
        plt.ion()
        plt.show()

    if savefig:
        output_filename = get_insert_size_distribution_cumulative_filename(
            data_folder, adaID, fragment)
        from hivwholeseq.utils.generic import mkdirs
        from hivwholeseq.sequencing.filenames import get_figure_folder
        mkdirs(get_figure_folder(data_folder, adaID))
        fig.savefig(output_filename)
def make_output_folders(data_folder, adaID, VERBOSE=0): '''Make output folders for the script''' from hivwholeseq.utils.generic import mkdirs dirname = data_folder + foldername_adapter(adaID) + 'map_iter/' mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname
def make_output_folders(data_folder, adapters_designed, VERBOSE=0, summary=True): '''Make output folders for all adapters and unclassified (e.g. PhiX)''' from hivwholeseq.utils.generic import mkdirs # Make folders for the samples for (adaID, s) in adapters_designed: dirname = foldername_adapter(adaID) mkdirs(data_folder + dirname) if VERBOSE: print 'Folder created:', dirname # Make a default directory for unclassified reads mkdirs(data_folder + 'unclassified_reads') if VERBOSE: print 'Folder created: unclassified reads' if summary: with open(get_demultiplex_summary_filename(data_folder), 'a') as f: f.write('\n') f.write( 'Folders created for samples and unclassified reads (including phix).' ) f.write('\n')
def plot_histogram(data_folder, adaID, fragment, h, title=None, ax=None,
                   show=False, savefig=False, **kwargs):
    '''Plot histogram of insert sizes

    Parameters:
       data_folder, adaID, fragment: only used to build the figure folder and
          the output filename when savefig is true.
       h: pair (heights, bin edges); the heights h[0] are plotted against the
          midpoints of consecutive edges h[1] (presumably the output of
          numpy.histogram -- confirm with callers).
       title: axes title, skipped if None.
       ax: axes to draw into; a new figure with one axes is made if None.
       show: if true, turn on interactive mode and show the figure.
       savefig: if true, save the figure that owns the axes.
       **kwargs: forwarded to ax.plot.
    '''
    import matplotlib.pyplot as plt

    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        # Lay out and save the figure that owns the axes: pyplot's current
        # figure may be a different one when the caller supplies ax
        fig = ax.figure

    if title is not None:
        ax.set_title(title)

    # Bin centers from the bin edges
    x = 0.5 * (h[1][1:] + h[1][:-1])
    y = h[0]
    ax.plot(x, y, **kwargs)
    ax.set_xlabel('Insert size')
    ax.set_ylabel('Density')

    fig.tight_layout()

    if show:
        plt.ion()
        plt.show()

    if savefig:
        output_filename = get_insert_size_distribution_filename(
            data_folder, adaID, fragment)
        from hivwholeseq.utils.generic import mkdirs
        from hivwholeseq.sequencing.filenames import get_figure_folder
        mkdirs(get_figure_folder(data_folder, adaID))
        fig.savefig(output_filename)
def make_output_folders(data_folder, adaID, VERBOSE=0): '''Make output folders for the script''' from hivwholeseq.utils.generic import mkdirs dirname = data_folder+foldername_adapter(adaID)+'map_iter/' mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname
def plot_quality_along_reads(data_folder, adaID, title, quality, VERBOSE=0, savefig=False):
    """Plot one curve per read position showing the spread of quality scores

    The figure has two panels, titled 'Read1' and 'Read2', paired in order
    with the items of quality.

    Parameters:
       data_folder, adaID: used to find the figure folder and filename when
          savefig is true.
       title: figure suptitle.
       quality: one item per panel; each item is iterated and every element
          is plotted on the x axis against a descending 1 -> 0 ramp of the
          same length (presumably the sorted quality values at one position
          -- confirm with callers).
       VERBOSE: unused here.
       savefig: if true, save the figure; otherwise show it interactively.
    """
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fig, axs = plt.subplots(1, 2, figsize=(16, 9))

    for iread, (ax, qual_read) in enumerate(izip(axs, quality)):
        for ipos, quals_at_pos in enumerate(qual_read):
            frac_above = np.linspace(0, 1, len(quals_at_pos))[::-1]
            # The color encodes the position index through the jet colormap
            line_color = cm.jet(int(255.0 * ipos / len(qual_read)))
            ax.plot(quals_at_pos, frac_above, color=line_color, alpha=0.5, lw=2)

        ax.set_xlabel("Phred quality", fontsize=14)
        ax.set_ylabel("Fraction of bases above quality x", fontsize=14)
        ax.set_title("Read" + str(iread + 1), fontsize=16)
        legend_text = "blue to red: 0 to " + str(len(qual_read)) + " base"
        ax.text(2, 0.03, legend_text, fontsize=18)

    fig.suptitle(title, fontsize=20)

    if not savefig:
        plt.tight_layout()
        plt.ion()
        plt.show()
        return

    from hivwholeseq.utils.generic import mkdirs
    from hivwholeseq.sequencing.filenames import get_figure_folder, get_quality_along_reads_filename
    fig_folder = get_figure_folder(data_folder, adaID)
    fig_filename = get_quality_along_reads_filename(data_folder, adaID)
    mkdirs(fig_folder)
    fig.savefig(fig_filename)
def plot_quality_along_reads(data_folder, adaID, title, quality, VERBOSE=0, savefig=False):
    '''Plot the quality scores along the reads, one panel per read

    Each element of a read's quality item becomes one line: its values on the
    x axis against a ramp going from 1 down to 0, colored by its index via
    the jet colormap. The panels are titled 'Read1' and 'Read2'.

    Parameters:
       data_folder, adaID: locate the figure folder/filename (savefig only).
       title: figure suptitle.
       quality: items paired in order with the two panels.
       VERBOSE: unused here.
       savefig: save the figure if true, else show it interactively.
    '''
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fig, axs = plt.subplots(1, 2, figsize=(16, 9))
    panels = izip(axs, quality)

    for read_index, (ax, read_quality) in enumerate(panels):
        for pos_index, scores in enumerate(read_quality):
            ramp = np.linspace(0, 1, len(scores))[::-1]
            shade = int(255.0 * pos_index / len(read_quality))
            ax.plot(scores, ramp, color=cm.jet(shade), alpha=0.5, lw=2)

        ax.set_xlabel('Phred quality', fontsize=14)
        ax.set_ylabel('Fraction of bases above quality x', fontsize=14)
        ax.set_title('Read'+str(read_index+1), fontsize=16)
        ax.text(2, 0.03,
                'blue to red: 0 to '+str(len(read_quality))+' base',
                fontsize=18)

    fig.suptitle(title, fontsize=20)

    if savefig:
        from hivwholeseq.utils.generic import mkdirs
        from hivwholeseq.sequencing.filenames import get_figure_folder, \
                get_quality_along_reads_filename
        fig_folder = get_figure_folder(data_folder, adaID)
        fig_filename = get_quality_along_reads_filename(data_folder, adaID)
        mkdirs(fig_folder)
        fig.savefig(fig_filename)
        return

    plt.tight_layout()
    plt.ion()
    plt.show()
def make_output_folders(data_folder, adaID, VERBOSE=0): '''Make output folders''' from hivwholeseq.utils.generic import mkdirs output_filename = get_divided_filenames(data_folder, adaID, fragments=['F1'])[0] dirname = os.path.dirname(output_filename) mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname
def copy_folder(patient, dst_fn, foldername):
    '''Copy a whole folder

    Parameters:
       patient: object whose .folder attribute is the source root (a string
          ending with a path separator is assumed, since it is concatenated
          directly -- confirm with callers).
       dst_fn: destination root, concatenated in the same way.
       foldername: name of the subfolder to copy.

    Every entry directly inside the source subfolder is passed to copy
    (presumably shutil.copy, imported at file level); subfolders are not
    descended into.
    '''
    src_fn = patient.folder+foldername+os.sep
    map_fn = dst_fn+foldername+os.sep
    mkdirs(map_fn)

    # src_fn is a plain string, which has no listdir method: list the folder
    # via os, which yields bare entry names as the concatenations below expect
    for fn_src in os.listdir(src_fn):
        copy(src_fn+fn_src, map_fn+fn_src)
def plot_cuts_quality_along_reads(data_folder, adaID, quality, title='',
                                  VERBOSE=0, savefig=False):
    '''Plot, along each read, the percentage of bases above some quality cuts

    One line is drawn per threshold in [10, 20, 30, 35]; the two panels are
    titled 'Read1' and 'Read2' and paired in order with the items of quality.

    Parameters:
       data_folder, adaID: locate the default figure folder and filename
          (used only if savefig is True).
       quality: one item per panel, indexed by position; each element is
          scored against the threshold with scipy.stats.percentileofscore.
       title: figure suptitle, skipped if empty.
       VERBOSE: unused here.
       savefig: false to show the figure interactively, True to save it under
          the default filename, or a string to save it under that filename.

    Raises:
       ValueError: if savefig is true but is neither True nor a string.
    '''
    from scipy.stats import percentileofscore as pof
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fig, axs = plt.subplots(1, 2, figsize=(14, 8))
    qthreshs = [10, 20, 30, 35]

    for iread, (ax, qual) in enumerate(izip(axs, quality)):
        for icut, qthresh in enumerate(qthreshs):
            positions = np.arange(len(qual))
            pct_above = []
            for pos in xrange(len(qual)):
                pct_above.append(100 - pof(qual[pos], qthresh))
            pct_above = np.array(pct_above)
            ax.plot(positions, pct_above,
                    color=cm.jet(int(255.0 * icut / len(qthreshs))),
                    alpha=0.8,
                    lw=2,
                    label='Q = ' + str(qthresh))

        ax.set_xlabel('Position [bp]', fontsize=14)
        ax.set_ylabel('Percentage of bases above quality x', fontsize=14)
        ax.set_title('Read' + str(iread + 1), fontsize=16)
        ax.set_ylim(-1, 101)
        ax.set_xlim(-1, len(qual) + 1)
        ax.legend(loc='best')

    if title:
        fig.suptitle(title, fontsize=20)

    if not savefig:
        plt.tight_layout()
        plt.ion()
        plt.show()
        return

    from hivwholeseq.utils.generic import mkdirs
    if savefig == True:
        # Default location from the sequencing filename helpers
        from hivwholeseq.sequencing.filenames import get_figure_folder, \
                get_quality_along_reads_filename
        fig_folder = get_figure_folder(data_folder, adaID)
        fig_filename = get_quality_along_reads_filename(data_folder, adaID,
                                                        simple=True)
    elif isinstance(savefig, basestring):
        # Caller-chosen filename; its folder is made before saving
        import os
        fig_filename = savefig
        fig_folder = os.path.dirname(fig_filename)
    else:
        raise ValueError(
            'savefig must be a bool or a figure filename (string)')

    mkdirs(fig_folder)
    fig.savefig(fig_filename)
def make_output_folders(data_folder, adaID, VERBOSE=0, summary=True): '''Make output folders''' from hivwholeseq.utils.generic import mkdirs outfiles = [get_premapped_filename(data_folder, adaID)] if summary: outfiles.append(get_coverage_figure_filename(data_folder, adaID, 'premapped')) for outfile in outfiles: dirname = os.path.dirname(outfile) mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname
def make_output_folders(data_folder, adaIDs, VERBOSE=0): '''Make output folders for symlinking''' from hivwholeseq.utils.generic import mkdirs mkdirs(data_folder) if VERBOSE >= 1: print 'Folder created:', data_folder for adaID in adaIDs + [-1]: mkdirs(data_folder+foldername_adapter(adaID)) if VERBOSE >= 1: print 'Folder created:', data_folder+foldername_adapter(adaID)
def report_coverage(data_folder, adaID, VERBOSE=0, summary=True):
    '''Produce a report on rough coverage on reference (ignore inserts)

    The premapped BAM file is scanned once; reads that are unmapped, not in a
    proper pair or without CIGAR are counted as unmapped, all others add to
    the per-position coverage. The coverage (+1, on a log scale) is plotted
    and saved to the 'premapped' coverage figure file.

    Parameters:
       data_folder, adaID: passed to the filename helpers; adaID also appears
          in the plot title.
       VERBOSE: unused here.
       summary: if true, append the counts and the figure filename to the
          premap summary file.
    '''
    ref_filename = get_reference_premap_filename(data_folder, adaID)
    refseq = SeqIO.read(ref_filename, 'fasta')

    # Prepare data structures
    coverage = np.zeros(len(refseq), int)

    # CIGAR operations that advance along the reference (SAM specification):
    # 0 (M), 7 (=), 8 (X) are aligned bases, 2 (D) is a deletion, which is
    # treated as 'covered', and 3 (N) is a skipped region, which is not
    covering_ops = (0, 2, 7, 8)
    ref_skip_op = 3

    # Parse the BAM file
    unmapped = 0
    mapped = 0
    bamfilename = get_premapped_filename(data_folder, adaID, type='bam')
    with pysam.Samfile(bamfilename, 'rb') as bamfile:
        for read in bamfile:
            # 'not read.cigar' covers both an empty and a missing CIGAR
            if read.is_unmapped or (not read.is_proper_pair) or \
               (not read.cigar):
                unmapped += 1
                continue

            # Proceed along CIGARs
            ref_pos = read.pos
            for (bt, bl) in read.cigar:
                if bt in covering_ops:
                    coverage[ref_pos:ref_pos + bl] += 1
                    ref_pos += bl
                elif bt == ref_skip_op:
                    # Keep later blocks at the right reference position
                    ref_pos += bl
                # Everything else (insertions, clipping, padding) does not
                # move along the reference

            # NOTE(review): this counts single reads, while the summary line
            # below calls them read pairs -- confirm which one is intended
            mapped += 1

    # Save results
    from hivwholeseq.sequencing.filenames import get_coverage_figure_filename
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(13, 6))
    # +1 keeps positions without coverage visible on the log scale
    ax.plot(np.arange(len(refseq)), coverage + 1, lw=2, c='b')
    ax.set_xlabel('Position')
    ax.set_ylabel('Coverage')
    ax.set_yscale('log')
    # str() so that adapter IDs that are not strings work too
    ax.set_title('adaID ' + str(adaID) + ', premapped', fontsize=18)
    ax.set_xlim(-20, len(refseq) + 20)
    plt.tight_layout()

    from hivwholeseq.utils.generic import mkdirs
    from hivwholeseq.sequencing.filenames import get_figure_folder
    mkdirs(get_figure_folder(data_folder, adaID))
    plt.savefig(get_coverage_figure_filename(data_folder, adaID, 'premapped'))
    plt.close(fig)

    if summary:
        with open(get_premap_summary_filename(data_folder, adaID), 'a') as f:
            f.write('\nPremapping results: '+\
                    str(mapped)+' read pairs mapped, '+str(unmapped)+' unmapped\n')
            f.write('\nCoverage plotted: '+\
                    get_coverage_figure_filename(data_folder, adaID, 'premapped')+'\n')
def make_output_folders(data_folder, adaID, VERBOSE=0, summary=True): '''Make output folders''' from hivwholeseq.utils.generic import mkdirs outfiles = [get_premapped_filename(data_folder, adaID)] if summary: outfiles.append( get_coverage_figure_filename(data_folder, adaID, 'premapped')) for outfile in outfiles: dirname = os.path.dirname(outfile) mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname
def copy_initial_reference(patient, dst_fn):
    '''Copy the initial mapping references of a patient into dst_fn

    The files go to dst_fn + 'reference/' under their original basenames:
    one reference per entry of the module-level fragments, followed by the
    'genomewide' reference in 'gb' format.
    '''
    ref_fn = dst_fn+'reference/'
    mkdirs(ref_fn)

    def copy_to_reference_folder(fn_src):
        fn_dst = ref_fn+os.path.basename(fn_src)
        shutil.copy(fn_src, fn_dst)

    for fragment in fragments:
        copy_to_reference_folder(patient.get_reference_filename(fragment))

    copy_to_reference_folder(
        patient.get_reference_filename('genomewide', format='gb'))
def make_output_folders(pname, samplename, PCR=1, VERBOSE=0): '''Make the output folders if necessary for hash and map''' hash_foldername = os.path.dirname(get_initial_hash_filename(pname, 'F0')) map_foldername = get_mapped_to_initial_foldername(pname, samplename, PCR=PCR) if not os.path.isdir(hash_foldername): mkdirs(hash_foldername) if VERBOSE: print 'Folder created:', hash_foldername mkdirs(map_foldername) if VERBOSE: print 'Folder created:', map_foldername
def plot_cuts_quality_along_reads(data_folder, adaID, quality, title="", VERBOSE=0, savefig=False):
    """Plot the percentage of bases above fixed quality cuts along the reads

    Two panels ('Read1', 'Read2') are paired in order with the items of
    quality; each panel gets one line per threshold in [10, 20, 30, 35].

    Parameters:
       data_folder, adaID: locate the default figure folder and filename
          (used only if savefig is True).
       quality: one item per panel, indexed by position; each element is
          scored against the threshold with scipy.stats.percentileofscore.
       title: figure suptitle, skipped if empty.
       VERBOSE: unused here.
       savefig: false to show the figure interactively, True for the default
          filename, or a string with the filename to save to.

    Raises:
       ValueError: if savefig is true but is neither True nor a string.
    """
    from scipy.stats import percentileofscore as pof
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fig, axs = plt.subplots(1, 2, figsize=(14, 8))
    qthreshs = [10, 20, 30, 35]

    def percent_above(qual, qthresh):
        # percentileofscore gives the percentage up to the threshold
        return np.array([100 - pof(qual[k], qthresh) for k in xrange(len(qual))])

    for i, (ax, qual) in enumerate(izip(axs, quality)):
        for j, qthresh in enumerate(qthreshs):
            x = np.arange(len(qual))
            y = percent_above(qual, qthresh)
            line_style = dict(
                color=cm.jet(int(255.0 * j / len(qthreshs))),
                alpha=0.8,
                lw=2,
                label="Q = " + str(qthresh),
            )
            ax.plot(x, y, **line_style)

        ax.set_xlabel("Position [bp]", fontsize=14)
        ax.set_ylabel("Percentage of bases above quality x", fontsize=14)
        ax.set_title("Read" + str(i + 1), fontsize=16)
        ax.set_ylim(-1, 101)
        ax.set_xlim(-1, len(qual) + 1)
        ax.legend(loc="best")

    if title:
        fig.suptitle(title, fontsize=20)

    if savefig:
        from hivwholeseq.utils.generic import mkdirs

        if savefig == True:
            from hivwholeseq.sequencing.filenames import get_figure_folder, get_quality_along_reads_filename
            fig_folder = get_figure_folder(data_folder, adaID)
            fig_filename = get_quality_along_reads_filename(data_folder, adaID, simple=True)
        elif isinstance(savefig, basestring):
            import os
            fig_filename = savefig
            fig_folder = os.path.dirname(fig_filename)
        else:
            raise ValueError("savefig must be a bool or a figure filename (string)")

        mkdirs(fig_folder)
        fig.savefig(fig_filename)
        return

    plt.tight_layout()
    plt.ion()
    plt.show()
def make_output_folders(data_folder, adapters_designed, VERBOSE=0, summary=True): '''Make output folders for all adapters and unclassified (e.g. PhiX)''' from hivwholeseq.utils.generic import mkdirs # Make folders for the samples for (adaID, s) in adapters_designed: dirname = foldername_adapter(adaID) mkdirs(data_folder+dirname) if VERBOSE: print 'Folder created:', dirname # Make a default directory for unclassified reads mkdirs(data_folder+'unclassified_reads') if VERBOSE: print 'Folder created: unclassified reads' if summary: with open(get_demultiplex_summary_filename(data_folder), 'a') as f: f.write('\n') f.write('Folders created for samples and unclassified reads (including phix).') f.write('\n')
def predict_RNA_structure(seq, label='seq', maxstructs=1, VERBOSE=0): '''Predict RNA secondary structures using RNAstructure''' import os import subprocess as sp from hivwholeseq.utils.generic import mkdirs from Bio import SeqIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.Alphabet.IUPAC import ambiguous_dna rna_fold_bin = '/ebio/ag-neher/home/fzanini/programs/RNAstructure_cli/exe/Fold' # Make tmp input file tmp_file_in = '/ebio/ag-neher/home/fzanini/tmp/RNAfold/'+label+'.fasta' tmp_file_out = '/ebio/ag-neher/home/fzanini/tmp/RNAfold/'+label+'.ct' mkdirs(os.path.dirname(tmp_file_in)) seqrec = SeqRecord(Seq(seq, ambiguous_dna), id=label, name=label, description='') SeqIO.write(seqrec, tmp_file_in, 'fasta') # Call RNAStructure with all the crap (env vars, etc) rna_tables = '/ebio/ag-neher/home/fzanini/programs/RNAstructure_cli/data_tables/' env = os.environ.copy() env['DATAPATH'] = rna_tables call_list = [rna_fold_bin, '-m', str(maxstructs), tmp_file_in, tmp_file_out] if VERBOSE >= 2: print ' '.join(call_list) output = sp.check_output(call_list, shell=False) if VERBOSE >= 3: print output if 'Writing output ct file...done.' in output: structs = parse_ct_file_multiple(tmp_file_out) else: IOError('RNAstructure had problems predicting the structure') return structs
def plot_cuts_quality_along_reads(data_folder, adaID, title, quality, VERBOSE=0, savefig=False):
    '''Plot, along each read, the percentage of bases above some quality cuts

    Two panels ('Read1', 'Read2') are paired in order with the items of
    quality; each panel gets one line per threshold in [10, 20, 30, 35].

    Parameters:
       data_folder, adaID: locate the figure folder/filename (savefig only).
       title: figure suptitle.
       quality: one item per panel, indexed by position; each element is
          scored against the threshold with scipy.stats.percentileofscore.
       VERBOSE: unused here.
       savefig: save the figure if true, else show it interactively.
    '''
    from scipy.stats import percentileofscore as pof
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fig, axs = plt.subplots(1, 2, figsize=(14, 8))
    qthreshs = [10, 20, 30, 35]

    for iread, (ax, qual) in enumerate(izip(axs, quality)):
        for icut, qthresh in enumerate(qthreshs):
            positions = np.arange(len(qual))
            percentages = np.array([100 - pof(qual[k], qthresh)
                                    for k in xrange(len(qual))])
            cut_color = cm.jet(int(255.0 * icut / len(qthreshs)))
            ax.plot(positions, percentages,
                    color=cut_color,
                    alpha=0.8,
                    lw=2,
                    label='Q = '+str(qthresh))

        ax.set_xlabel('Position [bp]', fontsize=14)
        ax.set_ylabel('Percentage of bases above quality x', fontsize=14)
        ax.set_title('Read'+str(iread+1), fontsize=16)
        ax.set_ylim(-1, 101)
        ax.set_xlim(-1, len(qual) + 1)
        ax.legend(loc='best')

    fig.suptitle(title, fontsize=20)

    if not savefig:
        plt.tight_layout()
        plt.ion()
        plt.show()
        return

    from hivwholeseq.utils.generic import mkdirs
    from hivwholeseq.sequencing.filenames import get_figure_folder, \
            get_quality_along_reads_filename
    fig_folder = get_figure_folder(data_folder, adaID)
    fig_filename = get_quality_along_reads_filename(data_folder, adaID,
                                                    simple=True)
    mkdirs(fig_folder)
    fig.savefig(fig_filename)
'--repnumber', type=int, default=0, help='Index of the sequenced sample within that patient sample') args = parser.parse_args() pname = args.patient fragments = args.fragments VERBOSE = args.verbose repn = args.repnumber samplename = args.sample patient = load_patient(pname) patient.discard_nonsequenced_samples() mkdirs(get_initial_reference_foldername(pname)) if not fragments: fragments = ['F' + str(i) for i in xrange(1, 7)] if VERBOSE >= 3: print 'fragments', fragments if samplename is None: sample = SamplePat(patient.samples.iloc[samplen]) else: sample = load_sample_sequenced(samplename) for fragment in fragments: sample_seq = SampleSeq(sample.samples_seq.iloc[repn]) seq_run = sample_seq['seq run']
parser.add_argument('--sample', help='Use a specific sample (not the first time point) for the reference') parser.add_argument('--repnumber', type=int, default=0, help='Index of the sequenced sample within that patient sample') args = parser.parse_args() pname = args.patient fragments = args.fragments VERBOSE = args.verbose repn = args.repnumber samplename = args.sample patient = load_patient(pname) patient.discard_nonsequenced_samples() mkdirs(get_initial_reference_foldername(pname)) if not fragments: fragments = ['F'+str(i) for i in xrange(1, 7)] if VERBOSE >= 3: print 'fragments', fragments if samplename is None: sample = SamplePat(patient.samples.iloc[samplen]) else: sample = load_sample_sequenced(samplename) for fragment in fragments: sample_seq = SampleSeq(sample.samples_seq.iloc[repn]) seq_run = sample_seq['seq run']
# Script if __name__ == '__main__': parser = argparse.ArgumentParser(description="make figure") parser.add_argument('--redo', action='store_true', help='recalculate data') params = parser.parse_args() fragment = 'F1' VERBOSE = 2 username = os.path.split(os.getenv('HOME'))[-1] foldername = get_figure_folder(username, 'first') fn_data = foldername+'data/' mkdirs(fn_data) fn_data = fn_data + 'minor_alleles_example.pickle' if not os.path.isfile(fn_data) or params.redo: samplename = 'NL4-3' sample = lss(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment) samplename = '27134' sample = lssp(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment, data=data) store_data(data, fn_data)
import sys import os from hivwholeseq.utils.generic import mkdirs from hivwholeseq.patients.samples import itersample from hivwholeseq.sequencing.samples import load_samples_sequenced as lss from hivwholeseq.patients.samples import load_samples_sequenced as lssp from hivwholeseq.sequencing.filenames import get_sample_foldername # Script if __name__ == '__main__': samples_pat = lssp() samples_seq = lss() for samplename, sample in itersample(samples_pat): root_foldername = sample.get_foldername()+'samples_sequencing/' mkdirs(root_foldername) for samplenameseq, sampleseq in samples_seq.iterrows(): if sampleseq['patient sample'] == samplename: src_folder = get_sample_foldername(samplenameseq) dst_folder = root_foldername+samplenameseq if not os.path.islink(dst_folder): os.symlink(src_folder, dst_folder) print 'Symlink:', src_folder, dst_folder else: print 'Esists:', dst_folder
dist_hist = get_distance_histogram(data_folder, adaID, fragment, VERBOSE=VERBOSE) except IOError: continue dist_hists.append((samplename_seq, fragment, dist_hist)) dist_hists.sort(key=itemgetter(1)) fig, ax = plt.subplots() for i, (samplename_seq, fragment, h) in enumerate(dist_hists): plot_distance_histogram(h, ax=ax, color=cm.jet(1.0 * i / len(dist_hists)), label=', '.join([samplename_seq, fragment])) ax.set_title(samplename) ax.legend(loc=1, fontsize=10) if use_save: foldername = sample.get_foldername() + 'figures/' mkdirs(foldername) fn = foldername + 'distance_to_consensus_seqsamples.png' fig.savefig(fn) plt.close(fig) if not use_save: plt.ion() plt.show()
if not len(datum['ind']): win_start += gap continue datum['times'] = patient.times[datum['ind']] datum['pcode'] = patient.code datum['window'] = (win_start, win_end) data.append(datum) if use_save: if VERBOSE >= 2: print 'Save to file' rname = 'scan_' + str(win_start) + '-' + str(win_end) fn_out = patient.get_haplotype_count_trajectory_filename(rname) mkdirs(os.path.dirname(fn_out)) np.savez_compressed( fn_out, hct=datum['hct'], ind=datum['ind'], times=datum['times'], seqs=datum['seqs'], ali=datum['alim'], ) if VERBOSE >= 2: print 'Build tree' times = datum['times'] alim = datum['alim'] hct = datum['hct'] hft = 1.0 * hct / hct.sum(axis=0)
adaID = sample_seq.adapter for fragment in fragments: try: dist_hist = get_distance_histogram(data_folder, adaID, fragment, VERBOSE=VERBOSE) except IOError: continue dist_hists.append((samplename_seq, fragment, dist_hist)) dist_hists.sort(key=itemgetter(1)) fig, ax = plt.subplots() for i, (samplename_seq, fragment, h) in enumerate(dist_hists): plot_distance_histogram(h, ax=ax, color=cm.jet(1.0 * i / len(dist_hists)), label=', '.join([samplename_seq, fragment])) ax.set_title(samplename) ax.legend(loc=1, fontsize=10) if use_save: foldername = sample.get_foldername()+'figures/' mkdirs(foldername) fn = foldername+'distance_to_consensus_seqsamples.png' fig.savefig(fn) plt.close(fig) if not use_save: plt.ion() plt.show()
parser = argparse.ArgumentParser(description='Copy data folder', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('destination', help='Destination folder') parser.add_argument('--strip-PCR1', action='store_true', help='Strip the ../PCR1/ part of the file tree') args = parser.parse_args() dst_fn = args.destination.lstrip(os.sep)+os.sep stirp_PCR1 = args.strip_PCR1 patients_fn = dst_fn+'patients/' ref_fn = dst_fn+'reference/' print 'Make root folders' mkdirs(patients_fn) mkdirs(ref_fn) print 'Reference sequences' copy_reference(ref_fn) patients = load_patients() for pname, patient in patients.iterrows(): print pname patient = Patient(patient) print 'Make folder' pat_fn = patients_fn+pname+os.sep mkdirs(pat_fn)
if not len(datum['ind']): win_start += gap continue datum['times'] = patient.times[datum['ind']] datum['pcode'] = patient.code datum['window'] = (win_start, win_end) data.append(datum) if use_save: if VERBOSE >= 2: print 'Save to file' rname = 'scan_'+str(win_start)+'-'+str(win_end) fn_out = patient.get_haplotype_count_trajectory_filename(rname) mkdirs(os.path.dirname(fn_out)) np.savez_compressed(fn_out, hct=datum['hct'], ind=datum['ind'], times=datum['times'], seqs=datum['seqs'], ali=datum['alim'], ) if VERBOSE >= 2: print 'Build tree' times = datum['times'] alim = datum['alim'] hct = datum['hct'] hft = 1.0 * hct / hct.sum(axis=0) ali = expand_annotate_alignment(alim, hft, hct, times,
# Script if __name__ == '__main__': parser = argparse.ArgumentParser(description="make figure") parser.add_argument('--redo', action='store_true', help='recalculate data') params = parser.parse_args() fragment = 'F1' VERBOSE = 2 username = os.path.split(os.getenv('HOME'))[-1] foldername = get_figure_folder(username, 'first') fn_data = foldername + 'data/' mkdirs(fn_data) fn_data = fn_data + 'minor_alleles_example.pickle' if not os.path.isfile(fn_data) or params.redo: samplename = 'NL4-3' sample = lss(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment) samplename = '27134' sample = lssp(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment, data=data) store_data(data, fn_data) else: