def check_already_decontaminated(sample, fragment, PCR): """Check from the summary and output file whether decontamination has been done""" import os from hivwholeseq.utils.mapping import get_number_reads if (fragment == "F4") and (sample.name in maj_contnames): return True fn_sum = get_decontaminate_summary_filename(sample.patient, sample.name, fragment, PCR=PCR) fn_in = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=False) fn_out = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=True) if not all(map(os.path.isfile, [fn_sum, fn_in, fn_out])): print sample.patient, sample.name, fragment, PCR return False n_reads_in = get_number_reads(fn_in) n_reads_out = get_number_reads(fn_out) (n_reads_good, n_reads_cont) = get_number_reads_summary(fn_sum) if n_reads_out != n_reads_good: print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont return False if n_reads_good < 0.5 * n_reads_in: print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont return False return True
def check_already_decontaminated(sample, fragment, PCR): '''Check from the summary and output file whether decontamination has been done''' import os from hivwholeseq.utils.mapping import get_number_reads if (fragment == 'F4') and (sample.name in maj_contnames): return True fn_sum = get_decontaminate_summary_filename(sample.patient, sample.name, fragment, PCR=PCR) fn_in = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=False) fn_out = sample.get_mapped_filtered_filename(fragment, PCR=PCR, decontaminated=True) if not all(map(os.path.isfile, [fn_sum, fn_in, fn_out])): print sample.patient, sample.name, fragment, PCR return False n_reads_in = get_number_reads(fn_in) n_reads_out = get_number_reads(fn_out) (n_reads_good, n_reads_cont) = get_number_reads_summary(fn_sum) if n_reads_out != n_reads_good: print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont return False if n_reads_good < 0.5 * n_reads_in: print sample.patient, sample.name, fragment, PCR, n_reads_in, n_reads_out, n_reads_good, n_reads_cont return False return True
if samples.loc[contname].patient == pname: del consensi_sample[contname] print samplename, if VERBOSE >= 2: print "" (n_good, n_cont) = filter_contamination( bamfilename, bamfilename_out, consensi_sample, samplename, VERBOSE=VERBOSE, maxreads=maxreads ) if VERBOSE: print "good:", n_good, "contaminated:", n_cont if summary: sfn = get_decontaminate_summary_filename(pname, samplename, fragment, PCR=PCR_sample) with open(sfn, "w") as f: f.write( "Call: python decontaminate_reads.py" + " --samples " + samplename + " --fragments " + fragment + " --verbose " + str(VERBOSE) ) if maxreads != -1: f.write(" --maxreads " + str(maxreads)) f.write("\n") f.write("Good: " + str(n_good) + "\n") f.write("Contaminated: " + str(sum(n_cont.itervalues())) + "\n")
print samplename, if VERBOSE >= 2: print '' (n_good, n_cont) = filter_contamination(bamfilename, bamfilename_out, consensi_sample, samplename, VERBOSE=VERBOSE, maxreads=maxreads) if VERBOSE: print 'good:', n_good, 'contaminated:', n_cont if summary: sfn = get_decontaminate_summary_filename(pname, samplename, fragment, PCR=PCR_sample) with open(sfn, 'w') as f: f.write('Call: python decontaminate_reads.py'+\ ' --samples '+samplename+\ ' --fragments '+fragment+\ ' --verbose '+str(VERBOSE)) if maxreads != -1: f.write(' --maxreads ' + str(maxreads)) f.write('\n') f.write('Good: ' + str(n_good) + '\n') f.write('Contaminated: ' + str(sum(n_cont.itervalues())) + '\n') f.write('Contamination sources:\n') for contname, n_conti in n_cont.iteritems(): f.write('{:<20s}'.format(contname) + ' ' +
samples = load_samples_sequenced() samplenames = refnames + samples.index.tolist() for fragment in fragments: if VERBOSE >= 1: print fragment mat = np.ma.masked_all((len(samplenames), len(samplenames))) for (samplename, sample) in samples.iterrows(): if VERBOSE >= 2: print samplename, isp = samplenames.index(samplename) sample = SamplePat(sample) fn = get_decontaminate_summary_filename(sample.patient, samplename, fragment, PCR=1) #FIXME: still finishing this one if samplename == '29184': continue if not os.path.isfile(fn): if VERBOSE >= 2: print 'summary file not found' continue n_good, n_cont, n_cont_dict = get_number_reads_summary(fn, details=True) if VERBOSE >= 2: print n_good, n_cont