def _event_for_copy_number(event, copy_number, single_copy):
    """Return the event name implied by ``copy_number``.

    ``single_copy`` selects the baseline copy count: 1 for a sex chromosome
    handled as a single copy, 2 otherwise.  A copy number equal to the
    baseline maps to ``'loh'``, one above it to ``'gain'`` and anything
    higher to ``'amp'``.  Lower values leave ``event`` unchanged.
    """
    baseline = 1 if single_copy else 2
    if copy_number == baseline:
        return 'loh'
    if copy_number == baseline + 1:
        return 'gain'
    if copy_number > baseline + 1:
        return 'amp'
    return event


def _simulate_gain(chrom, event, copy_number, bamsortfn):
    """Build ``<CHROM>_GAIN.bam`` in ``finalbams_path`` for a gain/amp event.

    Returns ``False`` when the mutated, merged BAM holds too few reads to
    reach the requested copy number, ``True`` otherwise (including the case
    where ``bamsortfn`` does not exist and nothing is done).
    """
    bamrepairedsortfn = sub('.sorted.bam$', ".re_paired.sorted.bam",
                            bamsortfn)
    mergedsortfn = sub('.sorted.bam$', ".mutated_merged.sorted.bam",
                       bamrepairedsortfn)
    gain_final = "/".join([finalbams_path, str(chrom).upper() + '_GAIN.bam'])

    if not os.path.isfile(bamsortfn):
        return True

    re_pair_reads(bamsortfn, copy_number)
    mutate_reads(bamrepairedsortfn, chrom, event)
    coverageratio = (float(countReads(mergedsortfn)) /
                     float(countReads(bamsortfn)))
    logger.debug("+++ coverage ratio for: " + ntpath.basename(bamsortfn) +
                 ": " + str(coverageratio))

    # NOTE(review): the "- 2" baseline is applied even to single-copy sex
    # chromosomes, where copy_number == 2 is classed as 'gain'; that gives
    # extra_copies == 0 and therefore a sample rate of 0.  Confirm intent.
    extra_copies = copy_number - 2
    if coverageratio < extra_copies:
        logger.error('not enough reads for ' + ntpath.basename(bamsortfn))
        return False

    samplerate = float(extra_copies) / coverageratio
    subsample(mergedsortfn, gain_final, str(samplerate))
    return True


def _simulate_loss(chrom, bamfn, bamsortfn, sortbyCoord):
    """Build ``<CHROM>_LOSS.bam`` in ``finalbams_path`` for a loss event."""
    loss_final = "/".join([finalbams_path, str(chrom).upper() + '_LOSS.bam'])

    if os.path.isfile(bamsortfn):
        mutate_reads(bamsortfn, chrom, 'loss')
        mergedsortfn = sub('.sorted.bam$', ".mutated_merged.sorted.bam",
                           bamsortfn)
        mergedsortsampledfn = sub('.sorted.bam$',
                                  ".mutated_merged.sampled.sorted.bam",
                                  bamsortfn)

        ratio_kept = float(countReads(bamsortfn)) / float(countReads(bamfn))
        samplerate = round(0.5 / ratio_kept, 2)
        logger.debug("ratios kept for:" + ntpath.basename(bamsortfn) + ": " +
                     str(ratio_kept))
        subsample(mergedsortfn, mergedsortsampledfn, str(samplerate))
        bamDiff(sortbyCoord, mergedsortsampledfn, tmpbams_path)
        # The diff output named 'diff_only1_<chrom>.bam' becomes the final
        # LOSS BAM.
        os.rename("/".join([tmpbams_path, 'diff_only1_' + chrom + '.bam']),
                  loss_final)
    elif not os.path.isfile(loss_final) and os.path.isfile(sortbyCoord):
        # No sorted BAM for this event: expose the coordinate-sorted BAM as
        # the LOSS output, unless one is already there (e.g. from a previous
        # run).
        os.symlink(sortbyCoord, loss_final)


def implement_cnv(chromosome_event):
    """Apply one copy-number event to one chromosome and write the final BAM.

    ``chromosome_event`` is a ``"<chromosome>_<event>"`` string; it must
    contain exactly one underscore, otherwise the unpacking raises
    ``ValueError`` to the caller.

    The copy number is read from the 7th tab-separated column of the first
    row of the event's bed file and may replace the event passed in (see
    ``_event_for_copy_number``).  Events starting with ``'amp'`` or
    ``'gain'`` produce ``<CHROM>_GAIN.bam``, ``'loss'`` produces
    ``<CHROM>_LOSS.bam``, both under ``finalbams_path``.

    NOTE(review): an event resolved to ``'loh'`` matches neither branch, so
    nothing is written for it here -- confirm that is intended.

    Nothing is done when the shared ``terminating`` flag is already set.  Any
    exception raised while processing is logged and sets ``terminating``.

    Returns:
        None in every case.
    """
    chrom, event = chromosome_event.split("_")

    logger.debug("___ Bamgineer main engine started ___")
    success = True
    try:
        if not terminating.is_set():
            bamfn, _sortbyname, sortbyCoord, bedfn = init_file_names(
                chrom, tmpbams_path, haplotype_path, event)
            bamsortfn = sub('.bam$', '.sorted.bam', bamfn)

            if not os.path.isfile(bedfn):
                logger.debug(bedfn + ' does not exist!')
                success = False
            else:
                # Text mode works with csv.reader on both Python 2 and 3
                # (binary mode fails on 3); only the first row is needed and
                # the handle is closed straight away.
                with open(bedfn, 'r') as bed_handle:
                    first_row = next(
                        csv.reader(bed_handle, delimiter='\t'), None)
                if first_row is None:
                    raise ValueError(bedfn + ' is empty')
                copy_number = int(first_row[6])

                single_copy = (params.GetXY() and
                               chrom in ('chrX', 'chrY'))
                if single_copy:
                    logger.debug(
                        "*** handling single sex chromosome for: " +
                        ntpath.basename(bamsortfn))
                event = _event_for_copy_number(event, copy_number,
                                               single_copy)

                if event.startswith(('amp', 'gain')):
                    if not _simulate_gain(chrom, event, copy_number,
                                          bamsortfn):
                        # Too few reads: already logged, give up quietly.
                        return
                elif event == 'loss':
                    _simulate_loss(chrom, bamfn, bamsortfn, sortbyCoord)

    except KeyboardInterrupt:
        logger.error(
            'Exception Ctrl+C pressed in the child process in implement_cnv '
            'for chr ' + chrom + event)
        terminating.set()
        return
    except Exception as e:
        logger.exception("Exception in implement_cnv %s", e)
        terminating.set()
        return
    if success:
        logger.debug("implement_cnv complete successfully for " + chrom +
                     event)
    return
def _write_mutated_reads(bamsortfn, bedfn, hetbamfn, allreadsfn):
    """Split reads overlapping the bed SNPs into two BAMs.

    For every 7-column bed line, each read fetched from ``bamsortfn`` over
    ``[start, end)`` is first written to ``allreadsfn``.  The base aligned to
    ``start`` is then replaced with the alt base (``hap1``) or the ref base
    (``hap2``) and the read is written to ``hetbamfn``.  Reads for which the
    substitution raises are written only to ``allreadsfn``.

    Returns:
        ``(num_reads_written, num_total_reads)`` -- reads written to
        ``hetbamfn`` and reads fetched overall.
    """
    from contextlib import closing

    num_reads_written = 0
    num_total_reads = 0

    # One input handle serves both as header template and as fetch source;
    # closing() guarantees every handle is released even on error.
    with closing(pysam.AlignmentFile(bamsortfn, "rb")) as inbam, \
            closing(pysam.AlignmentFile(hetbamfn, 'wb',
                                        template=inbam)) as outbam, \
            closing(pysam.AlignmentFile(allreadsfn, 'wb',
                                        template=inbam)) as allreads, \
            open(bedfn, 'r') as bedfile:
        for bedline in bedfile:
            fields = bedline.strip().split()
            if len(fields) != 7:
                continue

            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            refbase = fields[3]
            altbase = fields[4]
            haplotype = fields[5]
            # Copy number is not used here; it is still parsed so that a
            # malformed value aborts the run exactly as it did before.
            int(fields[6])

            # Sex chromosomes handled as a single copy always take the alt
            # allele.  The test uses the unmodified chromosome name: the
            # previous code compared a value with the "chr" characters
            # stripped against 'chrX'/'chrY', which could never match.
            if params.GetXY() and chrom in ('chrX', 'chrY'):
                haplotype = 'hap1'
                logger.debug('sex chromosome ' + chrom)

            for shortread in inbam.fetch(chrom, start, end):
                allreads.write(shortread)
                num_total_reads += 1

                try:
                    index = shortread.get_reference_positions(
                        full_length=True).index(start)
                    sequence = shortread.query_sequence
                    # Assigning query_sequence invalidates the qualities in
                    # pysam, so keep a copy and restore it afterwards.
                    qual = shortread.query_qualities
                    head = sequence[:index]
                    tail = sequence[index + 1:]
                    if haplotype == "hap1":
                        shortread.query_sequence = head + altbase + tail
                    elif haplotype == "hap2":
                        shortread.query_sequence = head + refbase + tail
                    shortread.query_qualities = qual
                except Exception as e:
                    # Best effort: e.g. the read has no base aligned to
                    # `start`.  Skip it for the het BAM but say why.
                    logger.debug("skipping read at %s:%d: %s",
                                 chrom, start, e)
                    continue

                outbam.write(shortread)
                num_reads_written += 1

    return num_reads_written, num_total_reads


def mutate_reads(bamsortfn, chr, event=''):
    """Introduce the bed file's SNP alleles into the reads of ``bamsortfn``.

    The bed file for ``chr``/``event`` (located via ``init_file_names``) is
    first de-duplicated in place with ``sort -u``.  Reads overlapping its
    SNPs are mutated (see ``_write_mutated_reads``).  The input BAM is then
    subsampled by the fraction of fetched reads that were mutated, the reads
    fetched over the SNP sites are removed from that sample with ``bamDiff``,
    and the remainder is merged with the mutated reads into
    ``<bamsortfn stem>.mutated_merged.sorted.bam``.  Intermediate files are
    deleted.

    Nothing is done when the shared ``terminating`` flag is set or when
    either ``bamsortfn`` or the bed file is missing.  Any exception is
    logged and sets ``terminating``.

    Args:
        bamsortfn: path of the input BAM, expected to end in ``.sorted.bam``.
        chr: chromosome name passed to ``init_file_names``.
        event: event name passed to ``init_file_names``.

    Returns:
        None in every case.
    """
    _fn, _sortbyname, _sortbyCoord, bedfn = init_file_names(
        chr, tmpbams_path, haplotype_path, event)
    runCommand(" ".join(["sort -u", bedfn, "-o", bedfn]))

    hetbamfn = sub('.sorted.bam$', ".mutated_het.bam", bamsortfn)
    hetbamsortfn = sub('.sorted.bam$', ".mutated_het.sorted",
                       bamsortfn) + '.bam'
    allreadsfn = sub('.sorted.bam$', ".all.reads.bam", bamsortfn)
    allreadssortfn = sub('.sorted.bam$', ".all.reads.sorted",
                         bamsortfn) + '.bam'
    mergedsortfn = sub('.sorted.bam$', ".mutated_merged.sorted.bam",
                       bamsortfn)
    try:
        if terminating.is_set():
            return
        if not (os.path.isfile(bamsortfn) and os.path.isfile(bedfn)):
            return

        # NOTE(review): these two report files were opened for writing but
        # never written to.  They are still created/truncated here so the
        # on-disk result is unchanged -- confirm whether they are needed.
        for report_name in ("written_coverage_het.txt", "het_snp_ratio.txt"):
            open("/".join([haplotype_path, report_name]), 'w').close()

        num_reads_written, num_total_reads = _write_mutated_reads(
            bamsortfn, bedfn, hetbamfn, allreadsfn)

        sortBam(hetbamfn, hetbamsortfn, tmpbams_path)
        sortBam(allreadsfn, allreadssortfn, tmpbams_path)
        os.remove(hetbamfn)
        os.remove(allreadsfn)

        if num_total_reads == 0:
            # Same outcome as the ZeroDivisionError this used to surface as,
            # but with a message that explains the cause.
            raise ValueError('no reads overlap the SNPs in ' + bedfn)

        # ratio of het reads to nonhet reads, we need to adjust the coverage
        ratio = float(num_reads_written) / float(num_total_reads)
        bamsortfnsampled = sub('.sorted.bam$', ".sampled.nh.bam", bamsortfn)
        subsample(bamsortfn, bamsortfnsampled, str(ratio))
        bamDiff(bamsortfnsampled, allreadssortfn, tmpbams_path)

        diff_only_sampled = "/".join([
            tmpbams_path,
            'diff_only1_' + os.path.basename(bamsortfnsampled)
        ])
        # The previous code tested the path string itself, which is always
        # truthy; test for the file instead.
        if os.path.isfile(diff_only_sampled):
            merge_bams(diff_only_sampled, hetbamsortfn, mergedsortfn)
            os.remove(diff_only_sampled)
        else:
            logger.error(diff_only_sampled + ' does not exist!')

        os.remove(bamsortfnsampled)
        os.remove(allreadssortfn)
        os.remove(allreadssortfn + '.bai')
        os.remove(hetbamsortfn)
        os.remove(hetbamsortfn + '.bai')

    except KeyboardInterrupt:
        logger.error(
            'Exception Ctrl+C pressed in the child process in mutate_reads')
        terminating.set()
        return
    except Exception as e:
        logger.exception("Exception in mutate_reads %s", e)
        terminating.set()
        return
    return