def _read_indels(bedpath, limit):
    """Parse indel records from a whitespace-delimited BED-like file.

    Columns read per line: chrom, start, end, VAF, type ('INS' or 'DEL') and,
    for 'INS' records only, a sixth column that is handed on unchanged as the
    ``ins`` argument of ``makemut``.

    :param bedpath: path of the file to read.
    :param limit: maximum number of records to return; 0 means no limit.
    :returns: list of ``(chrom, start, end, vaf, ins)`` tuples in file order;
        ``ins`` is None for 'DEL' records.
    :raises AssertionError: if the type column is neither 'INS' nor 'DEL'.
    """
    indels = []
    # 'with' guarantees the handle is released even when a line fails to parse.
    with open(bedpath, 'r') as bedfile:
        for bedline in bedfile:
            if limit != 0 and len(indels) >= limit:
                break
            c = bedline.strip().split()
            if not c:
                # blank line: nothing to parse (would otherwise be an IndexError)
                continue
            muttype = c[4]
            assert muttype in ('INS', 'DEL'), "mutation type must be INS or DEL, got: " + muttype
            ins = c[5] if muttype == 'INS' else None
            indels.append((c[0], int(c[1]), int(c[2]), float(c[3]), ins))
    return indels


def main(args):
    """Entry point: make the indels listed in ``args.varFileName`` and merge them.

    Steps, in order:
      1. read the indel records (at most ``args.numsnvs``; 0 = all),
      2. check that ``args.bamFileName`` has a ``.bai`` next to it (exits with
         status 1 otherwise) and validate the aligner options,
      3. create an empty BAM (header copied from the input) to hold mutated reads,
      4. run ``makemut`` once per record in a pool of ``args.procs`` workers,
      5. merge the per-record BAMs returned by the workers into that BAM and
         delete the per-record files,
      6. unless ``args.skipmerge`` is set, optionally tag the mutated reads
         (``args.tagreads``), call ``replace`` to write ``args.outBamFile`` and
         remove the intermediate BAM.

    :param args: parsed command-line namespace; the attributes read here are
        varFileName, refFasta, bamFileName, alignopts, aligner, picardjar,
        avoidreads, tmpdir, outBamFile, procs, numsnvs, maxopen, skipmerge,
        tagreads and seed.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")

    indels = _read_indels(args.varFileName, int(args.numsnvs))

    # Not used further in this function; kept so that opening the reference
    # still happens up front exactly as before.
    reffile = pysam.Fastafile(args.refFasta)

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    # aligner options arrive as "key:value,key:value"
    alignopts = {}
    if args.alignopts is not None:
        alignopts = dict([o.split(':') for o in args.alignopts.split(',')])

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads
    outbam_mutsfile = "addindel." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    tmpbams = []

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        print("INFO\t" + now() + "\tcreated tmp directory: " + args.tmpdir)

    logdir = 'addindel_logs_' + os.path.basename(args.outBamFile)
    if not os.path.exists(logdir):
        os.mkdir(logdir)
        print("created directory: " + logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    pool = Pool(processes=int(args.procs))

    # make mutations (one job per record submitted to the worker pool)
    results = [
        pool.apply_async(makemut, [args, chrom, start, end, vaf, ins, avoid, alignopts])
        for (chrom, start, end, vaf, ins) in indels
    ]

    for result in results:
        try:
            tmpbamlist = result.get()
            if tmpbamlist is not None:
                tmpbams.extend(tmpbamlist)
        except AssertionError:
            # a failed assertion in one worker must not abort the other records
            print("****************************************************")
            print("* WARNING: assertion failed somewhere, check logs. *")
            print("****************************************************")

    # every result has been collected, so the workers can be released
    pool.close()
    pool.join()

    tmpbams.sort()

    # merge tmp bams
    if len(tmpbams) == 1:
        # move() rather than os.rename(): the per-record BAM may live on a
        # different filesystem than the working directory
        move(tmpbams[0], outbam_mutsfile)
    elif len(tmpbams) > 1:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        if os.path.exists(bam):
            os.remove(bam)
        if os.path.exists(bam + '.bai'):
            os.remove(bam + '.bai')

    if args.skipmerge:
        print("INFO\t" + now() + "\tskipping merge, plase merge reads from " + outbam_mutsfile + " manually.")
    else:
        if args.tagreads:
            from bs.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            # tag the merged mutation BAM in place (via a temporary copy)
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            print("INFO\t" + now() + "\ttagged reads.")

        print("INFO\t" + now() + "\tdone making mutations, merging mutations into " + args.bamFileName + " --> " + args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup
        os.remove(outbam_mutsfile)
def _read_snv_targets(bedpath, limit):
    """Parse SNV targets from a whitespace-delimited BED-like file.

    Columns read per line: chrom, start, end, then an optional VAF (4th column)
    and an optional ALT base (5th column, only read when the line has exactly
    five columns).

    :param bedpath: path of the file to read.
    :param limit: maximum number of targets to return; 0 means no limit.
    :returns: list of dicts with keys 'chrom', 'start', 'end', 'vaf', 'altbase'
        ('vaf' and 'altbase' are None when the column is absent).
    :raises AssertionError: if an ALT base is not one of A, T, C, G.
    """
    targets = []
    # 'with' guarantees the handle is released even when a line fails to parse.
    with open(bedpath, 'r') as bedfile:
        for bedline in bedfile:
            if limit != 0 and len(targets) >= limit:
                break
            c = bedline.strip().split()
            if not c:
                # blank line: nothing to parse (would otherwise be an IndexError)
                continue

            target = {
                'chrom': c[0],
                'start': int(c[1]),
                'end': int(c[2]),
                'vaf': None,
                'altbase': None,
            }

            # VAF is 4th column, if present
            if len(c) > 3:
                target['vaf'] = float(c[3])

            # ALT is 5th column, if present
            if len(c) == 5:
                altbase = c[4].upper()
                assert altbase in ['A', 'T', 'C', 'G'], "ERROR:\t" + now() + "\tALT " + altbase + " not A, T, C, or G!\n"
                target['altbase'] = altbase

            targets.append(target)
    return targets


def _cluster_targets(targets, hapsize):
    """Group targets into clusters that are handed to ``makemut`` together.

    Targets are sorted by (chrom, start). A target joins the current cluster
    when it is on the same chromosome as the cluster and its start is less than
    ``hapsize`` away from the start of the cluster's first target; otherwise it
    opens a new cluster. With ``hapsize`` 0 every target is its own cluster.

    :param targets: list of target dicts with 'chrom' and 'start' keys.
    :param hapsize: maximum distance from the first target of a cluster.
    :returns: list of non-empty lists of target dicts (empty list for no targets).
    """
    clusters = []
    current = []
    cluster_chrom = None
    cluster_start = None

    for target in sorted(targets, key=itemgetter('chrom', 'start')):
        joins_current = (
            bool(current)
            and target['chrom'] == cluster_chrom
            and target['start'] - cluster_start < hapsize
        )
        if joins_current:
            current.append(target)
            continue

        if current:
            clusters.append(current)
        # The reference point must follow the new cluster; leaving it on the
        # first chromosome / first cluster splits every later target apart.
        current = [target]
        cluster_chrom = target['chrom']
        cluster_start = target['start']

    if current:
        clusters.append(current)
    return clusters


def main(args):
    """Entry point: make the SNVs listed in ``args.varFileName`` and merge them.

    Steps, in order:
      1. read the targets (at most ``args.numsnvs``; 0 = all),
      2. check that ``args.bamFileName`` has a ``.bai`` next to it (exits with
         status 1 otherwise) and validate the aligner options,
      3. create an empty BAM (header copied from the input) to hold mutated reads,
      4. group the targets with ``args.haplosize`` and run ``makemut`` once per
         group in a pool of ``args.procs`` workers,
      5. merge the BAMs returned by the workers into that BAM and delete the
         per-group files,
      6. unless ``args.skipmerge`` is set, optionally tag the mutated reads
         (``args.tagreads``), call ``replace`` to write ``args.outBamFile`` and
         remove the intermediate BAM.

    :param args: parsed command-line namespace; the attributes read here are
        varFileName, refFasta, bamFileName, alignopts, aligner, picardjar,
        avoidreads, tmpdir, outBamFile, procs, numsnvs, haplosize, maxopen,
        skipmerge, tagreads and seed.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + ' '.join(sys.argv) + "\n")

    targets = _read_snv_targets(args.varFileName, int(args.numsnvs))

    # Not used further in this function; kept so that opening the reference
    # still happens up front exactly as before.
    reffile = pysam.Fastafile(args.refFasta)

    if not os.path.exists(args.bamFileName + '.bai'):
        sys.stderr.write("ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n")
        sys.exit(1)

    # aligner options arrive as "key:value,key:value"
    alignopts = {}
    if args.alignopts is not None:
        alignopts = dict([o.split(':') for o in args.alignopts.split(',')])

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads
    outbam_mutsfile = "addsnv." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, 'rb')
    outbam_muts = pysam.Samfile(outbam_mutsfile, 'wb', template=bamfile)
    outbam_muts.close()
    bamfile.close()

    tmpbams = []

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        print("INFO\t" + now() + "\tcreated tmp directory: " + args.tmpdir)

    logdir = 'addsnv_logs_' + os.path.basename(args.outBamFile)
    if not os.path.exists(logdir):
        os.mkdir(logdir)
        print("INFO\t" + now() + "\tcreated directory: " + logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    pool = Pool(processes=int(args.procs))

    haploclusters = _cluster_targets(targets, int(args.haplosize))
    print("Debug, haploclusters:" + str(haploclusters))

    # make mutations (one job per cluster submitted to the worker pool)
    results = [
        pool.apply_async(makemut, [args, hc, avoid, alignopts])
        for hc in haploclusters
    ]

    for result in results:
        tmpbamlist = result.get()
        if tmpbamlist is not None:
            tmpbams.extend(tmpbamlist)

    # every result has been collected, so the workers can be released
    pool.close()
    pool.join()

    # merge tmp bams
    if len(tmpbams) == 1:
        move(tmpbams[0], outbam_mutsfile)
    elif len(tmpbams) > 1:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        if os.path.exists(bam):
            os.remove(bam)
        if os.path.exists(bam + '.bai'):
            os.remove(bam + '.bai')

    if args.skipmerge:
        print("INFO\t" + now() + "\tskipping merge, plase merge reads from " + outbam_mutsfile + " manually.")
    else:
        if args.tagreads:
            from bs.markreads import markreads
            tmp_tag_bam = 'tag.%s.bam' % str(uuid4())
            # tag the merged mutation BAM in place (via a temporary copy)
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            print("INFO\t" + now() + "\ttagged reads.")

        print("INFO\t" + now() + "\tdone making mutations, merging mutations into " + args.bamFileName + " --> " + args.outBamFile)
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup
        os.remove(outbam_mutsfile)
for exclfn in exclfns: if os.path.isfile(exclfn): os.remove(exclfn) for tmpbam in tmpbams: if os.path.isfile(tmpbam): os.remove(tmpbam) if os.path.isfile(tmpbam + ".bai"): os.remove(tmpbam + ".bai") else: if args.tagreads: from bs.markreads import markreads tmp_tag_bam = "tag.%s.bam" % str(uuid4()) markreads(mergedtmp, tmp_tag_bam) move(tmp_tag_bam, mergedtmp) print "INFO\t" + now() + "\ttagged reads." print "INFO\t" + now() + "\tswapping reads into original and writing to ", args.outBamFile replace( args.bamFileName, mergedtmp, args.outBamFile, excl_merged, keepsecondary=args.keepsecondary, seed=args.seed ) if not args.debug: os.remove(excl_merged) os.remove(mergedtmp) for exclfn in exclfns: if os.path.isfile(exclfn): os.remove(exclfn)
if not args.debug: for exclfn in exclfns: if os.path.isfile(exclfn): os.remove(exclfn) for tmpbam in tmpbams: if os.path.isfile(tmpbam): os.remove(tmpbam) if os.path.isfile(tmpbam + '.bai'): os.remove(tmpbam + '.bai') else: if args.tagreads: from bs.markreads import markreads tmp_tag_bam = 'tag.%s.bam' % str(uuid4()) markreads(mergedtmp, tmp_tag_bam) move(tmp_tag_bam, mergedtmp) print "INFO\t" + now() + "\ttagged reads." print "INFO\t" + now( ) + "\tswapping reads into original and writing to ", args.outBamFile replace(args.bamFileName, mergedtmp, args.outBamFile, excl_merged, keepsecondary=args.keepsecondary, seed=args.seed) if not args.debug: os.remove(excl_merged) os.remove(mergedtmp)
def _read_snv_targets(bedpath, limit):
    """Parse SNV targets from a whitespace-delimited BED-like file.

    Columns read per line: chrom, start, end, then an optional VAF (4th column)
    and an optional ALT base (5th column, only read when the line has exactly
    five columns).

    :param bedpath: path of the file to read.
    :param limit: maximum number of targets to return; 0 means no limit.
    :returns: list of dicts with keys "chrom", "start", "end", "vaf", "altbase"
        ("vaf" and "altbase" are None when the column is absent).
    :raises AssertionError: if an ALT base is not one of A, T, C, G.
    """
    targets = []
    # "with" guarantees the handle is released even when a line fails to parse.
    with open(bedpath, "r") as bedfile:
        for bedline in bedfile:
            if limit != 0 and len(targets) >= limit:
                break
            c = bedline.strip().split()
            if not c:
                # blank line: nothing to parse (would otherwise be an IndexError)
                continue

            target = {"chrom": c[0], "start": int(c[1]), "end": int(c[2]), "vaf": None, "altbase": None}

            # VAF is 4th column, if present
            if len(c) > 3:
                target["vaf"] = float(c[3])

            # ALT is 5th column, if present
            if len(c) == 5:
                altbase = c[4].upper()
                assert altbase in ["A", "T", "C", "G"], (
                    "ERROR:\t" + now() + "\tALT " + altbase + " not A, T, C, or G!\n"
                )
                target["altbase"] = altbase

            targets.append(target)
    return targets


def _cluster_targets(targets, hapsize):
    """Group targets into clusters that are handed to ``makemut`` together.

    Targets are sorted by (chrom, start). A target joins the current cluster
    when it is on the same chromosome as the cluster and its start is less than
    ``hapsize`` away from the start of the cluster's first target; otherwise it
    opens a new cluster. With ``hapsize`` 0 every target is its own cluster.

    :param targets: list of target dicts with "chrom" and "start" keys.
    :param hapsize: maximum distance from the first target of a cluster.
    :returns: list of non-empty lists of target dicts (empty list for no targets).
    """
    clusters = []
    current = []
    cluster_chrom = None
    cluster_start = None

    for target in sorted(targets, key=itemgetter("chrom", "start")):
        joins_current = (
            bool(current)
            and target["chrom"] == cluster_chrom
            and target["start"] - cluster_start < hapsize
        )
        if joins_current:
            current.append(target)
            continue

        if current:
            clusters.append(current)
        # The reference point must follow the new cluster; leaving it on the
        # first chromosome / first cluster splits every later target apart.
        current = [target]
        cluster_chrom = target["chrom"]
        cluster_start = target["start"]

    if current:
        clusters.append(current)
    return clusters


def main(args):
    """Entry point: make the SNVs listed in ``args.varFileName`` and merge them.

    Steps, in order:
      1. read the targets (at most ``args.numsnvs``; 0 = all),
      2. check that ``args.bamFileName`` has a ``.bai`` next to it (exits with
         status 1 otherwise) and validate the aligner options,
      3. create an empty BAM (header copied from the input) to hold mutated reads,
      4. group the targets with ``args.haplosize`` and run ``makemut`` once per
         group in a pool of ``args.procs`` workers,
      5. merge the BAMs returned by the workers into that BAM and delete the
         per-group files,
      6. unless ``args.skipmerge`` is set, optionally tag the mutated reads
         (``args.tagreads``), call ``replace`` to write ``args.outBamFile`` and
         remove the intermediate BAM.

    :param args: parsed command-line namespace; the attributes read here are
        varFileName, refFasta, bamFileName, alignopts, aligner, picardjar,
        avoidreads, tmpdir, outBamFile, procs, numsnvs, haplosize, maxopen,
        skipmerge, tagreads and seed.
    """
    print("INFO\t" + now() + "\tstarting " + sys.argv[0] + " called with args: " + " ".join(sys.argv) + "\n")

    targets = _read_snv_targets(args.varFileName, int(args.numsnvs))

    # Not used further in this function; kept so that opening the reference
    # still happens up front exactly as before.
    reffile = pysam.Fastafile(args.refFasta)

    if not os.path.exists(args.bamFileName + ".bai"):
        sys.stderr.write(
            "ERROR\t" + now() + "\tinput bam must be indexed, not .bai file found for " + args.bamFileName + " \n"
        )
        sys.exit(1)

    # aligner options arrive as "key:value,key:value"
    alignopts = {}
    if args.alignopts is not None:
        alignopts = dict([o.split(":") for o in args.alignopts.split(",")])

    aligners.checkoptions(args.aligner, alignopts, args.picardjar)

    # load readlist to avoid, if specified
    avoid = None
    if args.avoidreads is not None:
        avoid = dictlist(args.avoidreads)

    # make a temporary file to hold mutated reads
    outbam_mutsfile = "addsnv." + str(uuid4()) + ".muts.bam"
    bamfile = pysam.Samfile(args.bamFileName, "rb")
    outbam_muts = pysam.Samfile(outbam_mutsfile, "wb", template=bamfile)
    outbam_muts.close()
    bamfile.close()

    tmpbams = []

    if not os.path.exists(args.tmpdir):
        os.mkdir(args.tmpdir)
        print("INFO\t" + now() + "\tcreated tmp directory: " + args.tmpdir)

    logdir = "addsnv_logs_" + os.path.basename(args.outBamFile)
    if not os.path.exists(logdir):
        os.mkdir(logdir)
        print("INFO\t" + now() + "\tcreated directory: " + logdir)

    assert os.path.exists(logdir), "could not create output directory!"
    assert os.path.exists(args.tmpdir), "could not create temporary directory!"

    pool = Pool(processes=int(args.procs))

    haploclusters = _cluster_targets(targets, int(args.haplosize))
    print("Debug, haploclusters:" + str(haploclusters))

    # make mutations (one job per cluster submitted to the worker pool)
    results = [
        pool.apply_async(makemut, [args, hc, avoid, alignopts])
        for hc in haploclusters
    ]

    for result in results:
        tmpbamlist = result.get()
        if tmpbamlist is not None:
            tmpbams.extend(tmpbamlist)

    # every result has been collected, so the workers can be released
    pool.close()
    pool.join()

    # merge tmp bams
    if len(tmpbams) == 1:
        move(tmpbams[0], outbam_mutsfile)
    elif len(tmpbams) > 1:
        mergebams(tmpbams, outbam_mutsfile, maxopen=int(args.maxopen))

    # cleanup
    for bam in tmpbams:
        if os.path.exists(bam):
            os.remove(bam)
        if os.path.exists(bam + ".bai"):
            os.remove(bam + ".bai")

    if args.skipmerge:
        print("INFO\t" + now() + "\tskipping merge, plase merge reads from " + outbam_mutsfile + " manually.")
    else:
        if args.tagreads:
            from bs.markreads import markreads

            tmp_tag_bam = "tag.%s.bam" % str(uuid4())
            # tag the merged mutation BAM in place (via a temporary copy)
            markreads(outbam_mutsfile, tmp_tag_bam)
            move(tmp_tag_bam, outbam_mutsfile)
            print("INFO\t" + now() + "\ttagged reads.")

        print(
            "INFO\t" + now() + "\tdone making mutations, merging mutations into "
            + args.bamFileName + " --> " + args.outBamFile
        )
        replace(args.bamFileName, outbam_mutsfile, args.outBamFile, seed=args.seed)

        # cleanup
        os.remove(outbam_mutsfile)