def blasr(query, target, format, nproc=1, outname="out.m5", consensus=True):
    """
    Run blasr on `query` against `target` with `-bestn 1`.

    `format` is spliced verbatim into the command line (e.g. an output
    format switch) and the alignments go to `outname`.  BLASRPARAMS is
    appended to the command when `consensus` is true, EEBLASRPARAMS
    otherwise.  The result of exe() is only logged at debug level;
    nothing is returned.
    """
    baseCmd = "blasr %s %s %s -nproc %d -bestn 1 -out %s " % (
        query, target, format, nproc, outname)
    #need to figure out how to m5-pie it...maybe
    tuning = BLASRPARAMS if consensus else EEBLASRPARAMS
    retCode, stdOut, stdErr = exe(baseCmd + tuning)
    logging.debug("blasr - %d - %s - %s" % (retCode, stdOut, stdErr))
def blasr(query, target, format, nproc=1, outname="out.m5", consensus=True):
    """
    Simple mapper (double-dash blasr options).

    Builds a `blasr ... --nproc N --bestn 1 --out outname` command with
    `format` inserted verbatim, appends BLASRPARAMS when `consensus` is
    true and EEBLASRPARAMS otherwise, runs it through exe() and logs the
    (return code, stdout, stderr) triple at debug level.  Returns None.
    """
    pieces = (query, target, format, nproc, outname)
    command = "blasr %s %s %s --nproc %d --bestn 1 --out %s " % pieces
    #need to figure out how to m5-pie it...maybe
    if consensus:
        command = command + BLASRPARAMS
    else:
        command = command + EEBLASRPARAMS
    status, out, err = exe(command)
    logging.debug("blasr - %d - %s - %s" % (status, out, err))
def samToFastq(inSam, outFq):
    """
    Convert the SAM file `inSam` into the FASTQ file `outFq`.

    Header lines (starting with "@") are dropped with grep, then awk
    prints columns 1, 10 and 11 of every remaining line as a four-line
    FASTQ record.  Returns whatever exe() returns.
    """
    shellTemplate = ('grep -v "^@" %s | '
                     'awk \'{print "@" $1 "\\n" $10 "\\n+\\n" $11}\' '
                     '> %s')
    return exe(shellTemplate % (inSam, outFq))
def parseArgs(self):
    """
    Parse the command line with OptionParser
        Jelly.py <stage> <protocol>

    Sets self.options, self.executeStage (the stage name) and
    self.protocolName (absolute path of the protocol argument).
    With -h/--help and exactly one argument naming a known stage, that
    stage's help command is run and its output printed; with -h and any
    other argument count the general help is printed.  Both exit 0.
    """
    parser = OptionParser(USAGE)
    # Replace optparse's built-in -h so help can be forwarded to a stage
    parser.remove_option("-h")
    parser.add_option("-h", "--help", action="store_true", default=False)
    parser.add_option("--debug", action="store_true", default=False)
    parser.add_option("-x", dest="extras", type="string", default="",
                      help="-x \"<options>\" are options to pass into the stage you're running")
    self.options, args = parser.parse_args()

    helpRequested = self.options.help == True
    if helpRequested and len(args) != 1:
        print(parser.format_help())
        sys.exit(0)
    if helpRequested and args[0] in STAGES:
        print(exe(Stages.PRINT_HELPS[args[0]])[1])
        sys.exit(0)
    # Help for an unknown stage drops down to the parser.error below

    argsAreValid = len(args) == 2 and args[0] in STAGES
    if not argsAreValid:
        parser.error("Invalid Arguments. Expected one of\n'%s'" % "', '".join(STAGES))
        sys.exit(1)

    self.executeStage = args[0]
    self.protocolName = os.path.abspath(args[1])
def blasr(query, target, nproc=1, outname="out.m5"):
    """
    Simple overlapper.

    Runs blasr on `query` against `target` with -m 5 output written to
    `outname`, using the fixed overlap settings in the command below and
    `nproc` processes.  The exe() result is unpacked but not returned.
    """
    template = ("blasr %s %s -m 5 --bestn 200 --nCandidates 200 --minMatch 12 "
                "--affineExtend 3 --nproc %d --noSplitSubreads --out %s --maxScore -1000")
    command = template % (query, target, nproc, outname)
    retCode, stdOut, stdErr = exe(command)
def remapReads(reads, outName):
    """
    Remap `reads` with blasr to the module-level `reference`
    (per the original note, only set up for hg19).

    Uses the suffix array `<reference>.sa`, 4 processes, `-bestn 1` and
    writes SAM to `outName`.  Returns whatever exe() returns.
    """
    command = "blasr {0} {1} -sa {1}.sa -nproc 4 -out {2} -sam -bestn 1".format(
        reads, reference, outName)
    return exe(command)
def callBlasr(inFile, refFile, params, nproc=1, outFile="map.sam"):
    """
    Map `inFile` to `refFile` with blasr, writing SAM to `outFile`.

    A suffix array is passed with -sa when `<refFile>.sa` exists.
    `params` is appended verbatim to the command line.  On a non-zero
    return code the result is logged as an error and the process exits
    with that code; otherwise the result is logged at info level.
    """
    saPath = refFile + ".sa"
    sa = "-sa " + saPath if os.path.exists(saPath) else ""

    logging.info("Running Blasr")
    baseCmd = ("blasr %s %s %s -nproc %d -bestn 1 "
               "-sam -clipping subread -out %s ") \
               % (inFile, refFile, sa, nproc, outFile)
    r, o, e = exe(baseCmd + params)

    if r == 0:
        logging.info(str([r, o, e]))
        return

    logging.error("blasr mapping failed!")
    logging.error("RETCODE %d" % (r))
    logging.error("STDOUT %s" % (str(o)))
    logging.error("STDERR %s" % (str(e)))
    logging.error("Exiting")
    exit(r)
def remapReads(reads, outName):
    """
    Align `reads` to the provided reference with blasr
    (only set up for hg19 -- see the global variable `reference`).

    The command uses `<reference>.sa` as suffix array, 4 processes and
    `-bestn 1`, and writes SAM output to `outName`.  The exe() result is
    returned unchanged.
    """
    fields = (reads, reference, outName)
    return exe("blasr {0} {1} -sa {1}.sa -nproc 4 -out {2} -sam -bestn 1".format(*fields))
def samToFastq(inSam, outFq):
    """
    Creates a FASTQ file (`outFq`) from the SAM file `inSam`.

    A shell pipeline removes "@" header lines and lets awk emit
    "@<col 1>", <col 10>, "+", <col 11> for each record.  The exe()
    result is returned as-is.
    """
    pipeline = ('grep -v "^@" %s | '
                'awk \'{print "@" $1 "\\n" $10 "\\n+\\n" $11}\' '
                '> %s') % (inSam, outFq)
    result = exe(pipeline)
    return result
def sam2bam(fn):
    """
    Create a sorted, indexed BAM from the SAM file `fn`
    (only set up for hg19 -- see the global variable `reference`).

    The output base name is `fn` without its last four characters
    (presumably the ".sam" extension).  Returns whatever exe() returns.
    """
    name = fn[:-4]
    pipeline = ("samtools view -bt {0} {1} | samtools sort - {2}.sort && "
                "mv {2}.sort.bam {2}.bam && "
                "samtools index {2}.bam")
    return exe(pipeline.format(reference, fn, name))
def blasr(query, target, format, nproc=1, outname="out.m5", consensus=True):
    """
    Simple mapper.

    Runs `blasr query target <format> -nproc N -bestn 1 -out outname`
    followed by one of two fixed option sets: the consensus settings
    when `consensus` is true, otherwise the affine-alignment settings.
    The exe() result is logged at debug level; nothing is returned.
    """
    base = "blasr %s %s %s -nproc %d -bestn 1 -out %s " % (
        query, target, format, nproc, outname)
    #need to figure out how to m5-pie it...maybe
    if consensus:
        tuning = (" -noSplitSubreads -minMatch 5 "
                  "-nCandidates 20 -sdpTupleSize 6 -insertion 1 -deletion 1 -bestn 1")
    else:
        tuning = (" -maxAnchorsPerPosition 100 "
                  "-affineAlign -affineOpen 100 -affineExtend 0 "
                  "-insertion 10 -deletion 10 "
                  "-noSplitSubreads -nCandidates 20 ")
    r, o, e = exe(base + tuning)
    logging.debug("blasr - %d - %s - %s" % (r, o, e))
def sam2bam(fn):
    """
    Creates BAM from SAM
    (only setup for hg19 -- see global variable reference)

    Strips the last four characters of `fn` to get the output base
    name, then converts, sorts, renames and indexes with samtools in a
    single shell pipeline.  The exe() result is returned unchanged.
    """
    baseName = fn[:-4]
    steps = ("samtools view -bt {0} {1} | samtools sort - {2}.sort && "
             "mv {2}.sort.bam {2}.bam && "
             "samtools index {2}.bam")
    command = steps.format(reference, fn, baseName)
    return exe(command)
def blasr(query, target, format, nproc=1, outname="out.m5", consensus=True):
    """
    Map `query` to `target` with blasr (`-bestn 1`), writing to `outname`.

    `format` is inserted verbatim after the target.  `consensus` picks
    which fixed set of extra blasr options is appended.  The return
    code, stdout and stderr from exe() are logged at debug level and
    nothing is returned.
    """
    consensusOpts = (" -noSplitSubreads -minMatch 5 " +
                     "-nCandidates 20 -sdpTupleSize 6 -insertion 1 -deletion 1 -bestn 1")
    affineOpts = (" -maxAnchorsPerPosition 100 "
                  "-affineAlign -affineOpen 100 -affineExtend 0 "
                  "-insertion 10 -deletion 10 "
                  "-noSplitSubreads -nCandidates 20 ")
    cmd = ("blasr %s %s %s -nproc %d -bestn 1 -out %s ") \
           % (query, target, format, nproc, outname)
    #need to figure out how to m5-pie it...maybe
    retCode, stdOut, stdErr = exe(cmd + (consensusOpts if consensus else affineOpts))
    logging.debug("blasr - %d - %s - %s" % (retCode, stdOut, stdErr))
def mapTails(fq, ref, nproc=1, out="tailmap.sam", useSa=True):
    """
    Map the reads in `fq` to `ref` with blasr (-m 4 output in `out`).

    `<ref>.sa` is passed with --sa when it exists and `useSa` is true.
    A non-zero return code is logged as an error and the process exits
    with that code; otherwise the exe() result is logged at info level.
    """
    saFile = ref + ".sa"
    if os.path.exists(saFile) and useSa:
        sa = "--sa " + saFile
    else:
        sa = ""

    template = ("blasr %s %s %s --nproc %d -m 4 --bestn 1 --nCandidates 20 --out %s"
                " --minPctIdentity 75 --sdpTupleSize 6 --noSplitSubreads")
    cmd = template % (fq, ref, sa, nproc, out)
    logging.debug(cmd)

    r, o, e = exe(cmd)
    if r == 0:
        logging.info(str([r, o, e]))
        return

    logging.error("blasr mapping failed!")
    logging.error("RETCODE %d" % (r))
    logging.error("STDOUT %s" % (str(o)))
    logging.error("STDERR %s" % (str(e)))
    logging.error("Exiting")
    exit(r)
def mapTails(fq, ref, nproc=1, out="tailmap.sam", useSa=True):
    """
    Run blasr for `fq` against `ref`, automatically using `<ref>.sa`
    as suffix array when that file exists and `useSa` is true.

    Output (-m 4) goes to `out`.  When blasr returns non-zero the
    return code, stdout and stderr are logged as errors and the process
    exits with the return code; on success they are logged at info.
    """
    haveSa = os.path.exists(ref+".sa") and useSa
    sa = ("--sa " + ref + ".sa") if haveSa else ""

    cmd = ("blasr %s %s %s --nproc %d -m 4 --bestn 1 --nCandidates 20 --out %s"
           " --minPctIdentity 75 --sdpTupleSize 6 --noSplitSubreads") \
           % (fq, ref, sa, nproc, out)
    logging.debug(cmd)
    retCode, stdOut, stdErr = exe(cmd)

    failed = retCode != 0
    if failed:
        logging.error("blasr mapping failed!")
        logging.error("RETCODE %d" % (retCode))
        logging.error("STDOUT %s" % (str(stdOut)))
        logging.error("STDERR %s" % (str(stdErr)))
        logging.error("Exiting")
        exit(retCode)

    logging.info(str([retCode, stdOut, stdErr]))
def consensusCalling(self, spot, bam, reference, args):
    """
    Make a consensus of all the reads in the region and identify all of the SVs in the region

    This variant assembles the supporting reads with phrap, polishes the
    contigs (args.polish is "pbbanana" or "pbdagcon"), maps the result
    to the reference window with blasr and reports every INS/DEL from
    expandCigar whose type matches spot.svtype.

    Returns [spot] (tagged "noSpan") when no read spans the region, []
    when phrap fails or makes no contigs, otherwise the list of new spots.
    """
    #
    MAXNUMREADS = 100 #I don't think we'll need more than this many reads
    MAXATTEMPTS = MAXNUMREADS/2 #I don't feel like trying 100 times
    SPANBUFFER = 100 #number of bases I want a read to span

    chrom, start, end = spot.chrom, spot.start, spot.end
    buffer = args.buffer

    supportReads = []
    spanReads = []
    #Fetch reads and trim
    totCnt = 0
    for read in bam.fetch(chrom, max(0, start-buffer-SPANBUFFER), end+buffer+SPANBUFFER):
        # only reads already recorded as supporting this spot are used
        if read.qname not in spot.varReads:
            continue
        seq, qual = self.readTrim(read, start-buffer, end+buffer)
        # spanning reads cover the spot plus SPANBUFFER on both sides
        if read.pos < start-SPANBUFFER and read.aend > end+SPANBUFFER:
            spanReads.append((len(seq), seq, qual))
        else:
            supportReads.append((seq, qual))
        totCnt += 1

    if len(spanReads) == 0:
        logging.debug("noone spans - consensus aborted. %s" % (str(spot)))
        spot.tags["noSpan"] = True
        return [spot]

    # longest trimmed spanning reads first; spanning reads take priority
    # when the combined read set is capped at MAXNUMREADS
    spanReads.sort(reverse=True)
    if len(spanReads) > MAXNUMREADS:
        origSupportReads = [(x[1], x[2]) for x in spanReads[:MAXNUMREADS]]
    elif len(spanReads) + len(supportReads) > MAXNUMREADS:
        origSupportReads = [(x[1], x[2]) for x in spanReads] + supportReads[:MAXNUMREADS-len(spanReads)]
    else:
        origSupportReads = [(x[1], x[2]) for x in spanReads] + supportReads
    logging.debug("Alt reads: %d total, %d extra support" % (totCnt, len(origSupportReads)))

    mySpots = []
    refReadId = 0
    haveVar = False

    #Attempt each spanRead until we get one that passes
    # (the seed-read retry loop is disabled in this variant; the
    #  leftover counter increment below has no further effect)
    #while refReadId < len(spanReads) and not haveVar and refReadId < MAXATTEMPTS:
    #refread = spanReads[refReadId]
    #supportReads = origSupportReads[:refReadId] + origSupportReads[refReadId+1:]
    refReadId += 1

    #read that spans most of the region goes first
    #use the rest for cleaning

    #building consensus sequence
    # phrap input: a fasta plus a "<fasta>.qual" file of Phred values
    # (quality character code minus 33)
    foutreads = NamedTemporaryFile(suffix=".fasta")
    # NOTE(review): qoutreads is never closed or removed in this function
    qoutreads = open(foutreads.name + '.qual', 'w')
    for id, i in enumerate(origSupportReads):
        foutreads.write(">%d\n%s\n" % (id, i[0]))
        qoutreads.write(">%d\n%s\n" % (id, " ".join(str(ord(j)-33) for j in i[1])))
    foutreads.flush()
    qoutreads.flush()
    #foutref = NamedTemporaryFile(suffix=".fasta")
    #foutref.write(">%s:%d-%d\n%s" % (spot.chrom, start, end, refread[1]))
    #foutref.flush()

    logging.debug("Making the contig....")
    #run it through phrap
    #make out.fasta and out.fasta.qual
    #run phrap
    #if asm -- consensus only
    r, o, e = exe("phrap %s -minmatch 6 -minscore 20" % (foutreads.name), timeout=3)
    if r != 0:#failed
        logging.warning('phrap failed ' + self.name)
        logging.warning(o)
        logging.warning(e)
        return [] #here is where I'd like to add just the no-consensus spot

    results = mergeFastaQual(foutreads.name + ".contigs", foutreads.name + ".contigs.qual")
    if len(results) == 0:
        logging.warning('no asm made ' + self.name)
        return [] #here is where I'd like to add just the no-consensus spot
    logging.info('%d contigs made %s' % (len(results), self.name))

    #then run it through consensus
    logging.debug("Polishing contigs")
    alignOut = NamedTemporaryFile(suffix=".m5")
    # align the input reads back onto the phrap contigs
    blasr(foutreads.name, foutreads.name + ".contigs", format="-m 5", nproc=1, outname=alignOut.name)
    # elif no asm and consensus only (faster)
    # NOTE(review): con and conName stay unbound if args.polish is
    # neither "pbbanana" nor "pbdagcon" -- confirm the option is restricted
    if args.polish == "pbbanana":
        aligns = M5File(alignOut.name)
        con = ">con\n%s\n" % consensus(aligns).sequence
        conName = "pbbanana"
    elif args.polish == "pbdagcon":
        logging.debug("pbdagcon is running")
        #using minerrreads - 1 because one f them is already being used as seed!
        r, con, e = exe("pbdagcon -c %d -t 0 %s" % (max(0, args.minErrReads - 1), alignOut.name), timeout=1)
        #r, con, e = exe("pbdagcon %s" % (alignOut.name), timeout=2)
        logging.debug("back from pbdagcon")
        logging.debug((r,e))
        #raw_input("press ent")
        # drop everything up to and including the first newline of the output
        # NOTE(review): con.index raises ValueError when the output is a
        # string without any newline (e.g. empty)
        if con is not None:
            con = con[con.index("\n")+1:]
        else:
            con = ""
        conName = "pbdagcon"
    alignOut.close()
    #foutref.close()
    foutreads.close()
    #we don't have a consensus - retry
    if len(con) == 0:
        logging.debug("Trying another seed read for consensus")
        # falls back to the first contig from the assembly
        # NOTE(review): presumably .seq has no header line, in which case
        # the split('\n')[1] below would raise IndexError -- confirm
        con = results.values()[0].seq
    logging.debug("%s %d bp seq" % (conName, len(con.split('\n')[1])))

    #try improving consensus
    conOut = NamedTemporaryFile(suffix=".fasta")
    conOut.write(con)
    #conOut.close()
    conOut.flush()

    refOut = NamedTemporaryFile(suffix=".fasta")
    # reference window the consensus is mapped against
    refOut.write(">%s:%d-%d\n%s\n" % (chrom, start, end, \
                reference.fetch(chrom, max(0, start-buffer), end+buffer)))
    refOut.flush()

    #map consensus to refregion
    varSam = NamedTemporaryFile(suffix=".sam")
    blasr(conOut.name, refOut.name, format="-sam", outname=varSam.name)
        #consensus=False) -- would this help?
        #or what if I fed it through leftalign?

    sam = pysam.Samfile(varSam.name)

    matches = 0.0
    bases = 0.0
    nReads = 0
    mySpots = []
    for read in sam:
        nReads += 1
        spot.tags["consensusCreated"] = True
        for svstart, svsize, svtype, altseq in expandCigar(read, args.minIndelSize, CONFIRMCOLLAPSE, True):
            newspot = copy.deepcopy(spot)

            # svstart is shifted by (start - buffer) to get the new coordinates
            # NOTE(review): the window above is fetched from
            # max(0, start-buffer); the shift is not clamped the same way
            if spot.svtype == svtype and svtype == "INS":
                haveVar = True
                newspot.start = svstart + start - buffer
                newspot.end = svstart + start - buffer
                newspot.tags["seq"] = altseq
                newspot.size = svsize
                gt, gq = genotype(newspot)
                newspot.tags["GT"] = gt
                newspot.tags["GQ"] = gq
                mySpots.append(newspot)

            elif spot.svtype == svtype and svtype == "DEL":
                haveVar = True
                newspot.start = svstart + start - buffer
                newspot.end = svstart + svsize + start - buffer
                # deletions are stored with a negative size in this variant
                newspot.size = -svsize
                gt, gq = genotype(newspot)
                newspot.tags["GT"] = gt
                newspot.tags["GQ"] = gq
                newspot.tags["seq"] = reference.fetch(chrom, newspot.start, newspot.end)
                mySpots.append(newspot)
    #identity = matches/bases
    #If no var, nothing is returned.
    #for newspot in mySpots:
        #newspot.tags["alnIdentityEstimate"] = identity
        #Keep reporting the actual contigs out until we
        #find a reason to need it (and also we can get quals...)
        #vbam.reset()
        #for id, read in enumerate(vbam):
            #newspot.tags["contigSeq%d" % (id)] = read.seq
            #newspot.tags["contigQual%d" % (id)] = read.qual

    #vbam.close()
    #varBam.close()
    refOut.close()

    logging.debug("%d consensus reads created %d spots" % (nReads, len(mySpots)))
    return mySpots
def consensusCalling(self, spot, bam, reference, args):
    """
    Make a consensus of the reads supporting `spot` and re-call the
    variant from the consensus-to-reference alignment.

    Up to MAXATTEMPTS spanning reads are tried as seed.  For each seed
    the remaining reads are aligned to it (blasr), a consensus is built
    (args.consensus is "pbbanana" or "pbdagcon"), and the consensus is
    mapped to a reference window of 2 * args.buffer around the spot.
    INS/DEL events from expandCigar matching spot.svtype become new spots.

    Parameters
      spot      -- candidate SV; chrom, start, end, svtype, varReads,
                   varReadsSize and tags["szMedian"] are read, and
                   tags["noSpan"] / tags["consensusCreated"] are set
      bam       -- alignments, queried with fetch(chrom, start, end)
      reference -- sequence source, queried with fetch(chrom, start, end)
      args      -- options; buffer, consensus, minIndelSize and
                   reportContig are read

    Returns
      [spot] when no supporting read spans the region, [] when no
      attempt produced a matching variant, otherwise the spots from the
      consensus read whose size is closest to tags["szMedian"] (the
      earliest attempt wins a tie).
    """
    MAXNUMREADS = 100 #I don't think we'll need more than this many reads
    MAXATTEMPTS = 5 #number of seed reads to try
    SPANBUFFER = 100 #number of bases I want a read to span

    chrom, start, end = spot.chrom, spot.start, spot.end
    flank = args.buffer

    supportReads = []
    spanReads = []
    #Fetch reads and trim
    for read in bam.fetch(chrom, max(0, start - flank - SPANBUFFER),
                          end + flank + SPANBUFFER):
        if read.qname not in spot.varReads:
            continue
        seq, qual = self.readTrim(read, start - flank, end + flank)
        if read.pos < start - SPANBUFFER and read.aend > end + SPANBUFFER:
            # rank spanning reads by how far their variant size is from the median
            sz = spot.varReadsSize[spot.varReads.index(read.qname)]
            spanReads.append((abs(sz - spot.tags["szMedian"]), seq, qual))
        else:
            supportReads.append((seq, qual))

    if len(spanReads) == 0:
        logging.debug("noone spans - consensus aborted. %s" % (str(spot)))
        spot.tags["noSpan"] = True
        return [spot]

    spanReads.sort()
    # spanning reads first, then the other supporting reads, capped at MAXNUMREADS
    origSupportReads = ([(x[1], x[2]) for x in spanReads] + supportReads)[:MAXNUMREADS]

    # Reference window the consensus is mapped to.  The clamped start is
    # also the offset that turns alignment positions into chromosome
    # coordinates, so both must use the same value.
    refStart = max(0, start - (flank * 2))
    refEnd = end + (flank * 2)

    candidates = [] #(size difference to szMedian, spots of one consensus read)
    #Attempt each spanRead as seed and keep the best result
    for seedIdx, seed in enumerate(spanReads[:MAXATTEMPTS]):
        #the seed is the target; all other reads are used for cleaning
        helpers = origSupportReads[:seedIdx] + origSupportReads[seedIdx + 1:]

        #building consensus sequence
        foutreads = NamedTemporaryFile(suffix=".fastq")
        for idx, entry in enumerate(helpers):
            foutreads.write("@%d\n%s\n+\n%s\n" % (idx, entry[0], entry[1]))
        foutreads.flush()

        foutref = NamedTemporaryFile(suffix=".fasta")
        foutref.write(">%s:%d-%d\n%s" % (spot.chrom, start, end, seed[1]))
        foutref.flush()

        alignOut = NamedTemporaryFile(suffix=".m5")
        logging.debug("making the contig....")
        blasr(foutreads.name, foutref.name, format="-m 5", nproc=1,
              outname=alignOut.name)

        if args.consensus == "pbbanana":
            aligns = M5File(alignOut.name)
            con = ">con\n%s\n" % consensus(aligns).sequence
            conName = "pbbanana"
        elif args.consensus == "pbdagcon":
            logging.debug("pbdagcon is running")
            r, con, e = exe("pbdagcon -m 100 -c %d -t 0 %s" % (3, alignOut.name), timeout=1)
            logging.debug("back from pbdagcon")
            logging.debug((r, e))
            # Drop the first output line.  Missing, empty or single-line
            # output counts as "no consensus" instead of raising ValueError.
            if con and "\n" in con:
                con = con[con.index("\n") + 1:]
            else:
                con = ""
            conName = "pbdagcon"

        alignOut.close()
        foutref.close()
        foutreads.close()

        #we don't have a consensus - retry
        if len(con) == 0:
            logging.debug("Trying another seed read for consensus")
            continue
        logging.debug("%s %d bp seq" % (conName, len(con.split('\n')[1])))

        conOut = NamedTemporaryFile(suffix=".fasta")
        conOut.write(con)
        conOut.flush()

        refOut = NamedTemporaryFile(suffix=".fasta")
        refOut.write(">%s:%d-%d\n%s\n" % (chrom, start, end,
                     reference.fetch(chrom, refStart, refEnd)))
        refOut.flush()

        #map consensus to refregion
        varSam = NamedTemporaryFile(suffix=".sam")
        blasr(conOut.name, refOut.name, format="-sam", outname=varSam.name,
              consensus=False)

        sam = pysam.Samfile(varSam.name)
        for read in sam:
            localSpots = []
            localDiff = 10000 #best size difference for this consensus read
            spot.tags["consensusCreated"] = True
            for svstart, svsize, svtype, altseq in expandCigar(
                    read, args.minIndelSize, CONFIRMCOLLAPSE, True):
                if spot.svtype != svtype or svtype not in ("INS", "DEL"):
                    continue
                newspot = copy.deepcopy(spot)
                newspot.start = svstart + refStart
                if svtype == "INS":
                    newspot.end = newspot.start
                    newspot.tags["seq"] = altseq
                else:
                    newspot.end = newspot.start + svsize
                newspot.size = svsize
                gt, gq = genotype(newspot)
                newspot.tags["GT"] = gt
                newspot.tags["GQ"] = gq
                if svtype == "DEL":
                    newspot.tags["seq"] = reference.fetch(chrom, newspot.start, newspot.end)

                diff = abs(spot.tags["szMedian"] - newspot.size)
                if diff < localDiff:
                    localDiff = diff
                if args.reportContig:
                    newspot.tags["contigseq"] = read.seq
                    newspot.tags["contigqual"] = read.qual
                localSpots.append(newspot)

            if len(localSpots) > 0:
                candidates.append((localDiff, localSpots))

        #release the temporary files of this attempt
        sam.close()
        varSam.close()
        conOut.close()
        refOut.close()

    if len(candidates) == 0:
        return []
    # compare on the size difference only -- never on the spot lists
    return min(candidates, key=lambda item: item[0])[1]
#!/usr/bin/env python
"""
This can be run inside of an assembly folder and create our filling sequence
into polish.out.fasta
"""
from pbsuite.utils.FileHandlers import FastqFile, M5File
from pbsuite.utils.CommandRunner import exe

if __name__ == '__main__':
    reads = FastqFile("input.fastq")
    # Entries whose name starts with "ref" become the blasr target
    with open("ref.fasta", 'w') as fout:
        for entry in reads.values():
            if entry.name.startswith("ref"):
                fout.write(">%s\n%s\n" % (entry.name, entry.seq))

    # print(...) with a single argument prints the same text under
    # Python 2 and Python 3
    print(exe("blasr input.fastq ref.fasta --bestn 2 -m 5 --noSplitSubreads > out.m5"))
    print(exe("python /stornext/snfs5/next-gen/scratch/english/Jelly/"
              "DevJelly/branches/consensusDev/GetSubs.py out.m5 input.fastq"))
    print(exe("python /stornext/snfs5/next-gen/scratch/english/Jelly/"
              "DevJelly/branches/sv/pbjPolish.py "
              "reads.fastq seed.fasta -n 4 -l"))
#!/usr/bin/python
"""
This can be run inside of an assembly folder and create our filling sequence
into polish.out.fasta
"""
from pbsuite.utils.FileHandlers import FastqFile, M5File
from pbsuite.utils.CommandRunner import exe

if __name__ == '__main__':
    reads = FastqFile("input.fastq")
    # Entries whose name starts with "ref" become the blasr target
    with open("ref.fasta", 'w') as fout:
        for entry in reads.values():
            if entry.name.startswith("ref"):
                fout.write(">%s\n%s\n" % (entry.name, entry.seq))

    # print(...) with a single argument prints the same text under
    # Python 2 and Python 3
    print(exe("blasr input.fastq ref.fasta --bestn 2 -m 5 --noSplitSubreads > out.m5"))
    print(exe("python /stornext/snfs5/next-gen/scratch/english/Jelly/"
              "DevJelly/branches/consensusDev/GetSubs.py out.m5 input.fastq"))
    print(exe("python /stornext/snfs5/next-gen/scratch/english/Jelly/"
              "DevJelly/branches/sv/pbjPolish.py "
              "reads.fastq seed.fasta -n 4 -l"))
def blasr(query, target, nproc=1, bestn=1, outName="map.m5"):
    """
    Runs blasr on `query` against `target` with --affineAlign and
    -m 5 output written to `outName`, keeping `bestn` hits and using
    `nproc` processes.  The exe() result is unpacked but not returned.
    """
    template = "blasr %s %s --bestn %d --affineAlign -m 5 --nproc %d --out %s"
    command = template % (query, target, bestn, nproc, outName)
    retCode, stdOut, stdErr = exe(command)
def bam2sam(fn, outName):
    """
    Turn the BAM file `fn` into a SAM file (header included, -h)
    written to `outName`.  Returns whatever exe() returns.
    """
    command = "samtools view -h %s > %s " % (fn, outName)
    return exe(command)
def consensusCalling(self, spot, bam, reference, args):
    """
    Make a consensus of the reads supporting `spot` and re-call the
    variant from the consensus-to-reference alignment.

    Up to MAXATTEMPTS spanning reads are tried as seed.  For each seed
    the remaining reads are aligned to it (blasr), a consensus is built
    (args.consensus is "pbbanana" or "pbdagcon"), and the consensus is
    mapped to a reference window of 2 * args.buffer around the spot.
    INS/DEL events from expandCigar matching spot.svtype become new spots.

    Parameters
      spot      -- candidate SV; chrom, start, end, svtype, varReads,
                   varReadsSize and tags["szMedian"] are read, and
                   tags["noSpan"] / tags["consensusCreated"] are set
      bam       -- alignments, queried with fetch(chrom, start, end)
      reference -- sequence source, queried with fetch(chrom, start, end)
      args      -- options; buffer, consensus, minIndelSize and
                   reportContig are read

    Returns
      [spot] when no supporting read spans the region, [] when no
      attempt produced a matching variant, otherwise the spots from the
      consensus read whose size is closest to tags["szMedian"] (the
      earliest attempt wins a tie).
    """
    MAXNUMREADS = 100 #I don't think we'll need more than this many reads
    MAXATTEMPTS = 5 #number of seed reads to try
    SPANBUFFER = 100 #number of bases I want a read to span

    chrom, start, end = spot.chrom, spot.start, spot.end
    flank = args.buffer

    supportReads = []
    spanReads = []
    #Fetch reads and trim
    for read in bam.fetch(chrom, max(0, start-flank-SPANBUFFER),
                          end+flank+SPANBUFFER):
        if read.qname not in spot.varReads:
            continue
        seq, qual = self.readTrim(read, start-flank, end+flank)
        if read.pos < start-SPANBUFFER and read.aend > end+SPANBUFFER:
            # rank spanning reads by how far their variant size is from the median
            sz = spot.varReadsSize[spot.varReads.index(read.qname)]
            spanReads.append((abs(sz - spot.tags["szMedian"]), seq, qual))
        else:
            supportReads.append((seq, qual))

    if len(spanReads) == 0:
        logging.debug("noone spans - consensus aborted. %s" % (str(spot)))
        spot.tags["noSpan"] = True
        return [spot]

    spanReads.sort()
    # spanning reads first, then the other supporting reads, capped at MAXNUMREADS
    origSupportReads = ([(x[1], x[2]) for x in spanReads] + supportReads)[:MAXNUMREADS]

    # Reference window the consensus is mapped to.  The clamped start is
    # also the offset that turns alignment positions into chromosome
    # coordinates, so both must use the same value.
    refStart = max(0, start-(flank*2))
    refEnd = end+(flank*2)

    candidates = [] #(size difference to szMedian, spots of one consensus read)
    #Attempt each spanRead as seed and keep the best result
    for seedIdx, seed in enumerate(spanReads[:MAXATTEMPTS]):
        #the seed is the target; all other reads are used for cleaning
        helpers = origSupportReads[:seedIdx] + origSupportReads[seedIdx+1:]

        #building consensus sequence
        foutreads = NamedTemporaryFile(suffix=".fastq")
        for idx, entry in enumerate(helpers):
            foutreads.write("@%d\n%s\n+\n%s\n" % (idx, entry[0], entry[1]))
        foutreads.flush()

        foutref = NamedTemporaryFile(suffix=".fasta")
        foutref.write(">%s:%d-%d\n%s" % (spot.chrom, start, end, seed[1]))
        foutref.flush()

        alignOut = NamedTemporaryFile(suffix=".m5")
        logging.debug("making the contig....")
        blasr(foutreads.name, foutref.name, format="-m 5", nproc=1,
              outname=alignOut.name)

        if args.consensus == "pbbanana":
            aligns = M5File(alignOut.name)
            con = ">con\n%s\n" % consensus(aligns).sequence
            conName = "pbbanana"
        elif args.consensus == "pbdagcon":
            logging.debug("pbdagcon is running")
            r, con, e = exe("pbdagcon -m 100 -c %d -t 0 %s" % (3, alignOut.name), timeout=1)
            logging.debug("back from pbdagcon")
            logging.debug((r,e))
            # Drop the first output line.  Missing, empty or single-line
            # output counts as "no consensus" instead of raising ValueError.
            if con and "\n" in con:
                con = con[con.index("\n")+1:]
            else:
                con = ""
            conName = "pbdagcon"

        alignOut.close()
        foutref.close()
        foutreads.close()

        #we don't have a consensus - retry
        if len(con) == 0:
            logging.debug("Trying another seed read for consensus")
            continue
        logging.debug("%s %d bp seq" % (conName, len(con.split('\n')[1])))

        conOut = NamedTemporaryFile(suffix=".fasta")
        conOut.write(con)
        conOut.flush()

        refOut = NamedTemporaryFile(suffix=".fasta")
        refOut.write(">%s:%d-%d\n%s\n" % (chrom, start, end,
                     reference.fetch(chrom, refStart, refEnd)))
        refOut.flush()

        #map consensus to refregion
        varSam = NamedTemporaryFile(suffix=".sam")
        blasr(conOut.name, refOut.name, format="-sam", outname=varSam.name,
              consensus=False)

        sam = pysam.Samfile(varSam.name)
        for read in sam:
            localSpots = []
            localDiff = 10000 #best size difference for this consensus read
            spot.tags["consensusCreated"] = True
            for svstart, svsize, svtype, altseq in expandCigar(
                    read, args.minIndelSize, CONFIRMCOLLAPSE, True):
                if spot.svtype != svtype or svtype not in ("INS", "DEL"):
                    continue
                newspot = copy.deepcopy(spot)
                newspot.start = svstart + refStart
                if svtype == "INS":
                    newspot.end = newspot.start
                    newspot.tags["seq"] = altseq
                else:
                    newspot.end = newspot.start + svsize
                newspot.size = svsize
                gt, gq = genotype(newspot)
                newspot.tags["GT"] = gt
                newspot.tags["GQ"] = gq
                if svtype == "DEL":
                    newspot.tags["seq"] = reference.fetch(chrom, newspot.start, newspot.end)

                diff = abs(spot.tags["szMedian"] - newspot.size)
                if diff < localDiff:
                    localDiff = diff
                if args.reportContig:
                    newspot.tags["contigseq"] = read.seq
                    newspot.tags["contigqual"] = read.qual
                localSpots.append(newspot)

            if len(localSpots) > 0:
                candidates.append((localDiff, localSpots))

        #release the temporary files of this attempt
        sam.close()
        varSam.close()
        conOut.close()
        refOut.close()

    if len(candidates) == 0:
        return []
    # compare on the size difference only -- never on the spot lists
    return min(candidates, key=lambda item: item[0])[1]
def assemble(inputFq, workDir):
    """
    Run OLCAssembly.py on `inputFq` with 4 processes, FASTQ output
    (--fqOut) and `workDir` as working directory.
    Returns whatever exe() returns.
    """
    command = "OLCAssembly.py %s --nproc 4 --fqOut --workDir %s" % (inputFq, workDir)
    return exe(command)
def pileup(bam):
    """
    Create a pileup from `bam` with samtools mpileup against the
    module-level `reference`; the output is redirected to `<bam>.plup`.
    Returns whatever exe() returns.
    """
    command = "samtools mpileup -f {0} {1} > {1}.plup".format(reference, bam)
    return exe(command)
def grabReads(inputBam, entry, outFn):
    """
    Get all of the reads of `inputBam` that fall in `entry.region`
    and write them, with header (-h), to `outFn`.
    Returns whatever exe() returns.
    """
    command = "samtools view -h %s %s > %s" % (inputBam, entry.region, outFn)
    return exe(command)
def bam2sam(fn, outName):
    """
    Turns a bam to a sam: `samtools view -h` of `fn`, redirected into
    `outName`.  The exe() result is returned unchanged.
    """
    arguments = (fn, outName)
    return exe("samtools view -h %s > %s " % arguments)
def __assemble(self):
    """
    Write the reads to temporary FASTQ files, run spades.py on them and
    save the resulting contigs as a FASTQ file.

    self.leftReads / self.rightReads / self.pbReads are iterables of
    (name, seq, qual).  Every temporary input and the SPAdes output
    directory are recorded in self.myTmpFiles and removed through the
    parent class' cleanupTmp().

    Returns
      the path of the contig FASTQ (also stored in self.results), or
      the string "Failure - Assembly Timeout <name>" when exe() reports
      return code 214.
    """
    self.myTmpFiles = []

    def dumpReads(prefix, reads):
        # Write (name, seq, qual) records to a kept temp FASTQ; return its path
        handle = tempfile.NamedTemporaryFile(prefix=prefix, suffix=".fastq",
                                             delete=False, dir=self.tmpDir,
                                             mode="w")
        self.myTmpFiles.append(handle.name)
        try:
            for name, seq, qual in reads:
                handle.write("@%s\n%s\n+\n%s\n" % (name, seq, qual))
        finally:
            handle.close()
        return handle.name

    #Temporary Files
    pe1Name = dumpReads("spades_pe1", self.leftReads)
    pe2Name = dumpReads("spades_pe2", self.rightReads)
    pbName = dumpReads("spades_pb", self.pbReads)

    resultOut = tempfile.mkdtemp(prefix="spades", dir=self.tmpDir)

    r, o, e = exe("spades.py -1 {pe1} -2 {pe2} --pacbio {pacbio} -o {output} "\
                  .format(pe1=pe1Name, pe2=pe2Name, pacbio=pbName,
                          output=resultOut),
                  timeout=self.timeout)
    logging.debug("RET - %d\nOUT - %s\nERR- %s" % (r, o, e))

    #the whole output directory is temporary
    self.myTmpFiles.append(resultOut)

    if r == 214:
        super(SpadesAssembler, self).cleanupTmp()
        return "Failure - Assembly Timeout " + self.data.name

    # The command above is plain spades.py, which writes contigs.fasta
    # at the top of the output directory.  The dipspades layout is kept
    # as first choice so a switch back to dipspades.py keeps working.
    outFsta = os.path.join(resultOut, "dipspades", "consensus_contigs.fasta")
    if not os.path.exists(outFsta):
        spadesContigs = os.path.join(resultOut, "contigs.fasta")
        if os.path.exists(spadesContigs):
            outFsta = spadesContigs

    fasta = FastaFile(outFsta)
    results = {}
    for key in fasta:
        # contigs carry no qualities; use a constant '?' per base
        results[key] = FastqEntry(key, fasta[key], '?' * len(fasta[key]))

    #save to file
    fout = tempfile.NamedTemporaryFile(prefix="asm" + self.data.name, mode="w",
                                       suffix=".fastq", delete=False,
                                       dir=self.tmpDir)
    try:
        for key in results:
            fout.write("@group" + self.data.name + "_" + key + "\n" +
                       results[key].seq + '\n+\n' +
                       results[key].qual + '\n')
    finally:
        fout.close()
    self.results = fout.name

    #clean up
    super(SpadesAssembler, self).cleanupTmp()
    return self.results
def run(self):
    """
    Rename the scaffolds of self.scaffInput (and self.qualInput, when
    given) and tabulate their gaps.

    Every scaffold is renamed to "<name with spaces as '_'>|refNNNNNNN".
    Runs of N/n between self.opts.minGap and self.opts.maxGap long that
    are flanked by non-N characters are gaps; gaps less than 25 bases
    apart are merged.  When self.opts.gapOutput is set, one table line
    per gap is written (Gap.BEGIN / Gap.END flag the first / last gap
    of a scaffold), or a single 'na' line for a scaffold without gaps.

    The rewritten files replace the inputs; the originals are kept with
    an ".original" suffix.  With self.opts.index, sawriter builds the
    .sa index and the process exits with 1 if it fails.
    """
    #Fasta Ref Output
    scaffTempName = self.scaffInput + ".tempFasta"
    scaffOutput = open(scaffTempName, 'w')

    #Qual Ref Output
    if self.qualInput is not None:
        qualTempName = self.qualInput + ".tempQual"
        qualOutput = open(qualTempName, 'w')

    #Gaps Output -- stays None when no gap table was requested
    gapTableOut = None
    if self.opts.gapOutput is not None:
        gapTableOut = open(self.opts.gapOutput, 'w')

    logging.info("Creating reference sequence index names and identifying gaps")

    refTemplate = "ref%07d"
    gapFinder = re.compile("[^Nn]([Nn]{%d,%s})[^Nn]" % \
                           (self.opts.minGap, self.opts.maxGap))

    #Read References
    reference = FastaFile(self.scaffInput)
    if self.qualInput is not None:
        qualReference = QualFile(self.qualInput)

    for refId, key in enumerate(reference, 1):
        scaffIndex = refTemplate % refId
        scaffName = key.replace(' ', '_') + "|" + scaffIndex
        sequence = reference[key]

        scaffOutput.write(">" + scaffName + "\n" + wrap(sequence) + "\n")
        if self.qualInput is not None:
            qualOutput.write(">" + scaffName + "\n" + qwrap(qualReference[key]) + "\n")

        # the match includes one flanking base on each side; trim both
        gapCoords = [[gap.start() + 1, gap.end() - 1]
                     for gap in gapFinder.finditer(sequence)]

        if len(gapCoords) == 0: #no Gaps
            # the table is optional, so it must be guarded here as well
            if gapTableOut:
                gapTableOut.write("\t".join(
                    [scaffName, 'na', 'na', scaffIndex + "_0_0", '3']) + '\n')
            logging.debug("Scaffold %s is empty" % scaffName)
            continue

        #Consolidate gaps that are too close -- indicating LQ regions.
        i = 0
        while i < len(gapCoords) - 1:
            if gapCoords[i + 1][0] - gapCoords[i][1] < 25:
                gapCoords[i + 1][0] = gapCoords[i][0]
                del gapCoords[i]
            else:
                i += 1

        if gapTableOut:
            lastIdx = len(gapCoords) - 1
            for idx, (gapStart, gapEnd) in enumerate(gapCoords):
                flag = 0
                if idx == 0:
                    flag += Gap.BEGIN
                if idx == lastIdx:
                    flag += Gap.END
                gapTableOut.write("%s\t%i\t%i\t%s_%i_%i\t%d\n" \
                        % (scaffName, gapStart, gapEnd,
                           scaffIndex, idx, idx + 1, flag))

    #Close shop
    scaffOutput.close()
    os.rename(self.scaffInput, self.scaffInput + ".original")
    os.rename(scaffTempName, self.scaffInput)

    if self.qualInput is not None:
        qualOutput.close()
        os.rename(self.qualInput, self.qualInput + ".original")
        os.rename(qualTempName, self.qualInput)

    if gapTableOut:
        gapTableOut.close()

    if self.opts.index:
        logging.info("Creating .sa indexes for references")
        r, o, e = exe("sawriter %s.sa %s" % (self.scaffInput, self.scaffInput))
        if r != 0:
            logging.error("sawriter returned %d" % r)
            logging.error("Ensure it's in your path")
            exit(1)
        logging.debug(str(o) + ' ' + str(e))

    logging.info("Finished!")
def grabReads(inputBam, entry, outFn):
    """
    Dump the reads of one region into a file

    Runs `samtools view -h` on inputBam restricted to entry.region and
    redirects the output (header included) to outFn.
    Returns whatever exe returns for that command.
    """
    viewArgs = (inputBam, entry.region, outFn)
    return exe("samtools view -h %s %s > %s" % viewArgs)
def run(self):
    """
    Rename the scaffolds of the reference and tabulate their gaps

    Every sequence in self.scaffInput is rewritten under the name
    "<original name, spaces replaced by _>|ref<7 digit index>". If
    self.qualInput is set the qual file is rewritten with the same names.
    The untouched inputs are kept next to the new files with a
    ".original" suffix.

    If self.opts.gapOutput is set, one tab separated row per gap is
    written to it: scaffold name, gap start, gap end, gap name
    ("<index>_<n>_<n+1>") and a flag built from Gap.BEGIN / Gap.END.
    Scaffolds without gaps get a single 'na' row with the flag '3'.

    If self.opts.index is set, sawriter is run on the renamed reference
    and the process exits with status 1 when it fails.
    """
    #Fasta Ref Output
    scaffTempName = self.scaffInput + ".tempFasta"
    scaffOutput = open(scaffTempName, 'w')

    #Qual Ref Output
    if self.qualInput is not None:
        qualTempName = self.qualInput + ".tempQual"
        qualOutput = open(qualTempName, 'w')

    #Gaps Output -- stays None when no gap table was asked for
    gapTableOut = None
    if self.opts.gapOutput is not None:
        gapTableOut = open(self.opts.gapOutput, 'w')

    logging.info("Creating reference sequence index names and identifying gaps")

    refTemplate = "ref%07d"
    refId = 1

    #A gap is a run of N between two non-N bases. The pattern is the same
    #for every scaffold so it is compiled once.
    gapPattern = re.compile("[^Nn]([Nn]{%d,%s})[^Nn]" % \
                            (self.opts.minGap, self.opts.maxGap))

    #Read References
    reference = FastaFile(self.scaffInput)
    if self.qualInput is not None:
        qualReference = QualFile(self.qualInput)

    for key in reference:
        scaffIndex = refTemplate % refId
        refId += 1
        scaffName = key.replace(' ', '_') + "|" + scaffIndex

        scaffOutput.write(">" + scaffName + "\n" + wrap(reference[key]) + "\n")
        if self.qualInput is not None:
            qualOutput.write(">" + scaffName + "\n" + qwrap(qualReference[key]) + "\n")

        #The match includes one flanking base on each side -- trim them off
        gapCoords = [[gap.start() + 1, gap.end() - 1]
                     for gap in gapPattern.finditer(reference[key])]

        if len(gapCoords) == 0: #no Gaps
            #Only touch the table if one was requested. The unguarded write
            #used to crash here whenever gapOutput was not set.
            if gapTableOut is not None:
                gapTableOut.write("\t".join(
                    [scaffName, 'na', 'na', scaffIndex + "_0_0", '3']) + '\n')
            logging.debug("Scaffold %s is empty" % scaffName)
            continue

        #Consolidate gaps that are too close -- indicating LQ regions.
        i = 0
        while i < len(gapCoords) - 1:
            if gapCoords[i + 1][0] - gapCoords[i][1] < 25:
                #extend the next gap backwards and drop the current one
                gapCoords[i + 1][0] = gapCoords[i][0]
                del gapCoords[i]
            else:
                i += 1

        if gapTableOut is None:
            continue

        lastGap = len(gapCoords) - 1
        for idx, (gapStart, gapEnd) in enumerate(gapCoords):
            if idx == 0:
                flag = Gap.BEGIN
                if lastGap == 0:
                    #a lone gap is both the first and the last one
                    flag += Gap.END
            elif idx == lastGap:
                flag = Gap.END
            else:
                flag = 0
            gapTableOut.write("%s\t%i\t%i\t%s_%i_%i\t%d\n" \
                % (scaffName, gapStart, gapEnd, scaffIndex, idx, idx + 1, flag))

    #Close shop
    scaffOutput.close()
    os.rename(self.scaffInput, self.scaffInput + ".original")
    os.rename(scaffTempName, self.scaffInput)

    if self.qualInput is not None:
        qualOutput.close()
        os.rename(self.qualInput, self.qualInput + ".original")
        os.rename(qualTempName, self.qualInput)

    if gapTableOut is not None:
        gapTableOut.close()

    if self.opts.index:
        logging.info("Creating .sa indexes for references")
        r, o, e = exe("sawriter %s.sa %s" % (self.scaffInput, self.scaffInput))
        if r != 0:
            logging.error("sawriter returned %d" % r)
            logging.error("Ensure it's in your path")
            exit(1)
        logging.debug(str(o) + ' ' + str(e))

    logging.info("Finished!")
def pileup(bam):
    """
    Create a pileup from the bam

    Runs `samtools mpileup` on bam and redirects the output to
    "<bam>.plup". Returns whatever exe returns for that command.

    NOTE(review): `reference` is not a parameter of this function; it is
    looked up in an outer scope that is not visible here -- confirm that
    it is defined before this is called.
    """
    plupCmd = "samtools mpileup -f {0} {1} > {1}.plup".format(reference, bam)
    return exe(plupCmd)
def consensusCalling(self, spot, bam, reference, args):
    """
    Make a consensus of all the reads in the region and identify all of
    the SVs in the region

    spot      -- candidate being refined; chrom, start, end, svtype,
                 varReads and tags are read from it
    bam       -- alignment file, only bam.fetch(chrom, start, end) is used
    reference -- reference sequence, only reference.fetch(chrom, start, end)
                 is used
    args      -- options; buffer, polish ("pbbanana" or "pbdagcon"),
                 minErrReads and minIndelSize are read from it

    Returns
      [spot]  (tagged "noSpan") when no read spans the whole region
      []      when phrap fails or produces no contig
      otherwise a list of deep copies of spot, one per INS/DEL found in the
      consensus-vs-reference alignment whose type matches spot.svtype.
    Raises ValueError when args.polish is not a known method.
    """
    MAXNUMREADS = 100 #I don't think we'll need more than this many reads
    SPANBUFFER = 100 #number of bases I want a read to span

    chrom, start, end = spot.chrom, spot.start, spot.end
    flank = args.buffer
    #First reference base that is written to the local reference below.
    #Positions reported on the consensus alignment are relative to it.
    refStart = max(0, start - flank)

    supportReads = []
    spanReads = []

    #Fetch reads and trim
    totCnt = 0
    for read in bam.fetch(chrom, max(0, start - flank - SPANBUFFER),
                          end + flank + SPANBUFFER):
        if read.qname not in spot.varReads:
            continue
        seq, qual = self.readTrim(read, start - flank, end + flank)
        if read.pos < start - SPANBUFFER and read.aend > end + SPANBUFFER:
            spanReads.append((len(seq), seq, qual))
        else:
            supportReads.append((seq, qual))
        totCnt += 1

    if len(spanReads) == 0:
        logging.debug("noone spans - consensus aborted. %s" % (str(spot)))
        spot.tags["noSpan"] = True
        return [spot]

    #Longest spanning reads first, then the partial reads, capped at
    #MAXNUMREADS in total
    spanReads.sort(reverse=True)
    origSupportReads = ([(x[1], x[2]) for x in spanReads] \
                        + supportReads)[:MAXNUMREADS]
    logging.debug("Alt reads: %d total, %d extra support" %
                  (totCnt, len(origSupportReads)))

    #building consensus sequence
    #mode="w" so that str can be written on python 2 and 3 alike
    foutreads = NamedTemporaryFile(suffix=".fasta", mode="w")
    qoutreads = open(foutreads.name + '.qual', 'w')
    for readId, (seq, qual) in enumerate(origSupportReads):
        foutreads.write(">%d\n%s\n" % (readId, seq))
        #qualities are written as phred integers (ascii - 33)
        qoutreads.write(">%d\n%s\n" % (readId, " ".join(str(ord(j) - 33)
                                                         for j in qual)))
    foutreads.flush()
    #plain file: closing it keeps it on disk for phrap
    qoutreads.close()

    logging.debug("Making the contig....")
    #run it through phrap -- makes <reads>.contigs and <reads>.contigs.qual
    r, o, e = exe("phrap %s -minmatch 6 -minscore 20" % (foutreads.name),
                  timeout=3)
    if r != 0: #failed
        logging.warning('phrap failed ' + self.name)
        logging.warning(o)
        logging.warning(e)
        foutreads.close()
        return [] #here is where I'd like to add just the no-consensus spot

    results = mergeFastaQual(foutreads.name + ".contigs",
                             foutreads.name + ".contigs.qual")
    if len(results) == 0:
        logging.warning('no asm made ' + self.name)
        foutreads.close()
        return [] #here is where I'd like to add just the no-consensus spot

    logging.info('%d contigs made %s' % (len(results), self.name))

    #then run it through consensus
    logging.debug("Polishing contigs")
    alignOut = NamedTemporaryFile(suffix=".m5")
    blasr(foutreads.name, foutreads.name + ".contigs",
          format="-m 5", nproc=1, outname=alignOut.name)

    if args.polish == "pbbanana":
        aligns = M5File(alignOut.name)
        con = ">con\n%s\n" % consensus(aligns).sequence
        conName = "pbbanana"
    elif args.polish == "pbdagcon":
        logging.debug("pbdagcon is running")
        #using minerrreads - 1 because one of them is already being used
        #as seed!
        r, con, e = exe("pbdagcon -c %d -t 0 %s" %
                        (max(0, args.minErrReads - 1), alignOut.name),
                        timeout=1)
        logging.debug("back from pbdagcon")
        logging.debug((r, e))
        if con is not None:
            #Drop the first output line. partition gives "" when there is
            #no newline at all, where index() used to raise.
            con = con.partition("\n")[2]
        else:
            con = ""
        conName = "pbdagcon"
    else:
        #con/conName would be unbound below -- fail with a clear message
        alignOut.close()
        foutreads.close()
        raise ValueError("Unknown polish method '%s'" % (args.polish))

    alignOut.close()
    foutreads.close()

    #we don't have a consensus - fall back to the first raw contig
    if len(con) == 0:
        logging.debug("Trying another seed read for consensus")
        #Wrap it as a fasta record like the polished consensus, otherwise
        #the length lookup below fails and a headerless file gets mapped
        con = ">con\n%s\n" % list(results.values())[0].seq

    logging.debug("%s %d bp seq" % (conName, len(con.split('\n')[1])))

    #try improving consensus
    conOut = NamedTemporaryFile(suffix=".fasta", mode="w")
    conOut.write(con)
    conOut.flush()

    refOut = NamedTemporaryFile(suffix=".fasta", mode="w")
    refOut.write(">%s:%d-%d\n%s\n" % (chrom, start, end, \
                 reference.fetch(chrom, refStart, end + flank)))
    refOut.flush()

    #map consensus to refregion
    varSam = NamedTemporaryFile(suffix=".sam")
    blasr(conOut.name, refOut.name, format="--sam", outname=varSam.name)

    sam = pysam.Samfile(varSam.name)

    nReads = 0
    mySpots = []
    for read in sam:
        nReads += 1
        spot.tags["consensusCreated"] = True
        for svstart, svsize, svtype, altseq in expandCigar(
                read, args.minIndelSize, CONFIRMCOLLAPSE, True):
            newspot = copy.deepcopy(spot)

            if spot.svtype == svtype and svtype == "INS":
                #NOTE(review): offsets use refStart (start of the fetched
                #window) where the code used to add start - buffer; the two
                #only differ when start < buffer -- confirm against
                #expandCigar
                newspot.start = svstart + refStart
                newspot.end = svstart + refStart
                newspot.tags["seq"] = altseq
                newspot.size = svsize
                gt, gq = genotype(newspot)
                newspot.tags["GT"] = gt
                newspot.tags["GQ"] = gq
                mySpots.append(newspot)

            elif spot.svtype == svtype and svtype == "DEL":
                newspot.start = svstart + refStart
                newspot.end = svstart + svsize + refStart
                newspot.size = -svsize
                gt, gq = genotype(newspot)
                newspot.tags["GT"] = gt
                newspot.tags["GQ"] = gq
                newspot.tags["seq"] = reference.fetch(chrom, newspot.start,
                                                      newspot.end)
                mySpots.append(newspot)

    #If no var, nothing is returned.
    sam.close()
    varSam.close()
    conOut.close()
    refOut.close()

    logging.debug("%d consensus reads created %d spots" %
                  (nReads, len(mySpots)))
    return mySpots
def __assemble(self, reads):
    """
    Calls the assembler

    Writes reads (an iterable of name, seq, qual) to temporary fasta/qual
    files in self.tmpDir, runs phrap on them and, if phrap left a non-empty
    ".problems" file, runs phrap on that file as well. The merged contigs
    are saved as fastq to a new temporary file whose name is stored in
    self.results and returned. Intermediate files are tracked in
    self.myTmpFiles and removed through the parent's cleanupTmp.

    Returns a "Failure - Assembly Timeout ..." string when exe reports
    return code 214 for either phrap run.
    """
    phrapCmd = "phrap %s -minmatch 6 -minscore 20"
    phrapOutputs = (".contigs", ".contigs.qual",
                    ".problems", ".problems.qual",
                    ".log", ".singlets")

    def timedOut(fastaFn):
        """
        Runs phrap on fastaFn, tracks its output files and tells whether
        exe returned code 214
        """
        retCode, _, _ = exe(phrapCmd % (fastaFn), timeout=self.timeout)
        self.myTmpFiles.extend([fastaFn + ext for ext in phrapOutputs])
        return retCode == 214

    self.myTmpFiles = []

    #Temporary Files
    seqOut = tempfile.NamedTemporaryFile(suffix=".fasta", mode="w",
                                         delete=False, dir=self.tmpDir)
    readsFn = seqOut.name
    self.myTmpFiles.append(readsFn)
    qualOut = open(readsFn + '.qual', 'w')
    self.myTmpFiles.append(readsFn + '.qual')

    for name, seq, qual in reads:
        seqOut.write(">{0}\n{1}\n".format(name, seq))
        qualOut.write(">{0}\n{1}\n".format(name, qual))
    seqOut.close()
    qualOut.close()

    if timedOut(readsFn):
        super(PhrapAssembler, self).cleanupTmp()
        return "Failure - Assembly Timeout " + self.data.name

    results = mergeFastaQual(readsFn + ".contigs", readsFn + ".contigs.qual")

    #Try to push the problems through, too
    problemsFn = readsFn + ".problems"
    if os.stat(problemsFn).st_size != 0:
        if timedOut(problemsFn):
            super(PhrapAssembler, self).cleanupTmp()
            return "Failure - Assembly Timeout " + self.data.name
        results.update(mergeFastaQual(readsFn + ".problems.contigs",
                                      readsFn + ".problems.contigs.qual"))

    #save to file
    asmOut = tempfile.NamedTemporaryFile(prefix="asm" + self.data.name,
                                         mode="w", suffix=".fastq",
                                         delete=False, dir=self.tmpDir)
    for key in results:
        contig = results[key]
        asmOut.write("@group" + self.data.name + "_" + key + "\n" + \
                     contig.seq + '\n+\n' + \
                     contig.qual + '\n')
    asmOut.close()
    self.results = asmOut.name

    #clean up
    super(PhrapAssembler, self).cleanupTmp()
    return self.results