def _confirm_rebuild(message):
    """Ask on the terminal whether an existing index should be rebuilt.

    Prints ``message`` and then reads single key presses with ``getch()``
    until the user answers Y or N. The answer is case-insensitive, so an
    upper-case ``N`` is honoured as "do not rebuild".

    Returns:
        bool: True when the user pressed Y/y, False when the user pressed N/n.
    """
    print(message)
    while True:
        char = getch()
        answer = char.lower()
        if answer in ("y", "n"):
            print(char)
            return answer == "y"


def main():
    """Run the command-line probe design pipeline.

    Steps, in order:
      1. Parse options and derive the k-mer size from the genome file size.
      2. Build (or reuse) the Jellyfish k-mer index and the BWA index.
      3. Cut every input sequence into blocks of at most ``spsize`` bases and
         run ``jellyfish.kmerfilterprobe`` on them in a process pool. When the
         genome file is 1000 MB or larger the input is first split with
         ``spgenome.spgenome`` and each part is processed in turn.
      4. Write the surviving probes to a temporary FASTA file and filter them
         with ``bwa.bwafilter``.
      5. Run ``probefilter`` on the remaining probes in a second process pool.
      6. Write ``<input>.len``, ``<input>_all.bed`` (every kept probe) and
         ``<input>.bed`` (thinned probes) into ``args.saved``.

    Exits with status 1 when the Jellyfish count step reports failure.
    """
    args = check_options(get_options())

    # Genome size in (decimal) megabytes, taken from the file size.
    genomesize = int(os.path.getsize(args.genome) / 1e6)

    # log() is undefined for 0, which is what a genome file below 1 MB yields;
    # fall back to the minimum k-mer size in that case instead of crashing.
    if genomesize > 0:
        kmer = max(17, int(log(genomesize, 4) + 1))
    else:
        kmer = 17

    # jellyfish parameters
    lowercount = 2
    jfsize = '100M'

    # sequences longer than 10M are split into blocks of this size
    spsize = 10000000

    step = args.step

    maxkmerscore = int(((args.length * args.homology / 100) - kmer) * args.ploidy / 2 + 0.5)

    jfpool = Pool(args.threads)

    # Decide whether the k-mer index must be (re)built.
    jfkmerfile = os.path.join(
        args.saved, os.path.basename(args.genome) + '_' + str(kmer) + 'mer.jf')
    kmerbuild = True
    if os.path.isfile(jfkmerfile) and not args.docker:
        print("find:", jfkmerfile)
        kmerbuild = _confirm_rebuild(
            "Found kmerfile " + jfkmerfile
            + ". Do you want rebuild it? Press Y or N to continue:")

    # Decide whether the BWA index must be (re)built.
    bwaindexfile = os.path.basename(args.genome)
    bwatestindex = os.path.join(args.saved, bwaindexfile + '.sa')
    bwaindex = os.path.join(args.saved, bwaindexfile)
    bwabuild = True
    if os.path.isfile(bwatestindex) and not args.docker:
        print('find:', bwatestindex)
        bwabuild = _confirm_rebuild(
            "Found bwa index file " + bwatestindex
            + ". Do you want rebuild it? Press Y or N to continue:")

    print("genomesize:", genomesize, "kmer:", kmer, "jfkmerfile:", jfkmerfile,
          "kmerbuild:", kmerbuild, "bwabuild:", bwabuild, "threads:", args.threads)

    # Build Jellyfish index
    if kmerbuild:
        jfcount = jellyfish.jfcount(jfpath=args.jellyfish, mer=kmer, infile=args.genome,
                                    output=jfkmerfile, threads=args.threads,
                                    lowercount=lowercount, size=jfsize)
        if jfcount:
            print("JellyFish Count finished ...")
        else:
            print("JellyFish Count Error!!!")
            sys.exit(1)
    else:
        print("Use ", jfkmerfile)

    # Build BWA index
    if bwabuild:
        bwa.bwaindex(args.bwa, args.genome, args.saved)
        print("bwa index build finished ...")
    else:
        print("Use", bwatestindex)

    # k-mer filter. Genome files of 1000 MB or more are split into several
    # FASTA files first; otherwise the input file is processed directly.
    splitgenome = genomesize >= 1000
    if splitgenome:
        print("genome size > 1G")
        fastafiles = spgenome.spgenome(args.input, args.saved)
    else:
        fastafiles = [args.input]

    jffilteredprobe = list()
    for fastafile in fastafiles:
        if splitgenome:
            print(fastafile)
            progresslabel = fastafile + " Jellyfish filter: "
        else:
            progresslabel = "Jellyfish filter: "

        fastain = Fasta(fastafile)
        jffpbrunerlist = list()
        for seqname in fastain.keys():
            chrlen = len(fastain[seqname])
            # One job per block of at most spsize bases. Stepping by spsize
            # avoids the extra block with start > end that the former
            # int(chrlen / spsize) + 1 count produced whenever chrlen was an
            # exact multiple of spsize. max(chrlen, 1) keeps the single
            # (0, -1) job for an empty sequence, as before.
            for start in range(0, max(chrlen, 1), spsize):
                end = min(start + spsize, chrlen) - 1
                jffpbrunerlist.append(
                    jellyfish.JFfpbruner(jfpath=args.jellyfish, jfkmerfile=jfkmerfile,
                                         mer=kmer, pyfasta=fastain, seqname=seqname,
                                         pblength=args.length, maxkmerscore=maxkmerscore,
                                         start=start, end=end, step=step))

        jobtotal = len(jffpbrunerlist)
        print(jobtotal)
        jffinished = 0
        for curpblist in jfpool.imap_unordered(jellyfish.kmerfilterprobe, jffpbrunerlist):
            jffilteredprobe.extend(curpblist)
            jffinished += 1
            print(progresslabel, jffinished, '/', jobtotal, sep='')

    # The pool is shared by all FASTA parts, so it is closed only once
    # every part has been processed.
    jfpool.close()
    print('Jellyfish filter finished!!')

    # Dump the k-mer filtered probes to a temporary FASTA file for BWA.
    tmppbfa = os.path.join(args.saved, os.path.basename(args.input) + '_tmp_probe.fa')
    with open(tmppbfa, 'w') as tmppbfaio:
        for seqnum, tmppb in enumerate(jffilteredprobe):
            print('>', 'seq', seqnum, sep='', file=tmppbfaio)
            print(tmppb, file=tmppbfaio)

    # release memory
    del jffilteredprobe

    bwafiltedpb = bwa.bwafilter(bwabin=args.bwa, reffile=bwaindex, inputfile=tmppbfa,
                                minas=args.length,
                                maxxs=int(args.length * args.homology / 100),
                                threadnumber=args.threads)

    tmpbwaftlist = os.path.join(args.saved, os.path.basename(args.input) + '.bed')
    alltmpbwaftlist = os.path.join(args.saved, os.path.basename(args.input) + '_all.bed')

    # Reference sequence lengths, one "name<TAB>length" line per sequence.
    seqlenfile = os.path.join(args.saved, os.path.basename(args.input) + '.len')
    seqlength = bwa.bwareflength(bwabin=args.bwa, reffile=bwaindex)
    with open(seqlenfile, 'w') as seqlenio:
        for seqname in seqlength:
            print(seqname, seqlength[seqname], sep='\t', file=seqlenio)

    # One argument dict per probe for probefilter.
    oligobefortmf = [
        {'seq': pbtmp, 'dTm': args.dtm, 'rprimer': args.primer}
        for pbtmp in bwafiltedpb
    ]

    keepedprobe = list()
    ctedpb = 0
    oligobefortmflen = len(oligobefortmf)
    print("oligobefortmflen:", oligobefortmflen)

    pbftpool = Pool()
    for (pb, keep) in pbftpool.imap_unordered(probefilter, oligobefortmf):
        if keep:
            keepedprobe.append(pb)
        ctedpb += 1
        if ctedpb % 10000 == 0:
            print(ctedpb, '/', oligobefortmflen)
    pbftpool.close()

    # Group kept probes as {chromosome: {start: sequence}}.
    pbdictbychr = dict()
    for pb in keepedprobe:
        seq, chro, start = pb.split('\t')
        pbdictbychr.setdefault(chro, dict())[int(start)] = seq

    lenrprimer = len(args.primer)
    if lenrprimer == 0:
        lenrprimer = 5
    slidwindow = lenrprimer + args.length

    with open(tmpbwaftlist, 'w') as tmpbwaftlistio, \
            open(alltmpbwaftlist, 'w') as allbwaftlistio:
        for chro in pbdictbychr:
            startn = 0
            for startnow in sorted(pbdictbychr[chro]):
                endnow = startnow + args.length - 1
                # Every kept probe goes to the "_all" file.
                print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                      file=allbwaftlistio, sep='\t')
                # Only probes starting more than slidwindow bases after the
                # last written probe (initially position 0) go to the .bed file.
                if startnow > startn + slidwindow:
                    startn = startnow
                    print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                          file=tmpbwaftlistio, sep='\t')

    print("Job finshed!!")
def run(self):
    """Run the probe design pipeline, reporting through notify emitters.

    Progress values are sent with ``self.notifyProgress.emit`` and status
    text with ``self.notifyMessage.emit``.

    Steps, in order:
      1. Build the Jellyfish k-mer index when ``self.kmerbuild`` is set.
         If the count step reports failure the method emits an error
         message and returns without running the rest of the pipeline.
      2. Build the BWA index when ``self.indexbuild`` is set and
         ``self.aligner`` is ``'BWA'``.
      3. Cut every sequence of ``self.inputfile`` into blocks of at most
         10,000,000 bases and run ``jellyfish.kmerfilterprobe`` on them
         through ``self.pool``.
      4. Write the surviving probes to a temporary FASTA file and filter
         them with ``bwa.bwafilter``.
      5. Run ``probefilter`` on the remaining probes through ``self.pool``.
      6. Write ``<input>.len``, ``<input>_all.bed`` (every kept probe) and
         ``<input>.bed`` (thinned probes) into ``self.samplefolder``.
    """
    if self.kmerbuild:
        jfcounter = jellyfish.jfcount(jfpath=self.jellyfishpath,
                                      mer=self.kmer,
                                      infile=self.genomefile,
                                      output=self.jfkmerfile,
                                      threads=self.threadsnumber,
                                      lowercount=self.lowercount,
                                      size=self.size)
        # check jellyfish count ran correctly
        if not jfcounter:
            self.notifyMessage.emit("JellyFish Count Error!!!")
            # Without a k-mer index the later filter steps cannot work,
            # so stop here instead of continuing.
            return
        self.progressnumber = self.progressnumber + 5
        self.notifyProgress.emit(self.progressnumber)
        self.notifyMessage.emit("JellyFish Count finished...")
    else:
        jfcountmess = "Use " + self.jfkmerfile
        self.progressnumber = self.progressnumber + 5
        self.notifyProgress.emit(self.progressnumber)
        self.notifyMessage.emit(jfcountmess)

    if self.indexbuild:
        if self.aligner == 'BWA':
            bwa.bwaindex(self.alnpath, self.genomefile, self.samplefolder)
            self.notifyMessage.emit("BWA Index build finished...")
            self.progressnumber = self.progressnumber + 5
            self.notifyProgress.emit(self.progressnumber)
        elif self.aligner == 'BLAT':
            # TODO: add code for BLAT
            pass
    else:
        self.progressnumber = self.progressnumber + 5
        self.notifyProgress.emit(self.progressnumber)

    # load and split input file
    # sequences longer than 10M are split into blocks of this size
    spsize = 10000000

    maxkmerscore = int(self.pblength * self.homology / 100) - self.kmer

    jffilteredprobe = list()
    fastain = Fasta(self.inputfile)
    jffpbrunerlist = list()
    for seqname in fastain.keys():
        chrlen = len(fastain[seqname])
        # One job per block of at most spsize bases. Stepping by spsize
        # avoids the extra block with start > end that the former
        # int(chrlen / spsize) + 1 count produced whenever chrlen was an
        # exact multiple of spsize. max(chrlen, 1) keeps the single
        # (0, -1) job for an empty sequence, as before.
        for start in range(0, max(chrlen, 1), spsize):
            end = min(start + spsize, chrlen) - 1
            jffpbrunerlist.append(
                jellyfish.JFfpbruner(jfpath=self.jellyfishpath,
                                     jfkmerfile=self.jfkmerfile,
                                     mer=self.kmer,
                                     pyfasta=fastain,
                                     seqname=seqname,
                                     pblength=self.pblength,
                                     maxkmerscore=maxkmerscore,
                                     start=start,
                                     end=end,
                                     step=self.step))

    jobtotal = len(jffpbrunerlist)
    jffinished = 0
    for curpblist in self.pool.imap_unordered(jellyfish.kmerfilterprobe,
                                              jffpbrunerlist):
        jffilteredprobe.extend(curpblist)
        # This stage spans 40 progress points on top of the current value.
        tmpprogress = float(
            format(self.progressnumber + (jffinished / jobtotal * 40), ".2f"))
        self.notifyProgress.emit(tmpprogress)
        if self.isRunning():
            print("running")
        else:
            print("not running")
        jffinished += 1

    self.notifyMessage.emit('kmer filter finished!!')
    self.progressnumber = 50.0
    self.notifyProgress.emit(self.progressnumber)

    # Dump the k-mer filtered probes to a temporary FASTA file for BWA.
    tmppbfa = os.path.join(
        self.samplefolder,
        os.path.basename(self.inputfile) + '_tmp_probes.fa')
    with open(tmppbfa, 'w') as tmppbfaio:
        for seqnum, tmppb in enumerate(jffilteredprobe):
            print('>', 'seq', seqnum, sep='', file=tmppbfaio)
            print(tmppb, file=tmppbfaio)

    # delete jffilteredprobe and release memory
    del jffilteredprobe

    bwaindexfile = os.path.join(self.samplefolder,
                                os.path.basename(self.genomefile))
    bwafiltedpb = bwa.bwafilter(bwabin=self.alnpath,
                                reffile=bwaindexfile,
                                inputfile=tmppbfa,
                                minas=self.pblength,
                                maxxs=int(self.pblength * self.homology / 100),
                                threadnumber=self.threadsnumber)

    tmpbwaftlist = os.path.join(self.samplefolder,
                                os.path.basename(self.inputfile) + '.bed')
    alltmpbwaftlist = os.path.join(
        self.samplefolder,
        os.path.basename(self.inputfile) + '_all.bed')

    # Reference sequence lengths, one "name<TAB>length" line per sequence.
    seqlenfile = os.path.join(self.samplefolder,
                              os.path.basename(self.inputfile)) + '.len'
    seqlength = bwa.bwareflength(bwabin=self.alnpath, reffile=bwaindexfile)
    with open(seqlenfile, 'w') as seqlenio:
        for seqname in seqlength:
            print(seqname, seqlength[seqname], sep='\t', file=seqlenio)

    # One argument dict per probe for probefilter.
    oligobefortmf = [
        {'seq': pbtmp, 'dTm': self.dTm, 'rprimer': self.rprimer}
        for pbtmp in bwafiltedpb
    ]

    keepedprobe = list()
    self.progressnumber = 55
    self.notifyProgress.emit(self.progressnumber)

    ctedpb = 0
    oligobefortmflen = len(oligobefortmf)
    for (pb, keep) in self.pool.imap_unordered(probefilter, oligobefortmf):
        if keep:
            keepedprobe.append(pb)
        ctedpb += 1
        # Report every 10000 processed probes; this stage spans 30 points.
        if ctedpb % 10000 == 0:
            tmpprogress = float(
                format(self.progressnumber + (ctedpb / oligobefortmflen * 30),
                       ".2f"))
            self.notifyProgress.emit(tmpprogress)

    self.notifyProgress.emit(90)

    # load probes into {chromosome: {start: sequence}}
    pbdictbychr = dict()
    for pb in keepedprobe:
        seq, chro, start = pb.split('\t')
        pbdictbychr.setdefault(chro, dict())[int(start)] = seq

    # get length of primer
    lenrprimer = len(self.rprimer)
    if lenrprimer == 0:
        lenrprimer = 5
    slidwindow = lenrprimer + self.pblength

    with open(tmpbwaftlist, 'w') as tmpbwaftlistio, \
            open(alltmpbwaftlist, 'w') as allbwaftlistio:
        for chro in pbdictbychr:
            startn = 0
            for startnow in sorted(pbdictbychr[chro]):
                endnow = startnow + self.pblength - 1
                # Every kept probe goes to the "_all" file.
                print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                      file=allbwaftlistio, sep='\t')
                # Only probes starting more than slidwindow bases after the
                # last written probe (initially position 0) go to the .bed file.
                if startnow > startn + slidwindow:
                    startn = startnow
                    print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                          file=tmpbwaftlistio, sep='\t')

    # The temporary FASTA file (tmppbfa) is intentionally left on disk.
    self.notifyProgress.emit(100)
    self.notifyMessage.emit('all finished!!')
def _confirm_rebuild(message):
    """Ask on the terminal whether an existing index should be rebuilt.

    Prints ``message`` and then reads single key presses with ``getch()``
    until the user answers Y or N. The answer is case-insensitive, so an
    upper-case ``N`` is honoured as "do not rebuild".

    Returns:
        bool: True when the user pressed Y/y, False when the user pressed N/n.
    """
    print(message)
    while True:
        char = getch()
        answer = char.lower()
        if answer in ("y", "n"):
            print(char)
            return answer == "y"


def main():
    """Run the command-line probe design pipeline.

    Steps, in order:
      1. Parse options and derive the k-mer size from the genome file size.
      2. Build (or reuse) the Jellyfish k-mer index and the BWA index.
      3. Cut every input sequence into blocks of at most ``spsize`` bases and
         run ``jellyfish.kmerfilterprobe`` on them in a process pool.
      4. Write the surviving probes to a temporary FASTA file and filter them
         with ``bwa.bwafilter``.
      5. Run ``probefilter`` on the remaining probes in a second process pool.
      6. Write ``<input>.len``, ``<input>_all.bed`` (every kept probe) and
         ``<input>.bed`` (thinned probes) into ``args.saved``.

    Exits with status 1 when the Jellyfish count step reports failure.
    """
    args = check_options(get_options())

    # Genome size in (decimal) megabytes, taken from the file size.
    genomesize = int(os.path.getsize(args.genome) / 1e6)

    # log() is undefined for 0, which is what a genome file below 1 MB yields;
    # fall back to the minimum k-mer size in that case instead of crashing.
    if genomesize > 0:
        kmer = max(17, int(log(genomesize, 4) + 1))
    else:
        kmer = 17

    # jellyfish parameters
    lowercount = 2
    jfsize = '100M'

    # sequences longer than 10M are split into blocks of this size
    spsize = 10000000

    step = args.step

    maxkmerscore = int(args.length * args.homology / 100) - kmer

    jfpool = Pool(args.threads)

    # Decide whether the k-mer index must be (re)built.
    jfkmerfile = os.path.join(
        args.saved, os.path.basename(args.genome) + '_' + str(kmer) + 'mer.jf')
    kmerbuild = True
    if os.path.isfile(jfkmerfile) and not args.docker:
        print("find:", jfkmerfile)
        kmerbuild = _confirm_rebuild(
            "Found kmerfile " + jfkmerfile
            + ". Do you want rebuild it? Press Y or N to continue:")

    # Decide whether the BWA index must be (re)built.
    bwaindexfile = os.path.basename(args.genome)
    bwatestindex = os.path.join(args.saved, bwaindexfile + '.sa')
    bwaindex = os.path.join(args.saved, bwaindexfile)
    bwabuild = True
    if os.path.isfile(bwatestindex) and not args.docker:
        print('find:', bwatestindex)
        bwabuild = _confirm_rebuild(
            "Found bwa index file " + bwatestindex
            + ". Do you want rebuild it? Press Y or N to continue:")

    print("genomesize:", genomesize, "kmer:", kmer, "jfkmerfile:", jfkmerfile,
          "kmerbuild:", kmerbuild, "bwabuild:", bwabuild, "threads:", args.threads)

    # Build Jellyfish index
    if kmerbuild:
        jfcount = jellyfish.jfcount(jfpath=args.jellyfish, mer=kmer, infile=args.genome,
                                    output=jfkmerfile, threads=args.threads,
                                    lowercount=lowercount, size=jfsize)
        if jfcount:
            print("JellyFish Count finished ...")
        else:
            print("JellyFish Count Error!!!")
            sys.exit(1)
    else:
        print("Use ", jfkmerfile)

    # Build BWA index
    if bwabuild:
        bwa.bwaindex(args.bwa, args.genome, args.saved)
        print("bwa index build finished ...")
    else:
        print("Use", bwatestindex)

    # k-mer filter
    jffilteredprobe = list()
    fastain = Fasta(args.input)
    jffpbrunerlist = list()
    for seqname in fastain.keys():
        chrlen = len(fastain[seqname])
        # One job per block of at most spsize bases. Stepping by spsize
        # avoids the extra block with start > end that the former
        # int(chrlen / spsize) + 1 count produced whenever chrlen was an
        # exact multiple of spsize. max(chrlen, 1) keeps the single
        # (0, -1) job for an empty sequence, as before.
        for start in range(0, max(chrlen, 1), spsize):
            end = min(start + spsize, chrlen) - 1
            jffpbrunerlist.append(
                jellyfish.JFfpbruner(jfpath=args.jellyfish, jfkmerfile=jfkmerfile,
                                     mer=kmer, pyfasta=fastain, seqname=seqname,
                                     pblength=args.length, maxkmerscore=maxkmerscore,
                                     start=start, end=end, step=step))

    jobtotal = len(jffpbrunerlist)
    jffinished = 0
    for curpblist in jfpool.imap_unordered(jellyfish.kmerfilterprobe, jffpbrunerlist):
        jffilteredprobe.extend(curpblist)
        jffinished += 1
        print("Jellyfish filter: ", jffinished, '/', jobtotal, sep='')

    jfpool.close()
    print('Jellyfish filter finished!!')

    # Dump the k-mer filtered probes to a temporary FASTA file for BWA.
    tmppbfa = os.path.join(args.saved, os.path.basename(args.input) + '_tmp_probe.fa')
    with open(tmppbfa, 'w') as tmppbfaio:
        for seqnum, tmppb in enumerate(jffilteredprobe):
            print('>', 'seq', seqnum, sep='', file=tmppbfaio)
            print(tmppb, file=tmppbfaio)

    # release memory
    del jffilteredprobe

    bwafiltedpb = bwa.bwafilter(bwabin=args.bwa, reffile=bwaindex, inputfile=tmppbfa,
                                minas=args.length,
                                maxxs=int(args.length * args.homology / 100),
                                threadnumber=args.threads)

    tmpbwaftlist = os.path.join(args.saved, os.path.basename(args.input) + '.bed')
    alltmpbwaftlist = os.path.join(args.saved, os.path.basename(args.input) + '_all.bed')

    # Reference sequence lengths, one "name<TAB>length" line per sequence.
    seqlenfile = os.path.join(args.saved, os.path.basename(args.input) + '.len')
    seqlength = bwa.bwareflength(bwabin=args.bwa, reffile=bwaindex)
    with open(seqlenfile, 'w') as seqlenio:
        for seqname in seqlength:
            print(seqname, seqlength[seqname], sep='\t', file=seqlenio)

    # One argument dict per probe for probefilter.
    oligobefortmf = [
        {'seq': pbtmp, 'dTm': args.dtm, 'rprimer': args.primer}
        for pbtmp in bwafiltedpb
    ]

    keepedprobe = list()
    ctedpb = 0
    oligobefortmflen = len(oligobefortmf)
    print("oligobefortmflen:", oligobefortmflen)

    pbftpool = Pool()
    for (pb, keep) in pbftpool.imap_unordered(probefilter, oligobefortmf):
        if keep:
            keepedprobe.append(pb)
        ctedpb += 1
        if ctedpb % 10000 == 0:
            print(ctedpb, '/', oligobefortmflen)
    pbftpool.close()

    # Group kept probes as {chromosome: {start: sequence}}.
    pbdictbychr = dict()
    for pb in keepedprobe:
        seq, chro, start = pb.split('\t')
        pbdictbychr.setdefault(chro, dict())[int(start)] = seq

    lenrprimer = len(args.primer)
    if lenrprimer == 0:
        lenrprimer = 5
    slidwindow = lenrprimer + args.length

    with open(tmpbwaftlist, 'w') as tmpbwaftlistio, \
            open(alltmpbwaftlist, 'w') as allbwaftlistio:
        for chro in pbdictbychr:
            startn = 0
            for startnow in sorted(pbdictbychr[chro]):
                endnow = startnow + args.length - 1
                # Every kept probe goes to the "_all" file.
                print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                      file=allbwaftlistio, sep='\t')
                # Only probes starting more than slidwindow bases after the
                # last written probe (initially position 0) go to the .bed file.
                if startnow > startn + slidwindow:
                    startn = startnow
                    print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                          file=tmpbwaftlistio, sep='\t')

    print("Job finshed!!")
def run(self):
    """Run the probe design pipeline, reporting through notify emitters.

    Progress values are sent with ``self.notifyProgress.emit`` and status
    text with ``self.notifyMessage.emit``.

    Steps, in order:
      1. Build the Jellyfish k-mer index when ``self.kmerbuild`` is set.
         If the count step reports failure the method emits an error
         message and returns without running the rest of the pipeline.
      2. Build the BWA index when ``self.indexbuild`` is set and
         ``self.aligner`` is ``'BWA'``.
      3. Cut every sequence of ``self.inputfile`` into blocks of at most
         10,000,000 bases and run ``jellyfish.kmerfilterprobe`` on them
         through ``self.pool``.
      4. Write the surviving probes to a temporary FASTA file and filter
         them with ``bwa.bwafilter``.
      5. Run ``probefilter`` on the remaining probes through ``self.pool``.
      6. Write ``<input>.len``, ``<input>_all.bed`` (every kept probe) and
         ``<input>.bed`` (thinned probes) into ``self.samplefolder``.
    """
    if self.kmerbuild:
        jfcounter = jellyfish.jfcount(jfpath=self.jellyfishpath, mer=self.kmer,
                                      infile=self.genomefile, output=self.jfkmerfile,
                                      threads=self.threadsnumber,
                                      lowercount=self.lowercount, size=self.size)
        # check jellyfish count ran correctly
        if not jfcounter:
            self.notifyMessage.emit("JellyFish Count Error!!!")
            # Without a k-mer index the later filter steps cannot work,
            # so stop here instead of continuing.
            return
        self.progressnumber = self.progressnumber + 5
        self.notifyProgress.emit(self.progressnumber)
        self.notifyMessage.emit("JellyFish Count finished...")
    else:
        jfcountmess = "Use " + self.jfkmerfile
        self.progressnumber = self.progressnumber + 5
        self.notifyProgress.emit(self.progressnumber)
        self.notifyMessage.emit(jfcountmess)

    if self.indexbuild:
        if self.aligner == 'BWA':
            bwa.bwaindex(self.alnpath, self.genomefile, self.samplefolder)
            self.notifyMessage.emit("BWA Index build finished...")
            self.progressnumber = self.progressnumber + 5
            self.notifyProgress.emit(self.progressnumber)
        elif self.aligner == 'BLAT':
            # TODO: add code for BLAT
            pass
    else:
        self.progressnumber = self.progressnumber + 5
        self.notifyProgress.emit(self.progressnumber)

    # load and split input file
    # sequences longer than 10M are split into blocks of this size
    spsize = 10000000

    maxkmerscore = int(self.pblength * self.homology / 100) - self.kmer

    jffilteredprobe = list()
    fastain = Fasta(self.inputfile)
    jffpbrunerlist = list()
    for seqname in fastain.keys():
        chrlen = len(fastain[seqname])
        # One job per block of at most spsize bases. Stepping by spsize
        # avoids the extra block with start > end that the former
        # int(chrlen / spsize) + 1 count produced whenever chrlen was an
        # exact multiple of spsize. max(chrlen, 1) keeps the single
        # (0, -1) job for an empty sequence, as before.
        for start in range(0, max(chrlen, 1), spsize):
            end = min(start + spsize, chrlen) - 1
            jffpbrunerlist.append(
                jellyfish.JFfpbruner(jfpath=self.jellyfishpath,
                                     jfkmerfile=self.jfkmerfile, mer=self.kmer,
                                     pyfasta=fastain, seqname=seqname,
                                     pblength=self.pblength,
                                     maxkmerscore=maxkmerscore,
                                     start=start, end=end, step=self.step))

    jobtotal = len(jffpbrunerlist)
    jffinished = 0
    for curpblist in self.pool.imap_unordered(jellyfish.kmerfilterprobe, jffpbrunerlist):
        jffilteredprobe.extend(curpblist)
        # This stage spans 40 progress points on top of the current value.
        tmpprogress = float(format(self.progressnumber + (jffinished / jobtotal * 40), ".2f"))
        self.notifyProgress.emit(tmpprogress)
        if self.isRunning():
            print("running")
        else:
            print("not running")
        jffinished += 1

    self.notifyMessage.emit('jelly fish finished!!')
    self.progressnumber = 50.0
    self.notifyProgress.emit(self.progressnumber)

    # Dump the k-mer filtered probes to a temporary FASTA file for BWA.
    tmppbfa = os.path.join(self.samplefolder,
                           os.path.basename(self.inputfile) + '_tmp_probes.fa')
    with open(tmppbfa, 'w') as tmppbfaio:
        for seqnum, tmppb in enumerate(jffilteredprobe):
            print('>', 'seq', seqnum, sep='', file=tmppbfaio)
            print(tmppb, file=tmppbfaio)

    # delete jffilteredprobe and release memory
    del jffilteredprobe

    bwaindexfile = os.path.join(self.samplefolder, os.path.basename(self.genomefile))
    bwafiltedpb = bwa.bwafilter(bwabin=self.alnpath, reffile=bwaindexfile,
                                inputfile=tmppbfa, minas=self.pblength,
                                maxxs=int(self.pblength * self.homology / 100),
                                threadnumber=self.threadsnumber)

    tmpbwaftlist = os.path.join(self.samplefolder,
                                os.path.basename(self.inputfile) + '.bed')
    alltmpbwaftlist = os.path.join(self.samplefolder,
                                   os.path.basename(self.inputfile) + '_all.bed')

    # Reference sequence lengths, one "name<TAB>length" line per sequence.
    seqlenfile = os.path.join(self.samplefolder,
                              os.path.basename(self.inputfile)) + '.len'
    seqlength = bwa.bwareflength(bwabin=self.alnpath, reffile=bwaindexfile)
    with open(seqlenfile, 'w') as seqlenio:
        for seqname in seqlength:
            print(seqname, seqlength[seqname], sep='\t', file=seqlenio)

    # One argument dict per probe for probefilter.
    oligobefortmf = [
        {'seq': pbtmp, 'dTm': self.dTm, 'rprimer': self.rprimer}
        for pbtmp in bwafiltedpb
    ]

    keepedprobe = list()
    self.progressnumber = 55
    self.notifyProgress.emit(self.progressnumber)

    ctedpb = 0
    oligobefortmflen = len(oligobefortmf)
    for (pb, keep) in self.pool.imap_unordered(probefilter, oligobefortmf):
        if keep:
            keepedprobe.append(pb)
        ctedpb += 1
        # Report every 10000 processed probes; this stage spans 30 points.
        if ctedpb % 10000 == 0:
            tmpprogress = float(format(self.progressnumber + (ctedpb / oligobefortmflen * 30), ".2f"))
            self.notifyProgress.emit(tmpprogress)

    self.notifyProgress.emit(90)

    # load probes into {chromosome: {start: sequence}}
    pbdictbychr = dict()
    for pb in keepedprobe:
        seq, chro, start = pb.split('\t')
        pbdictbychr.setdefault(chro, dict())[int(start)] = seq

    # get length of primer
    lenrprimer = len(self.rprimer)
    if lenrprimer == 0:
        lenrprimer = 5
    slidwindow = lenrprimer + self.pblength

    with open(tmpbwaftlist, 'w') as tmpbwaftlistio, \
            open(alltmpbwaftlist, 'w') as allbwaftlistio:
        for chro in pbdictbychr:
            startn = 0
            for startnow in sorted(pbdictbychr[chro]):
                endnow = startnow + self.pblength - 1
                # Every kept probe goes to the "_all" file.
                print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                      file=allbwaftlistio, sep='\t')
                # Only probes starting more than slidwindow bases after the
                # last written probe (initially position 0) go to the .bed file.
                if startnow > startn + slidwindow:
                    startn = startnow
                    print(chro, startnow, endnow, pbdictbychr[chro][startnow],
                          file=tmpbwaftlistio, sep='\t')

    # The temporary FASTA file (tmppbfa) is intentionally left on disk.
    self.notifyProgress.emit(100)
    self.notifyMessage.emit('all finished!!')