# NOTE(review): this line appears to be a reformatted, truncated copy of the
# __init__ that is defined in full further down in this file. It stops after
# the "extension[01]" case, so the default case, the base-class initialisation
# and the generateJobList() call are all missing here. Its line breaks have
# also been lost, so it is not parseable as written -- presumably an
# extraction/paste artifact; confirm before relying on it.
# Visible logic: stores the stage name and varsP on the instance, increments
# varsP.extensionCount for "extension0", then picks self.bunching and
# self.ref_arg by stage name (refineNGS additionally redirects the input contig
# prefix/folder to the NGS ones; extension stages append the extension count to
# the contig prefix).
def __init__(self, StageName, varsP): self.refineStage = StageName self.multigroup = True #if False, force single group (not normally good) self.varsP = varsP ContigPrefix = self.varsP.expID + "_" + StageName if StageName == "extension0": self.varsP.extensionCount += 1 for case in util.switch(StageName): if case("refine(B0|B1|Final0|Final1)", regexp=True): self.bunching = 12 self.ref_arg = "-reff" break if case("refineA"): self.bunching = 12 self.ref_arg = "-ref" break if case("refineNGS"): self.bunching = 1 self.ref_arg = "-ref" self.varsP.inputContigPrefix = self.varsP.ngsContigPrefix self.varsP.inputContigFolder = self.varsP.ngsInDir break if case("extension[01]", regexp=True): self.bunching = 12 self.ref_arg = "-reff" ContigPrefix = self.varsP.expID + "_" + StageName + '_%s' % self.varsP.extensionCount break
# NOTE(review): this line appears to be a reformatted, truncated copy of the
# generateJobList that is defined in full further down in this file. It stops
# after the "refine(B0|Final0)" case, so the remaining stage cases and the
# whole per-group job loop are missing here. Its line breaks have also been
# lost, so it is not parseable as written -- presumably an extraction/paste
# artifact; confirm before relying on it.
# Visible logic: starts from the stage's own argument list, appends the
# 'noise0' arguments, and for refine(B0|Final0) stores the result of
# groupContigs() on varsP under "count_<outputContigPrefix>" before building
# one (id, id, split-bnx path, 1) tuple per pairwise job.
def generateJobList(self): baseArgs1 = self.varsP.argsListed(self.refineStage) for case in util.switch(self.refineStage): if case("refine(B1|Final1)", regexp=True): baseArgs1 += self.varsP.argsListed('noise0') ContigGroupList = self.findGroupedContigs() r1args = [self.varsP.RefAlignerBin] break if case("refine(B0|Final0)", regexp=True): baseArgs1 += self.varsP.argsListed('noise0') ContigGroupListFull = self.groupContigs() setattr(self.varsP, "count_" + self.varsP.outputContigPrefix, (ContigGroupListFull)) #print self.varsP.outputContigPrefix, getattr(self.varsP, "count_"+self.varsP.outputContigPrefix) #r1args = [self.varsP.RefAlignerBin, '-i', self.varsP.bnxFile] #InputFileList=[self.varsP.bnxFile] r1args = [self.varsP.RefAlignerBin] ContigGroupList = zip( range(1, self.varsP.nPairwiseJobs + 1), range(1, self.varsP.nPairwiseJobs + 1), [ self.varsP.bnxFile.replace( ".bnx", "_%s_of_%s.bnx" % (x, self.varsP.nPairwiseJobs)) for x in range(1, self.varsP.nPairwiseJobs + 1) ], [1] * self.varsP.nPairwiseJobs) break
def __init__(self, StageName, varsP):
    """Set up one refine/extension stage and build its job list.

    The stage name selects ``self.bunching`` and ``self.ref_arg`` through
    ``util.switch``. An unrecognised name is reported with
    ``varsP.updatePipeReport`` and the constructor returns early, before the
    base class has been initialised.

    Args:
        StageName: stage identifier, e.g. "refineA", "refineB0",
            "refineFinal1", "refineNGS", "extension0" or "extension1".
        varsP: shared pipeline state object. This method reads and updates
            several of its attributes (``extensionCount``, ``stageName`` and,
            for refineNGS, the input contig prefix/folder).
    """
    self.varsP = varsP
    self.refineStage = StageName
    self.multigroup = True  # if False, force single group (not normally good)

    # extension0 advances the shared extension counter; extension1 reuses the
    # current value when it builds its prefix below.
    if StageName == "extension0":
        self.varsP.extensionCount += 1

    outPrefix = self.varsP.expID + "_" + StageName

    for case in util.switch(StageName):
        if case("refine(B0|B1|Final0|Final1)", regexp=True):
            self.bunching, self.ref_arg = 12, "-reff"
            break
        if case("refineA"):
            self.bunching, self.ref_arg = 12, "-ref"
            break
        if case("refineNGS"):
            self.bunching, self.ref_arg = 1, "-ref"
            # refineNGS takes its input contigs from the NGS location
            self.varsP.inputContigPrefix = self.varsP.ngsContigPrefix
            self.varsP.inputContigFolder = self.varsP.ngsInDir
            break
        if case("extension[01]", regexp=True):
            self.bunching, self.ref_arg = 12, "-reff"
            outPrefix += '_%s' % self.varsP.extensionCount
            break
        if case():
            # Unknown stage: report it and stop before any further setup
            self.varsP.updatePipeReport("Internal error: unknown stage %s" % StageName)
            return

    # Cluster arguments are looked up with the plain stage name, so fetch them
    # before the extension suffix is appended.
    stageClusterArgs = varsP.getClusterArgs(StageName)
    if StageName.startswith("extension"):
        StageName += "_%i" % self.varsP.extensionCount
    self.varsP.stageName = StageName  # for status.xml only
    util.LogStatus("progress", "stage_start", StageName)

    # super is more pythonic than naming the base class explicitly (only
    # matters for multiple inheritance)
    super(Refine, self).__init__(varsP, StageName, clusterArgs=stageClusterArgs)

    self.varsP.prepareContigIO(outPrefix, StageName)
    self.generateJobList()
if threadBoost < 1: threadBoost = 1 minthreads = self.varsP.getClusterArgs(self.refineStage, category="MinThreads") if minthreads: minthreads = Template(minthreads).substitute( maxthreads=self.varsP.maxthreads) else: minthreads = self.varsP.maxthreads nthreads = float(minthreads) nthreads = int(round(nthreads * threadBoost)) if nthreads > self.varsP.maxthreads: nthreads = self.varsP.maxthreads # for contigID, contig in ContigGroupList : jobName = self.refineStage + ' %5s' % contigID for case in util.switch(self.refineStage): if case("refineA"): endId = int(rawContigID) + self.bunching - 1 if m + 1 < len(ContigGroupList): endId = int(ContigGroupList[m + 1][1]) - 1 currentArgs = [ str(rawContigID), str(endId), '-maxthreads', str(nthreads) ] #this must come after r1args because it's actually an argument to -contigs #currentArgs = r1args + currentArgs + baseArgs1 + ['-id', str(contigID), '-i', contig+"_mapped.bnx", '-o', output1String] currentArgs = r1args + currentArgs + [ '-o', output1String ] + stdarg + baseArgs1 expectedOutputString = self.varsP.outputContigPrefix + '_contig' + str( rawContigID)
def generateJobList(self):
    """Create and queue one ``mthread.singleJob`` per contig group.

    The method works in two phases:

    1. A switch on ``self.refineStage`` picks the executable and leading
       arguments (``r1args``), extends the stage arguments with the
       'noise0' arguments where applicable, and builds ``ContigGroupList``.
       Each entry is indexed as ``[0]`` contigID, ``[1]`` rawContigID,
       ``[2]`` contig (a file path/prefix) and ``[3]`` a weight that is
       rounded up and used to scale the thread count.
    2. For every group the command line, the expected result file and the
       expected stdout file are assembled and the job is added with
       ``self.addJob``. ``self.logArguments()`` is called once at the end.

    An unknown stage in phase 1 is recorded on ``varsP`` and the method
    returns without adding jobs.

    Raises:
        ValueError: if the stage has no per-job argument template in phase 2.
    """
    baseArgs1 = self.varsP.argsListed(self.refineStage)

    for case in util.switch(self.refineStage):
        if case("refine(B1|Final1)", regexp=True):
            baseArgs1 += self.varsP.argsListed('noise0')
            ContigGroupList = self.findGroupedContigs()
            r1args = [self.varsP.RefAlignerBin]
            break
        if case("refine(B0|Final0)", regexp=True):
            baseArgs1 += self.varsP.argsListed('noise0')
            # Keep the real grouping on varsP; the jobs themselves are driven
            # by the split BNX files instead.
            setattr(self.varsP, "count_" + self.varsP.outputContigPrefix, self.groupContigs())
            r1args = [self.varsP.RefAlignerBin]
            ContigGroupList = self._pairwiseBnxGroups()
            break
        if case("refineA"):
            baseArgs1 += self.varsP.argsListed('noise0')
            ContigGroupList = self.groupContigs()
            # need this before -contigs; use split files in case splitting
            # changed (eg due to scan scaling producing labels at < 20 bp)
            r1args = [self.varsP.AssemblerBin, '-if', self.varsP.bnxFileList]
            r1args += ['-contigs', os.path.join(self.varsP.inputContigFolder, self.varsP.inputContigPrefix) + '.contigs']
            break
        if case("refineNGS"):
            r1args = [self.varsP.RefAlignerBin, '-i', self.varsP.bnxFile]
            ContigGroupList = self.groupContigs()
            break
        if case("extension0"):
            baseArgs1 += self.varsP.argsListed('noise0')
            setattr(self.varsP, "count_" + self.varsP.outputContigPrefix, self.groupContigs())
            r1args = [self.varsP.RefAlignerBin]
            ContigGroupList = self._pairwiseBnxGroups()
            break
        if case("extension1"):
            baseArgs1 += self.varsP.argsListed('noise0')
            ContigGroupList = self.findGroupedContigs()
            r1args = [self.varsP.RefAlignerBin]
            break
        if case():
            # This branch previously used the undefined names `varsP` and
            # `StageName`, so it raised NameError instead of reporting.
            self.varsP.error += 1
            self.varsP.message += ' Error: Refine stage name invalid: ' + str(self.refineStage) + '\n'
            self.varsP.updatePipeReport("Internal error: unknown stage %s" % self.refineStage)
            return

    # Same for all cases below
    stdarg = ['-stdout', '-stderr'] if self.varsP.stdoutlog else []
    output1String = os.path.join(self.varsP.outputContigFolder, self.varsP.outputContigPrefix)

    for m, group in enumerate(ContigGroupList):
        contigID = group[0]
        rawContigID = group[1]
        contig = group[2]

        # Figure out desired number of threads to use: the configured minimum
        # (or maxthreads when none is configured), scaled by the group weight
        # and capped at maxthreads.
        threadBoost = ceil(group[3])
        if threadBoost < 1:
            threadBoost = 1
        minthreads = self.varsP.getClusterArgs(self.refineStage, category="MinThreads")
        if minthreads:
            minthreads = Template(minthreads).substitute(maxthreads=self.varsP.maxthreads)
        else:
            minthreads = self.varsP.maxthreads
        nthreads = int(round(float(minthreads) * threadBoost))
        if nthreads > self.varsP.maxthreads:
            nthreads = self.varsP.maxthreads

        jobName = self.refineStage + ' %5s' % contigID

        # NOTE(review): there is no "refineNGS" branch below, so that stage
        # appears to end in the ValueError whenever it has groups -- confirm
        # whether that is intended.
        for case in util.switch(self.refineStage):
            if case("refineA"):
                # Contig ID range handled by this job: up to just before the
                # next group's first ID, or one bunch for the last group.
                endId = int(rawContigID) + self.bunching - 1
                if m + 1 < len(ContigGroupList):
                    endId = int(ContigGroupList[m + 1][1]) - 1
                # this must come after r1args because it's actually an
                # argument to -contigs
                currentArgs = [str(rawContigID), str(endId), '-maxthreads', str(nthreads)]
                currentArgs = r1args + currentArgs + ['-o', output1String] + stdarg + baseArgs1
                expectedOutputString = self.varsP.outputContigPrefix + '_contig' + str(rawContigID)
                expectedResultFile = os.path.join(self.varsP.outputContigFolder, expectedOutputString + '.cmap')
                expectedStdoutFile = output1String + "_id" + str(rawContigID) + ".stdout"
                break
            if case("refineB1|refineFinal1|extension1", regexp=True):
                # One "-i <mapped bnx>" pair per pairwise job
                Inputs = []
                for i in range(1, self.varsP.nPairwiseJobs + 1):
                    Inputs += ["-i", contig.replace("_group", "_group" + str(i) + "_mapped_group") + ".bnx"]
                # -id must come before -o, otherwise expectedStdoutFile is wrong
                currentArgs = ['-maxthreads', str(nthreads), '-id', str(contigID), '-o', output1String, self.ref_arg, contig]
                currentArgs = r1args + currentArgs + stdarg + baseArgs1 + Inputs
                expectedOutputString = self.varsP.outputContigPrefix + '_contig' + str(rawContigID)
                expectedResultFile = os.path.join(self.varsP.outputContigFolder, expectedOutputString + '.cmap')
                expectedStdoutFile = output1String + "_id" + str(contigID) + ".stdout"
                break
            if case("refineB0|refineFinal0|extension0", regexp=True):
                currentArgs = ['-maxthreads', str(nthreads), "-ref", os.path.join(self.varsP.inputContigFolder, util.uniquifyContigName(self.varsP.inputContigPrefix) + ".cmap")]
                outputfile = os.path.join(self.varsP.outputContigFolder, self.varsP.outputContigPrefix + '_group' + str(contigID))
                # -id must come before -o, otherwise expectedStdoutFile is wrong
                currentArgs = r1args + ['-i', contig, '-id', str(contigID), '-o', outputfile] + stdarg + currentArgs + baseArgs1
                currentArgs += [
                    '-refine', '0',
                    '-grouped', os.path.join(self.varsP.outputContigFolder, self.varsP.outputContigPrefix + '_group_manifest'),
                    '-mapped', os.path.join(self.varsP.outputContigFolder, self.varsP.outputContigPrefix + '_group' + str(contigID) + "_mapped"),
                    '-output-filter', ".*.bnx",
                ]
                expectedOutputString = self.varsP.outputContigPrefix + '_group' + str(contigID) + "_mapped.bnx"
                expectedResultFile = outputfile + "_mapped_group1.bnx"
                expectedStdoutFile = outputfile + "_id" + str(contigID) + ".stdout"
                break
            if case():
                errorText = "Internal error: cannot handle stage %s" % (self.refineStage)
                self.varsP.updatePipeReport(errorText)
                raise ValueError(errorText)

        if self.varsP.bnxStatsFile is not None:
            currentArgs.extend(['-XmapStatRead', self.varsP.bnxStatsFile])

        s1Job = mthread.singleJob(
            currentArgs,
            jobName,
            expectedResultFile,
            expectedOutputString,
            maxThreads=nthreads,
            clusterLogDir=self.varsP.clusterLogDir,
            expectedStdoutFile=expectedStdoutFile,
        )
        self.addJob(s1Job)

    self.logArguments()

def _pairwiseBnxGroups(self):
    """Return one ``(id, id, split-bnx path, 1)`` tuple per pairwise job.

    The path is ``varsP.bnxFile`` with ".bnx" replaced by
    "_<i>_of_<nPairwiseJobs>.bnx". The result is a real list (not a lazy
    ``zip`` object) because the caller indexes it and takes its length.
    """
    nJobs = self.varsP.nPairwiseJobs
    jobIds = list(range(1, nJobs + 1))
    bnxParts = [self.varsP.bnxFile.replace(".bnx", "_%s_of_%s.bnx" % (x, nJobs)) for x in jobIds]
    return list(zip(jobIds, jobIds, bnxParts, [1] * nJobs))