コード例 #1
0
    def __init__(self, StageName, varsP):
        """Set up per-stage refinement settings (excerpt of the constructor).

        Args:
            StageName: stage identifier; the cases handled below are refineA,
                refineB0/B1, refineFinal0/Final1, refineNGS and extension0/1.
            varsP: shared pipeline-variables object; it is stored on the
                instance and some of its attributes are updated here.
        """
        self.refineStage = StageName
        self.multigroup = True  #if False, force single group (not normally good)
        self.varsP = varsP
        # Default contig prefix "<expID>_<StageName>"; the extension cases
        # below replace it with a variant that also carries the counter.
        ContigPrefix = self.varsP.expID + "_" + StageName

        # The counter is bumped before the switch, so the extension prefix
        # built below for extension0 already uses the incremented value.
        if StageName == "extension0":
            self.varsP.extensionCount += 1

        # util.switch is a project helper used as a switch/case emulation;
        # NOTE(review): the exact matching rules of case(..., regexp=True)
        # are defined in util and are not visible here - confirm there.
        for case in util.switch(StageName):
            if case("refine(B0|B1|Final0|Final1)", regexp=True):
                self.bunching = 12
                self.ref_arg = "-reff"
                break
            if case("refineA"):
                self.bunching = 12
                self.ref_arg = "-ref"
                break
            if case("refineNGS"):
                self.bunching = 1
                self.ref_arg = "-ref"
                # Point the shared input-contig prefix/folder on varsP at the
                # NGS values.
                self.varsP.inputContigPrefix = self.varsP.ngsContigPrefix
                self.varsP.inputContigFolder = self.varsP.ngsInDir
                break
            if case("extension[01]", regexp=True):
                self.bunching = 12
                self.ref_arg = "-reff"
                # Extension stages append "_<extensionCount>" to the prefix.
                ContigPrefix = self.varsP.expID + "_" + StageName + '_%s' % self.varsP.extensionCount
                break
        # NOTE(review): the excerpt stops here; ContigPrefix is not consumed
        # in the visible part, and there is no fallback branch for an
        # unrecognised StageName in what is shown.
コード例 #2
0
    def generateJobList(self):
        """Prepare stage-specific arguments and contig groups (excerpt).

        Only the setup for the refineB0/B1 and refineFinal0/Final1 stages is
        visible here; the locals built below (baseArgs1, r1args,
        ContigGroupList) are consumed by code outside this excerpt.
        """
        # Arguments configured for this stage; the cases below extend the
        # list in place with the 'noise0' arguments.
        baseArgs1 = self.varsP.argsListed(self.refineStage)

        for case in util.switch(self.refineStage):
            if case("refine(B1|Final1)", regexp=True):
                baseArgs1 += self.varsP.argsListed('noise0')
                ContigGroupList = self.findGroupedContigs()
                r1args = [self.varsP.RefAlignerBin]
                break
            if case("refine(B0|Final0)", regexp=True):
                baseArgs1 += self.varsP.argsListed('noise0')
                ContigGroupListFull = self.groupContigs()
                # Store the full grouping on varsP under the dynamic attribute
                # name "count_<outputContigPrefix>".
                setattr(self.varsP, "count_" + self.varsP.outputContigPrefix,
                        (ContigGroupListFull))
                #print self.varsP.outputContigPrefix, getattr(self.varsP, "count_"+self.varsP.outputContigPrefix)
                #r1args = [self.varsP.RefAlignerBin, '-i', self.varsP.bnxFile]
                #InputFileList=[self.varsP.bnxFile]
                r1args = [self.varsP.RefAlignerBin]
                # One (i, i, "<bnx>_<i>_of_<nPairwiseJobs>.bnx", 1) tuple per
                # i in 1..nPairwiseJobs.
                # NOTE(review): zip() yields a list only on Python 2; if this
                # value is later passed to len() or indexed, Python 3 would
                # need list(zip(...)) - confirm the target interpreter.
                ContigGroupList = zip(
                    range(1, self.varsP.nPairwiseJobs + 1),
                    range(1, self.varsP.nPairwiseJobs + 1), [
                        self.varsP.bnxFile.replace(
                            ".bnx", "_%s_of_%s.bnx" %
                            (x, self.varsP.nPairwiseJobs))
                        for x in range(1, self.varsP.nPairwiseJobs + 1)
                    ], [1] * self.varsP.nPairwiseJobs)
                break
コード例 #3
0
    def __init__(self, StageName, varsP):
        """Configure a refinement stage and queue its jobs.

        Stores the stage name and the shared pipeline variables, selects the
        per-stage ``bunching`` and ``ref_arg`` settings, initialises the base
        class with the stage's cluster arguments, prepares contig I/O on
        ``varsP`` and finally calls ``generateJobList()``.

        An unrecognised stage name is reported through
        ``varsP.updatePipeReport`` and the constructor returns early, before
        the base class is initialised.

        Args:
            StageName: stage identifier (refineA, refineB0/B1,
                refineFinal0/Final1, refineNGS, extension0/1).
            varsP: shared pipeline-variables object.
        """
        self.refineStage = StageName
        self.multigroup = True  # if False, force single group (not normally good)
        self.varsP = varsP
        contig_prefix = "_".join((self.varsP.expID, StageName))

        # The counter is bumped before the switch so that the extension0
        # prefix built below already carries the new value.
        if StageName == "extension0":
            self.varsP.extensionCount += 1

        for case in util.switch(StageName):
            if case("refine(B0|B1|Final0|Final1)", regexp=True):
                self.bunching, self.ref_arg = 12, "-reff"
                break
            if case("refineA"):
                self.bunching, self.ref_arg = 12, "-ref"
                break
            if case("refineNGS"):
                self.bunching, self.ref_arg = 1, "-ref"
                # NGS refinement reads its contigs from the NGS location.
                self.varsP.inputContigPrefix = self.varsP.ngsContigPrefix
                self.varsP.inputContigFolder = self.varsP.ngsInDir
                break
            if case("extension[01]", regexp=True):
                self.bunching, self.ref_arg = 12, "-reff"
                contig_prefix = contig_prefix + '_%s' % self.varsP.extensionCount
                break
            if case():
                # Fallback branch: nothing else is configured for this stage.
                self.varsP.updatePipeReport("Internal error: unknown stage %s" % StageName)
                return

        # Cluster arguments are looked up under the plain stage name, i.e.
        # before the status suffix is appended.
        cluster_args = varsP.getClusterArgs(StageName)
        if StageName.startswith("extension"):
            # suffix is for status.xml only
            StageName += "_%i" % self.varsP.extensionCount
        self.varsP.stageName = StageName
        util.LogStatus("progress", "stage_start", StageName)
        # super() rather than naming the base class explicitly (only matters
        # for multiple inheritance)
        super(Refine, self).__init__(varsP, StageName, clusterArgs=cluster_args)
        self.varsP.prepareContigIO(contig_prefix, StageName)
        self.generateJobList()
コード例 #4
0
            # NOTE(review): excerpt from the middle of a loop body; names such
            # as threadBoost, contigID, rawContigID, m, ContigGroupList,
            # r1args, stdarg, baseArgs1 and output1String are bound before the
            # visible part.
            # Never scale the thread count by less than 1.
            if threadBoost < 1:
                threadBoost = 1
            # Optional per-stage "MinThreads" cluster setting; when present it
            # is expanded as a template with the maxthreads value, otherwise
            # maxthreads itself is used as the base thread count.
            minthreads = self.varsP.getClusterArgs(self.refineStage,
                                                   category="MinThreads")
            if minthreads:
                minthreads = Template(minthreads).substitute(
                    maxthreads=self.varsP.maxthreads)
            else:
                minthreads = self.varsP.maxthreads
            # Scale by the boost, round to an integer and cap at maxthreads.
            nthreads = float(minthreads)
            nthreads = int(round(nthreads * threadBoost))
            if nthreads > self.varsP.maxthreads:
                nthreads = self.varsP.maxthreads
#        for contigID, contig in ContigGroupList :
            jobName = self.refineStage + ' %5s' % contigID
            for case in util.switch(self.refineStage):
                if case("refineA"):
                    # Default end of the contig-id range is start + bunching - 1;
                    # when a following group exists, the range instead ends
                    # just before that group's raw id.
                    endId = int(rawContigID) + self.bunching - 1
                    if m + 1 < len(ContigGroupList):
                        endId = int(ContigGroupList[m + 1][1]) - 1
                    currentArgs = [
                        str(rawContigID),
                        str(endId), '-maxthreads',
                        str(nthreads)
                    ]  #this must come after r1args because it's actually an argument to -contigs
                    #currentArgs = r1args + currentArgs + baseArgs1 + ['-id', str(contigID), '-i', contig+"_mapped.bnx", '-o', output1String]
                    # Final order: r1args, id range and threads, output
                    # prefix, stdout/stderr flags, stage arguments.
                    currentArgs = r1args + currentArgs + [
                        '-o', output1String
                    ] + stdarg + baseArgs1
                    expectedOutputString = self.varsP.outputContigPrefix + '_contig' + str(
                        rawContigID)
コード例 #5
0
    def generateJobList(self):
        """Create and queue one refinement job per contig group.

        A stage-specific setup step picks the executable and its leading
        arguments (``r1args``), extends the stage arguments obtained from
        ``varsP.argsListed`` and determines the list of contig groups.  The
        first four items of every group are read as ``(contigID,
        rawContigID, contig, threadBoost)``.  For each group a command line
        is assembled, wrapped in ``mthread.singleJob`` and registered with
        ``self.addJob``; ``self.logArguments()`` is called once all jobs
        have been queued.

        A stage that is unknown to the setup step is reported through
        ``varsP.updatePipeReport`` and the method returns without queueing
        anything.

        Raises:
            ValueError: if a stage passes the setup step but has no
                per-group command-line recipe (in this method that applies
                to ``refineNGS``, which has no branch in the per-group
                switch).
        """

        def splitBnxGroups():
            # One (id, rawId, bnxPath, threadBoost=1) tuple per split BNX
            # file "<bnx>_<i>_of_<nPairwiseJobs>.bnx".  Built as a real list
            # because the job loop below needs len() and indexing, which a
            # lazy zip() does not provide.
            nSplits = self.varsP.nPairwiseJobs
            return [(i, i,
                     self.varsP.bnxFile.replace(".bnx", "_%s_of_%s.bnx" % (i, nSplits)),
                     1)
                    for i in range(1, nSplits + 1)]

        baseArgs1 = self.varsP.argsListed(self.refineStage)

        for case in util.switch(self.refineStage):
            if case("refine(B1|Final1)", regexp=True):
                baseArgs1 += self.varsP.argsListed('noise0')
                ContigGroupList = self.findGroupedContigs()
                r1args = [self.varsP.RefAlignerBin]
                break
            if case("refine(B0|Final0)", regexp=True):
                baseArgs1 += self.varsP.argsListed('noise0')
                # Keep the full contig grouping on varsP under
                # "count_<outputContigPrefix>"; the jobs themselves are one
                # per split BNX file.
                ContigGroupListFull = self.groupContigs()
                setattr(self.varsP, "count_" + self.varsP.outputContigPrefix, ContigGroupListFull)
                r1args = [self.varsP.RefAlignerBin]
                ContigGroupList = splitBnxGroups()
                break
            if case("refineA"):
                baseArgs1 += self.varsP.argsListed('noise0')
                ContigGroupList = self.groupContigs()
                # '-if' must precede '-contigs'; the split file list is used
                # in case splitting changed (eg due to scan scaling producing
                # labels at < 20 bp).
                r1args = [self.varsP.AssemblerBin, '-if', self.varsP.bnxFileList]
                r1args += ['-contigs', os.path.join(self.varsP.inputContigFolder, self.varsP.inputContigPrefix) + '.contigs']
                break
            if case("refineNGS"):
                r1args = [self.varsP.RefAlignerBin, '-i', self.varsP.bnxFile]
                ContigGroupList = self.groupContigs()
                break
            if case("extension0"):
                baseArgs1 += self.varsP.argsListed('noise0')
                ContigGroupListFull = self.groupContigs()
                setattr(self.varsP, "count_" + self.varsP.outputContigPrefix, ContigGroupListFull)
                r1args = [self.varsP.RefAlignerBin]
                ContigGroupList = splitBnxGroups()
                break
            if case("extension1"):
                baseArgs1 += self.varsP.argsListed('noise0')
                ContigGroupList = self.findGroupedContigs()
                r1args = [self.varsP.RefAlignerBin]
                break
            if case():
                # The previous code referenced the undefined names `varsP`
                # and `StageName` here and therefore raised NameError instead
                # of reporting; report the same way __init__ does.
                self.varsP.updatePipeReport("Internal error: unknown stage %s" % self.refineStage)
                return

        # stdout/stderr capture flags are the same for every stage
        stdarg = ['-stdout', '-stderr'] if self.varsP.stdoutlog else []
        outputFolder = self.varsP.outputContigFolder
        outputPrefix = self.varsP.outputContigPrefix
        output1String = os.path.join(outputFolder, outputPrefix)

        for m, group in enumerate(ContigGroupList):
            contigID = group[0]
            rawContigID = group[1]
            contig = group[2]

            # Desired thread count: the stage's MinThreads setting (or
            # maxthreads when unset), scaled by the group's boost factor
            # (never below 1) and capped at maxthreads.
            threadBoost = ceil(group[3])
            if threadBoost < 1:
                threadBoost = 1
            minthreads = self.varsP.getClusterArgs(self.refineStage, category="MinThreads")
            if minthreads:
                minthreads = Template(minthreads).substitute(maxthreads=self.varsP.maxthreads)
            else:
                minthreads = self.varsP.maxthreads
            nthreads = int(round(float(minthreads) * threadBoost))
            if nthreads > self.varsP.maxthreads:
                nthreads = self.varsP.maxthreads

            jobName = self.refineStage + ' %5s' % contigID
            for case in util.switch(self.refineStage):
                if case("refineA"):
                    # The id range ends just before the next group's first
                    # raw id, or after `bunching` ids for the last group.
                    endId = int(rawContigID) + self.bunching - 1
                    if m + 1 < len(ContigGroupList):
                        endId = int(ContigGroupList[m + 1][1]) - 1
                    # The id range must directly follow r1args because it is
                    # actually an argument to -contigs.
                    currentArgs = r1args + [str(rawContigID), str(endId), '-maxthreads', str(nthreads)]
                    currentArgs += ['-o', output1String] + stdarg + baseArgs1
                    expectedOutputString = outputPrefix + '_contig' + str(rawContigID)
                    expectedResultFile = os.path.join(outputFolder, expectedOutputString + '.cmap')
                    expectedStdoutFile = output1String + "_id" + str(rawContigID) + ".stdout"
                    break

                if case("refineB1|refineFinal1|extension1", regexp=True):
                    # One '-i <mapped bnx>' pair per pairwise split.
                    Inputs = []
                    for i in range(1, self.varsP.nPairwiseJobs + 1):
                        Inputs += ["-i", contig.replace("_group", "_group" + str(i) + "_mapped_group") + ".bnx"]
                    # -id must come before -o, otherwise expectedStdoutFile is wrong
                    currentArgs = r1args + ['-maxthreads', str(nthreads), '-id', str(contigID), '-o', output1String, self.ref_arg, contig]
                    currentArgs += stdarg + baseArgs1 + Inputs
                    expectedOutputString = outputPrefix + '_contig' + str(rawContigID)
                    expectedResultFile = os.path.join(outputFolder, expectedOutputString + '.cmap')
                    expectedStdoutFile = output1String + "_id" + str(contigID) + ".stdout"
                    break

                if case("refineB0|refineFinal0|extension0", regexp=True):
                    refCmap = os.path.join(self.varsP.inputContigFolder, util.uniquifyContigName(self.varsP.inputContigPrefix) + ".cmap")
                    outputfile = os.path.join(outputFolder, outputPrefix + '_group' + str(contigID))
                    # -id must come before -o, otherwise expectedStdoutFile is wrong
                    currentArgs = r1args + ['-i', contig, '-id', str(contigID), '-o', outputfile] + stdarg
                    currentArgs += ['-maxthreads', str(nthreads), "-ref", refCmap] + baseArgs1
                    currentArgs += ['-refine', '0',
                                    '-grouped', os.path.join(outputFolder, outputPrefix + '_group_manifest'),
                                    '-mapped', outputfile + "_mapped",
                                    '-output-filter', ".*.bnx"]
                    expectedOutputString = outputPrefix + '_group' + str(contigID) + "_mapped.bnx"
                    expectedResultFile = outputfile + "_mapped_group1.bnx"
                    expectedStdoutFile = outputfile + "_id" + str(contigID) + ".stdout"
                    break

                if case():
                    message = "Internal error: cannot handle stage %s" % (self.refineStage)
                    self.varsP.updatePipeReport(message)
                    raise ValueError(message)

            if self.varsP.bnxStatsFile is not None:
                currentArgs.extend(['-XmapStatRead', self.varsP.bnxStatsFile])

            s1Job = mthread.singleJob(currentArgs,
                                      jobName,
                                      expectedResultFile,
                                      expectedOutputString,
                                      maxThreads=nthreads,
                                      clusterLogDir=self.varsP.clusterLogDir,
                                      expectedStdoutFile=expectedStdoutFile,
                                      )
            self.addJob(s1Job)
        self.logArguments()