Code Example #1
 def getAlignStats(self):
     """Open output files of alignment jobs and report on statistics.
     """
     #MapClassesRev stores totAssemblyLenMb
     self.varsP.updatePipeReport(
         "Starting AlignModule Align Stats stage for %s\n" % self.stageName,
         printalso=True)
     util.LogStatus("progress", "stage_start", "%s_stats" % self.stageName)
     if self.doref:
         reflen = mc.multiCmap(self.varsP.ref,
                               lengthonly=True).totalLength / 1e6
     else:
         reflen = self.varsP.totAssemblyLenMb
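     #calls the module-level getAlignStats function (not a recursive call to this method)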
     getAlignStats(self.varsP,
                   self.outFileList,
                   reflen,
                   isref=self.doref,
                   mergepath=self.mergedir)
     mergeMap(self.varsP, self.outFileList, mergepath=self.mergedir)
     splitByContig = (2 if self.doref else 0)  #see mergeRcmaps
     stageName = (self.varsP.alignMolvrefName if self.doref else "")
     mergeRcmaps(self.outFileList, self.mergedir, self.varsP, splitByContig,
                 stageName)
     #xmapDict = split_XMap_byContig( self.outFileList, self.mergedir, self.varsP, self.stageName)
     xmapDict = split_XMap_byContig_new(self.outFileList, self.mergedir,
                                        self.varsP, stageName)
     #split_Qcmap_byContig(self.outFileList, self.mergedir, xmapDict, self.varsP)
     split_Qcmap_byContig_new(self.outFileList, self.mergedir, xmapDict,
                              self.varsP, stageName)
     self.varsP.updatePipeReport(
         "Finished AlignModule Align Stats stage for %s\n" % self.stageName,
         printalso=True)
     util.LogStatus("progress", "stage_complete",
                    "%s_stats" % self.stageName)
Code Example #2
 def __init__(self, varsP) :
     """sortBNX.__init__: this class is for sorting the input bnx
     for subsequent splitting by the splitBNX class, and eventually
     easier processing with the Pairwise class. The constructor
     (this) will call varsP.runJobs and doAllPipeReport."""
     self.stageName="SortBNX"
     self.varsP = varsP #fewer code modifications below
     self.varsP.sorted_file = self.varsP.bnxFile.replace(".bnx", "_sorted")
     #replace this with checkMinMol; this needs to use sorted file which isn't yet made
     #calculateNPairwise(self.varsP, self.varsP.bnxFile.replace(".bnx","")) #run this here bc it contains check on N mol required to start pipeline
     checkMinMol(self.varsP, self.varsP.bnxFile)
     if self.generateJobList() : #return 0 for success, 1 for skip
         if not util.checkFile(self.varsP.sorted_file+".bnx") : #this happens when accidentally using bypass but no sorted bnx exists--log error
             err = "ERROR: no sorted bnx file found (%s) (check bypass (-B) argument to Pipeline)" % (self.varsP.sorted_file+".bnx")
             self.varsP.updatePipeReport(err+"\n")
             util.LogError("critical", err)
             util.LogStatus("progress", "pipeline", "failure")
             raise RuntimeError
         #calculateNPairwise(self.varsP, self.varsP.sorted_file) #correct varsP.nPairwiseJobs -- already above
         return
     util.LogStatus("progress", "stage_start", self.stageName) #after above bc check if bypass (executeCurrentStage)
     self.varsP.runJobs(self, "SortBNX")
     self.doAllPipeReport()
     if not self.allResultsFound() :
         err = "ERROR: sortBNX failed. Check: "+self.varsP.bnxFile
         self.varsP.updatePipeReport(err+"\n")
         util.LogError("critical", err)
         util.LogStatus("progress", "pipeline", "failure")
         raise RuntimeError
     util.LogStatus("progress", "stage_complete", self.stageName)
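Note the contract shared by sortBNX, splitBNX, and autoNoise below: generateJobList returns a truthy value when the stage is bypassed, in which case the expected outputs must already exist on disk. A minimal sketch of that run-or-skip pattern (class and method names here are illustrative, not from the source):

    # Illustrative sketch of the run-or-skip contract (not source code):
    class StageSketch(object):
        def run_or_skip(self):
            if self.generateJobList():          # truthy return: stage bypassed
                self.verifyOutputsExist()       # hypothetical existence check
                return
            self.varsP.runJobs(self, self.stageName)
            self.doAllPipeReport()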
Code Example #3
 def __init__(self, varsP, argset=-1):
     '''argset toggles between the CharacterizeDefault and CharacterizeFinal arguments:
     -1 is default, 1 is final.
     '''
     self.varsP = varsP
     self.argStr = (
         "Final" if argset == 1 else "Default"
     )  #!=1 and !=-1 is error in generateJobList, but not here
     self.stageName = 'Characterize' + self.argStr + ' ' + self.varsP.stageComplete
     util.LogStatus("progress", "stage_start", self.stageName)
     mthread.jobWrapper.__init__(
         self,
         varsP,
         self.stageName,
         clusterArgs=varsP.getClusterArgs('characterizeDefault'))
     self.xmapTarget = None
     self.curCharacterizeFileRoots = []
     outdir = self.varsP.characterizeDirName  # = 'alignref'
     if argset == 1:  #this is final
         outdir += '_final'
     varsP.contigAlignTarget = os.path.join(varsP.outputContigFolder,
                                            outdir)
     if not (os.path.exists(varsP.contigAlignTarget)):
         os.mkdir(varsP.contigAlignTarget)
     self.generateJobList(argset)
Code Example #4
 def isBadErrorParams(self, noise, stage):
     #BAD means this:
     # for both stages: sr > 0.1 or sd > 0.1 or sf > 0.5
     # also for stage 0 : sd > 0.1 and sf > 0.35
     # also for stage 1 : sd > 0 and sf > 0.25 (this used to be for both stages)
     assert stage == 0 or stage == 1, "Error: invalid arg to autoNoise.isBadErrorParams"
     badparam = False
     if not noise :
         badparam = True
     elif stage == 0 and (noise["sd"] > 0.1 and noise["sf"] > 0.35) :
         badparam = True
     elif stage == 1 and (noise["sd"] > 0   and noise["sf"] > 0.25) :
         badparam = True
     #add not noise for case of empty dict, which readNoiseParameters will return if it can't read the .err file
     if badparam or noise["sr"] > 0.1 or noise["sd"] > 0.1 or noise["sf"] > 0.5 :
         errstr = "Failed to find usable noise parameters. Try decreasing maprate parameter and/or find a better reference. You can also try disabling auto noise (no -y, or 'Rough assembly' profile) with nominal noise parameters;"
         if noise.has_key("sf") :
             errstr += " sf=%f" % noise["sf"]
         if noise.has_key("sd") :
             errstr += " sd=%f" % noise["sd"]
         if noise.has_key("sr") :
             errstr += " sr=%f" % noise["sr"]
         self.varsP.updatePipeReport(errstr+"\n")
         util.LogError("critical", errstr)
         util.LogStatus("progress", "pipeline", "failure") #possibly redundant with DNPipeline.finalizePipeline
         raise RuntimeError
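Restated as a pure predicate, the thresholds above look like the following; this is a hedged, self-contained paraphrase (the function name is illustrative), not a replacement for the method:

    # Hedged restatement of the thresholds above (illustrative name):
    def noise_params_bad(noise, stage):
        if not noise:                    # empty dict: .err file was unreadable
            return True
        if stage == 0 and noise["sd"] > 0.1 and noise["sf"] > 0.35:
            return True
        if stage == 1 and noise["sd"] > 0 and noise["sf"] > 0.25:
            return True
        # limits applied to both stages
        return noise["sr"] > 0.1 or noise["sd"] > 0.1 or noise["sf"] > 0.5

    # e.g. noise_params_bad({"sf":0.20, "sd":0.05, "sr":0.02}, 1) -> False
    #      noise_params_bad({"sf":0.30, "sd":0.05, "sr":0.02}, 1) -> True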
Code Example #5
 def mergeComplete(self):
     """Test whether merge possibilities are exhausted, and increment names and counters.
     If the RefAligner argument -pairmergeRepeat is used, always terminate.
     """
     prevCount = self.countContigs(self.varsP.inputContigFolder,
                                   self.prevPrefix)
     curCount = self.countContigs(self.varsP.outputContigFolder,
                                  self.curPrefix)
     #self.varsP.stageComplete = 'Merge% 2d' % self.varsP.extensionCount
     self.varsP.stageComplete = self.stageName
     self.checkResults()
     contigCount = '  %s %d to %s %d' % (self.prevPrefix, prevCount,
                                         self.curPrefix, curCount)
     self.varsP.inputContigPrefix = self.curPrefix
     self.varsP.inputContigFolder = self.varsP.outputContigFolder
     self.varsP.outputContigPrefix = self.curPrefix
     utilities.LogStatus(
         "progress", "stage_complete",
         self.groupName)  #a stage here is each merge iteration (A, B, etc.), not the whole merge
     term = "-pairmergeRepeat" in self.varsP.argsListed('merge')
     if term or curCount <= 1 or curCount >= prevCount or self.iterCount >= len(
             self.alphabet) - 1:
         # Terminate Merging
         contigCount += '  .. Terminate Merge ..'
         self.varsP.updatePipeReport(contigCount + '\n')
         if curCount == 0:
             self.varsP.outputContigPrefix = self.prevPrefix
         self.varsP.mergeIntoSingleCmap()
         return 1
     else:
         contigCount += '  .. Continue Merge ..'
         self.varsP.updatePipeReport(contigCount + '\n')
         return 0
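The four stop conditions in mergeComplete can be condensed into one predicate; a sketch with illustrative names mirroring the variables above:

    # Sketch of the merge stop test above (illustrative names):
    def merge_done(pairmerge_repeat, cur_count, prev_count, iter_count, alphabet):
        return (pairmerge_repeat                  # RefAligner iterates internally
                or cur_count <= 1                 # nothing left to merge
                or cur_count >= prev_count        # no progress this round
                or iter_count >= len(alphabet) - 1)  # suffix letters exhausted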
Code Example #6
 def __init__(self, varsP, splitname="SplitBNX") :
     """splitBNX.__init__: this class is for splitting the sorted bnx file into
     smaller chunks for easier processing with the Pairwise class. Like the
     sortBNX class, the constructor also calls varsP.runJobs and doAllPipeReport.
     """
     util.LogStatus("progress", "stage_start", splitname)
     self.varsP = varsP #fewer code modifications below
     self.stageName = splitname
     if not self.generateJobList() : #check return value, and runJobs only if False
         self.varsP.runJobs(self, splitname)
     self.doAllPipeReport()
     if not self.allResultsFound() :
         err = "ERROR: splitBNX failed. Check: "+self.varsP.sorted_file+".bnx"
         self.varsP.updatePipeReport(err+"\n")
         util.LogError("critical", err)
         util.LogStatus("progress", "pipeline", "failure")
         raise RuntimeError
     util.LogStatus("progress", "stage_complete", splitname)
Code Example #7
 def endStage(self):
     """Call this in place of checkResults when this stage is bypassed."""
     if self.refineStage not in ['refineB0', 'refineFinal0', 'extension0']:
         self.varsP.mergeIntoSingleCmap()
     StageName = self.refineStage + (
         "_%i" % self.varsP.extensionCount
         if self.refineStage.startswith("extension") else ""
     )  #for status.xml only
     self.varsP.stageComplete = StageName
     util.LogStatus("progress", "stage_complete", StageName)
Code Example #8
 def __init__(self, varsP):
     self.varsP = varsP
     self.varsP.extensionCount += 1
     self.stageName = 'Extension_' + str(self.varsP.extensionCount)
     utilities.LogStatus("progress", "stage_start", self.stageName)
     mthread.jobWrapper.__init__(
         self,
         varsP,
         self.stageName,
         clusterArgs=varsP.getClusterArgs('extension'))
     extContigPrefix = self.varsP.expID + '_ext%s' % self.varsP.extensionCount
     varsP.prepareContigIO(extContigPrefix, self.stageName)
     self.generateJobList()
Code Example #9
 def checkResults(self, stageSuffix=""):
     '''Call jobWrapper's (self) doAllPipeReport, then varsP.mergeIntoSingleCmap.
     stageSuffix, if supplied, is appended to varsP.stageComplete in order to
     fix the stage name reported by the CharacterizeModule in the informaticsReport.
     '''
     self.doAllPipeReport()
     self.varsP.stageComplete = self.refineStage + stageSuffix
     if self.refineStage not in ['refineB0', 'refineFinal0', 'extension0']:
         self.varsP.mergeIntoSingleCmap()
     StageName = self.refineStage + (
         "_%i" % self.varsP.extensionCount
         if self.refineStage.startswith("extension") else ""
     )  #for status.xml only
     util.LogStatus("progress", "stage_complete", StageName)
Code Example #10
    def generateJobList(self) :
        """splitBNX.generateJobList: submit jobs to split the sorted bnx into N
        chunks, where N comes from calculateNPairwise."""

        sorted_file = self.varsP.sorted_file
        if not util.checkFile(sorted_file+".bnx") :
            err = "ERROR: splitBNX input file (%s) not found; exiting" % self.varsP.sorted_file
            self.varsP.updatePipeReport(err+"\n")
            util.LogError("critical", err)
            util.LogStatus("progress", "pipeline", "failure")
            raise RuntimeError

        N = calculateNPairwise(self.varsP, sorted_file) #move back here (not sortBNX) bc needs to use sorted bnx
        #N = self.varsP.nPairwiseJobs

        self.varsP.updatePipeReport('Splitting BNX\n')
        #splitJobSet=mthread.jobWrapper(self.varsP,jobName,clusterArgs=self.varsP.getClusterArgs('splitting'))
        super(splitBNX, self).__init__(self.varsP, self.stageName, clusterArgs=self.varsP.getClusterArgs('splitting'))

        #should skip the rest and return 1, like in sortBNX, here:
        if not self.varsP.executeCurrentStage:
            return 1 #tell self.__init__ not to continue processing

        self.varsP.updatePipeReport("Splitting"+(" scan-scaled" if self.varsP.doScanScale else "")+" bnx file: %s.bnx\n\n" % self.varsP.sorted_file)

        #calculate threads per job: used to be fixed at 1, now file size / 1.5 GB rounded up. This was too low, add 1.
        threads = max(1, int(math.ceil( os.path.getsize(sorted_file+".bnx")/1.5e9 ))) + 1
        if threads > 1 :
            self.varsP.updatePipeReport("Using %i threads per job\n" % threads)

        #the change in job partitioning breaks backward compatibility and was causing too many problems; make it conditional on refaligner version
        #this is now obsolete: assume binaries are up-to-date
        if False : #self.varsP.refaligner_version < 3995 :
            for partial in range(1,N + 1):
                output_file=self.varsP.bnxFile.replace(".bnx", "_%s_of_%s" %(partial, self.varsP.nPairwiseJobs))
                cargs=[self.varsP.RefAlignerBin, '-f', '-i', sorted_file+".bnx", "-maxthreads", str(threads), "-merge", "-subsetbin", str(partial), str(N), "-bnx", "-o",  output_file]
                if self.varsP.stdoutlog :
                    cargs.extend( ['-stdout', '-stderr'] )
                #print('%d/%d' % (partial, N), cargs)
                expectedResultFile=output_file+".bnx"
                self.addJob(mthread.singleJob(cargs, self.stageName + str(partial), expectedResultFile, self.stageName + str(partial), maxThreads=threads, clusterLogDir=self.varsP.clusterLogDir, expectedStdoutFile=output_file+".stdout"))

        else :
            #change above to single command with -subsetbin 0 N
            output_file=self.varsP.bnxFile.replace(".bnx", "")
            cargs=[self.varsP.RefAlignerBin, '-f', '-i', sorted_file+".bnx", "-maxthreads", str(threads), "-merge", "-subsetbin", "0", str(N), "-bnx", "-o",  output_file]
            if self.varsP.stdoutlog :
                cargs.extend( ['-stdout', '-stderr'] )
            self.addJob(mthread.singleJob(cargs, self.stageName, output_file+".bnx", self.stageName, maxThreads=threads, clusterLogDir=self.varsP.clusterLogDir, expectedStdoutFile=output_file+".stdout"))
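For the thread sizing rule above (bnx size divided by 1.5 GB, rounded up, plus one), a quick worked example with an illustrative file size:

    import math

    # Illustrative: a 4 GB sorted bnx
    size_bytes = 4.0e9
    threads = max(1, int(math.ceil(size_bytes / 1.5e9))) + 1
    # ceil(4.0/1.5) = ceil(2.67) = 3, so threads == 4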
Code Example #11
 def generateJobList(self):
     """Defines job parameters for merge. Updates variables for subsequent
     completion test in mergeComplete()
     """
     self.clearJobs()
     self.prevPrefix = self.varsP.inputContigPrefix
     #self.curPrefix = self.prefixIter.next()
     self.curPrefix = self.stagePrefix + self.alphabet[self.iterCount]
     self.groupName = self.stageName + self.alphabet[
         self.iterCount]  #jobWrapper data member
     utilities.LogStatus("progress", "stage_start", self.groupName)
     self.varsP.updatePipeReport('   PREV PREFIX %s, CUR PREFIX %s' %
                                 (self.prevPrefix, self.curPrefix))
     self.iterCount += 1
     outputString = os.path.join(self.varsP.outputContigFolder,
                                 self.curPrefix)
     currentArgs = [self.varsP.RefAlignerBin, '-o', outputString]
     #if self.varsP.stdoutlog : #always use this here bc it's the only output which should always be there
     currentArgs.extend(['-f', '-stdout', '-stderr'])
     currentArgs += self.varsP.argsListed('merge')
     currentArgs += ['-maxthreads', str(self.varsP.nThreads)]
     contigsTextFile = os.path.join(self.varsP.inputContigFolder,
                                    'mergeContigs.txt')
     contigFiles, contigIDs = self.varsP.findContigs(
         self.varsP.inputContigFolder,
         self.prevPrefix,
         txtOutput=contigsTextFile
     )  #this method creates the mergeContigs.txt file which is necessary for this job
     self.varsP.prefixUsed.append(self.curPrefix)
     fileArgs = ['-if', contigsTextFile]
     #expoutput = outputString+".align" #don't know which contigs will disappear, but should always get an align file -- with new arg 'pairmergeRepeat', there's no .align; use stdout
     expoutput = outputString + ".stdout"
     s1Job = mthread.singleJob(currentArgs + fileArgs,
                               self.groupName,
                               expoutput,
                               self.groupName,
                               maxThreads=self.varsP.nThreads,
                               clusterLogDir=self.varsP.clusterLogDir,
                               expectedStdoutFile=outputString + ".stdout")
     self.addJob(s1Job)
     self.logArguments()
Code Example #12
 def checkResults(self):
     #old heading says complete here and then summary after contig list; new says summary here
     outstr = 'Stage Summary: %s\n' % self.stageName
     if not self.varsP.ref:  #still want contig stats
         infoReport = "Skipping Characterize because no reference (-r)\n"
         self.varsP.updatePipeReport(
             infoReport,
             printalso=False)  #put this in pipereport just as an fyi
         infoReport += outstr
         #infoReport += 'Stage Complete: %s\n' % self.groupName #set in jobWrapper constructor
         #infoReport += MapClassesRev.ContigCharacterizationNoRef(self.varsP,self.groupName)
         infoReport += characterizeContigs(self.varsP)
         self.varsP.updateInfoReport(infoReport + '\n')
         return
     self.doAllPipeReport()
     #infoReport = 'Stage Complete: %s\n' % self.groupName #set in jobWrapper constructor
     #infoReport += MapClassesRev.TopLevelCharacterization(self.varsP,self.curCharacterizeFileRoots,self.groupName)
     #infoReport += 'OLD characterize\n' #debug
     infoReport = characterizeContigs(self.varsP, self.xmapTarget)
     self.varsP.updateInfoReport(outstr + infoReport + '\n')
     util.LogStatus("progress", "stage_complete", self.stageName)
Code Example #13
 def __init__(self, refineStage, varsP):
     validstages = ['refineA', 'refineB', 'refineNGS', 'refineFinal']
     if refineStage not in validstages:
         varsP.error += 1
         varsP.message += '  Error: Refine stage name invalid: ' + str(
             refineStage) + '\n'
         return
     self.refineStage = refineStage
     self.varsP = varsP
     utilities.LogStatus("progress", "stage_start", self.refineStage)
     #super is more pythonic than referring to the base class explicitly (only matters for multiple inheritance)
     super(Refine,
           self).__init__(varsP,
                          refineStage,
                          clusterArgs=varsP.getClusterArgs(refineStage))
     intermediateContigPrefix = self.varsP.expID + self.refineStage.replace(
         "refine", "_r")
     self.varsP.prepareContigIO(intermediateContigPrefix, refineStage)
     #modify results of varsP.prepareContigIO for special case of refineNGS
     if self.refineStage == 'refineNGS':
         self.varsP.inputContigPrefix = self.varsP.ngsContigPrefix
         self.varsP.inputContigFolder = self.varsP.ngsInDir
     self.generateJobList()
Code Example #14
def checkMinMol(varsP, input_file, minmol=2) :
    '''Simplified version of calculateNPairwise which just checks that there are at least minmol molecules.'''
    f=open(input_file, "r")
    count=0
    #length=0
    #site_count=0.0
    for line in f:
        if line[0] == "0":
            #x=line.split()
            count+=1
            #length+=float(x[2])
        #if line[0] == "1":
        #site_count+=len(line.split())-1
        if count > minmol : #this is all we need to check
            break
    f.close()

    #check that we have more than 1 molecule; if not, there's nothing to assemble, so exit
    if count < minmol :
        err = "ERROR in checkMinMol: number of molecules (%i) is too few for assembly; check bnx: %s" % (count, input_file)
        varsP.updatePipeReport(err+"\n")
        util.LogError("critical", err)
        util.LogStatus("progress", "pipeline", "failure")
        raise RuntimeError #will be caught in DNPipeline.constructData
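Counting lines that begin with "0" works because, in the bnx text format, each molecule contributes one record line starting with "0". A hypothetical call for reference:

    # Hypothetical usage ("molecules.bnx" is a placeholder path); logs and
    # raises RuntimeError when fewer than minmol molecule records ("0" lines)
    # are present:
    #
    #   checkMinMol(varsP, "molecules.bnx", minmol=2)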
Code Example #15
    def __init__(self, varsP) :
        """autoNoise.__init__: this class runs the Autonoise0 and Autonoise1
        stages to estimate noise parameters from the input (or sorted) bnx,
        calling varsP.runJobs and doAllPipeReport for each stage; the refined
        parameters are then loaded into the 'noise0' argument set.
        """
        self.stageName = "Autonoise0"
        self.varsP = varsP #fewer code modifications below
        
        util.LogStatus("progress", "stage_start", self.stageName) #after above bc check if bypass (executeCurrentStage)

        self.output_folder = os.path.join(self.varsP.contigFolder, "auto_noise")
        if not util.checkDir(self.output_folder) : #will make if not exist, only returns False if already exists or can't make
            print "ERROR in autoNoise: bad dir:", self.output_folder
            raise RuntimeError

        # We use assembly section here because the memory usage is higher than pairwise, while the jobs are quite short.
        #sortJobSet=mthread.jobWrapper(self.varsP,jobName,clusterArgs=self.varsP.getClusterArgs('assembly'))
        super(autoNoise, self).__init__(self.varsP, self.stageName, clusterArgs=self.varsP.getClusterArgs("assembly"))

        bnxfile = self.varsP.bnxFile if varsP.noiseOnly else self.varsP.sorted_file+".bnx"
        #was return if generateJobListChar, but need to get readparameters if bypass
        if not self.generateJobListChar({}, bnxfile, "autoNoise0") : #return 0 for success, 1 for skip
            self.varsP.runJobs(self, "AutoNoise0")
            self.doAllPipeReport()
        if not self.allResultsFound() :
            self.varsP.updatePipeReport("ERROR: AutoNoise0 failed. Check: "+self.output_file+".stdout\n")
            raise RuntimeError
        util.LogStatus("progress", "stage_complete", self.stageName)
            
        self.varsP.noise0 = readNoiseParameters(self.output_file)
        self.isBadErrorParams(self.varsP.noise0, 0)

        self.stageName = "Autonoise1"
        self.groupName = self.stageName #so that the LogStatus call in MultiThreading.multiThreadRunJobs reports the new stage name
        util.LogStatus("progress", "stage_start", self.stageName)

        self.clearJobs()
        
        self.varsP.replaceParam("noise0", "-readparameters", self.output_file+".errbin")

        #need to call again to set self.output_file
        if not self.generateJobListChar(self.varsP.noise0, bnxfile, "autoNoise1") : #return 0 for success, 1 for skip
            self.varsP.runJobs(self, "AutoNoise1")
            self.doAllPipeReport()
        if not self.allResultsFound() :
            self.varsP.updatePipeReport("ERROR: AutoNoise1 failed. Check: "+self.output_file+".stdout\n")
            raise RuntimeError
            
        self.varsP.noise1 = readNoiseParameters(self.output_file)
        
        infoReport="Automatically determined noise parameters:\n"
        klist = ["FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"] #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict.
        for v in klist :
            if not self.varsP.noise1.has_key(v) :
                continue
            param=str(self.varsP.noise1[v])
            util.LogStatus("parameter", "auto_"+v, param)
            infoReport+=v+":"+param+"\n"
            self.varsP.replaceParam("noise0", "-"+v, param)
        self.varsP.updateInfoReport(infoReport + '\n')
        self.isBadErrorParams(self.varsP.noise1, 1)

        if self.varsP.doScanScale : #change the sorted_file to the rescaled bnx file
            rescaledbnx = self.output_file + self.varsP.rescaleSuffix #no ".bnx" in suffix
            if not util.checkFile(rescaledbnx+".bnx") : #not found--not an error if bnx 0.1 is used
                err = "Warning: scan scaled bnx not found after autoNoise1; not performing scan scaling--check that bnx 1.0 or later used in input"
                self.varsP.updatePipeReport( err+"\n\n" )
                util.LogError("warning", err)
                self.varsP.doScanScale = False
            else : #log that scan scaling is used
                self.varsP.updatePipeReport( "Using scan scaled bnx: "+rescaledbnx+".bnx\n\n" )
                util.LogStatus("parameter", "scanscaled_bnx", rescaledbnx+".bnx")
                self.varsP.sorted_file = rescaledbnx #this variable is used in splitBNX (PairwiseModule.py)
            
        util.LogStatus("progress", "stage_complete", self.stageName)
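The essential step above is the hand-off between the two passes: Autonoise0 writes an .errbin that Autonoise1 consumes via -readparameters, and the refined values are then copied into the 'noise0' argument set. A condensed outline of that flow, paraphrasing the code above:

    # Condensed outline of the two-pass hand-off above:
    # pass 0: estimate noise from the bnx
    #   varsP.noise0 = readNoiseParameters(output_file)
    # feed pass-0 results into pass 1:
    #   varsP.replaceParam("noise0", "-readparameters", output_file + ".errbin")
    # pass 1: re-estimate, then load refined values into the 'noise0' set:
    #   for k in ("FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"):
    #       if k in varsP.noise1:
    #           varsP.replaceParam("noise0", "-" + k, str(varsP.noise1[k]))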
Code Example #16
                break
            if case():
                #varsP.error += 1 #these don't do anything
                #varsP.message += '  Error: Refine stage name invalid: '+str(StageName)+'\n'
                self.varsP.updatePipeReport(
                    "Internal error: unknown stage %s" % StageName)
                return

        clusargs = varsP.getClusterArgs(
            StageName
        )  #get arguments before changing StageName, then add suffix
        StageName += (
            ("_%i" % self.varsP.extensionCount)
            if StageName.startswith("extension") else "")  #for status.xml only
        self.varsP.stageName = StageName
        util.LogStatus("progress", "stage_start", StageName)
        #super is more pythonic than referring to the base class explicitly (only matters for multiple inheritance)
        super(Refine, self).__init__(varsP, StageName, clusterArgs=clusargs)
        #intermediateContigPrefix = self.varsP.expID + self.StageName.replace("refine", "_r")
        self.varsP.prepareContigIO(ContigPrefix, StageName)
        #modify results of varsP.prepareContigIO for special case of refineNGS
        self.generateJobList()

    def runJobs(self):
        self.multiThreadRunJobs(self.varsP.nThreads, sleepTime=0.2)

    def writeIDFile(self, nJobs):
        f1 = open(self.varsP.idFile, 'wb')
        f1.write(str(nJobs))
        f1.close()
Code Example #17
 def checkResults(self):
     self.varsP.stageComplete = 'Extension% 2d' % self.varsP.extensionCount
     self.varsP.mergeIntoSingleCmap()
     self.doAllPipeReport()  #see Multithreading.jobWrapper
     utilities.LogStatus("progress", "stage_complete", self.stageName)
Code Example #18
    def __init__(self, varsP):
        jobName = "reference_process"
        opta_section = "referenceSvdetect"
        default_mres = "2.9"
        mres = "-mres"
        self.varsP = varsP
        usedefault = False
        if self.varsP.argData.has_key(opta_section):  #check if in optargs
            opta = self.varsP.argsListed(opta_section)
            if not mres in opta:  #must have mres
                self.varsP.updatePipeReport(
                    "Warning in referenceProcess: " + mres +
                    " missing in optArguments section " + opta_section + "\n")
                usedefault = True
        else:
            self.varsP.updatePipeReport(
                "Warning in referenceProcess: optArguments section " +
                opta_section + " missing\n")
            usedefault = True
        if usedefault:
            opta = [mres, default_mres]

        mresstr = opta[opta.index(mres) +
                       1]  #get string for mres value for output name
        mresstr = mresstr.replace(".", "")

        if not util.checkDir(self.varsP.refFolder):
            self.varsP.updatePipeReport(
                "ERROR in referenceProcess: could not make output dir %s\n" %
                self.varsP.refFolder)
            return None
        refpref = os.path.basename(
            self.varsP.ref[:self.varsP.ref.rfind(".")]) + "_res" + mresstr
        outarg = os.path.join(
            self.varsP.refFolder,
            refpref)  #refFolder is new output folder for this job
        expectedResultFile = outarg + ".cmap"  #if ref is spots, is this spots?
        args = [
            self.varsP.RefAlignerBin, '-f', '-o', outarg, '-i', self.varsP.ref,
            '-merge'
        ] + opta
        stdoutf = None
        if self.varsP.stdoutlog:
            args.extend(['-stdout', '-stderr'])
            stdoutf = outarg + ".stdout"
        args += ['-maxthreads', str(self.varsP.nThreads)]

        super(referenceProcess,
              self).__init__(self.varsP,
                             jobName,
                             clusterArgs=self.varsP.getClusterArgs("assembly"))

        job = mthread.singleJob(args,
                                jobName,
                                expectedResultFile,
                                jobName,
                                maxThreads=self.varsP.nThreads,
                                clusterLogDir=self.varsP.clusterLogDir,
                                expectedStdoutFile=stdoutf)
        self.addJob(job)

        util.LogStatus("progress", "stage_start", jobName)
        self.varsP.runJobs(self, "referenceProcess")
        self.doAllPipeReport()
        if not self.allResultsFound(
        ):  #this is an error, but we'll continue processing without SV detect
            err = "ERROR in referenceProcess: job failed, disabling SV detect"
            self.varsP.updatePipeReport(err + "\n")
            util.LogError("error", err)
            #self.varsP.runSV = False #no need since this class is used in SVModule
        else:
            self.varsP.refDeresed = expectedResultFile  #store good result for SV detect
            self.varsP.updatePipeReport(
                "referenceProcess: using reference %s for svdetect\n" %
                self.varsP.refDeresed)
        util.LogStatus("progress", "stage_complete", jobName)
Code Example #19
 def endStage(self):  #same as GroupedRefinementModule.Refine.endStage
     utilities.LogStatus("progress", "stage_complete", self.refineStage)
Code Example #20
def runAlignMol():
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        '-q',
        dest='queryDir',
        help=
        'Path to merged cmap to align molecules (-b) to OR alignmol dir from Pipeline for merge (if latter, no alignments are performed), required',
        type=str)
    parser.add_argument(
        '-b',
        dest='bnx',
        help='Input molecule (.bnx) file, required if aligning molecules',
        type=str)
    #parser.add_argument('-b', dest='bnx', help='Input molecule (.bnx) file OR path to dir containing split bnx pieces, required if aligning molecules', type=str) #I should add the split feature; for now, just do single bnx
    parser.add_argument(
        '-a',
        dest='optArguments',
        help=
        'Path to optArguments.xml (optional, default optArguments_human.xml in Pipeline dir if found, otherwise required)',
        default="",
        type=str)
    parser.add_argument(
        '-r',
        help=
        'If this flag is used, alignmolvref arguments are used, otherwise alignmol arguments are used (default alignmol; optional)',
        dest='ref',
        action='store_true')
    parser.add_argument(
        '-o',
        dest='outputDir',
        help=
        'output dir (optional, defaults to sub-dir of input map dir called "alignmol")',
        default="",
        type=str)
    parser.add_argument(
        '-t',
        dest='RefAligner',
        help='Path to RefAligner or dir containing it (required)',
        type=str)
    parser.add_argument(
        '-T',
        dest='numThreads',
        help='Total number of threads (cores) to use (optional, default 4)',
        default=4,
        type=int)
    parser.add_argument(
        '-j',
        dest='maxthreads',
        help=
        'Threads per Job, -maxthreads (non-cluster only;optional, default 4)',
        default=4,
        type=int)
    parser.add_argument(
        '-e',
        dest='errFile',
        help=
        '.err file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise',
        default="",
        type=str)
    parser.add_argument(
        '-E',
        dest='errbinFile',
        help=
        '.errbin file to use for noise parameters--will supersede noise parameters in the optArgument supplied (but that file must still be supplied for non-noise parameters)--should be from autoNoise',
        default="",
        type=str)
    parser.add_argument(
        '-p',
        dest='pipelineDir',
        help=
        'Pipeline dir (optional, defaults to script dir, or current directory)',
        default="",
        type=str)
    result = parser.parse_args()

    outprefix = "exp_refineFinal1"  #this is the default; assume for now

    #check all Pipeline dependencies
    if result.pipelineDir:
        cwd = result.pipelineDir
    else:
        cwd = os.path.split(
            os.path.realpath(__file__))[0]  #this is path of this script
        if not os.path.isfile(os.path.join(
                cwd,
                "utilities.py")):  #if still not here, last try is actual cwd
            cwd = os.getcwd()  #still check this below

    #this is the only one imported here and in runCharacterize
    if not os.path.isfile(os.path.join(cwd, "utilities.py")):
        print "ERROR: utilities.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import utilities as util

    if not os.path.isfile(os.path.join(cwd, "AlignModule.py")):
        print "ERROR: AlignModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import AlignModule as alignmod

    if not util.checkFile(os.path.join(cwd, "Pipeline.py")):
        print "ERROR: Pipeline.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import Pipeline

    if not util.checkFile(os.path.join(cwd, "mapClasses.py")):
        print "ERROR: mapClasses.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    import mapClasses as mc

    #input dir
    if not result.queryDir:
        print "ERROR: Query (-q) argument not supplied."
        sys.exit(1)
    qrypath = os.path.realpath(result.queryDir)
    if util.checkDir(
            qrypath, checkWritable=False,
            makeIfNotExist=False):  #output elsewhere so not writeable is ok
        runaligns = False
    elif util.checkCmap(qrypath):
        runaligns = True
    else:
        print "ERROR: Query argument (" + qrypath + ") not found or not a dir or cmap. Check -q argument."
        sys.exit(1)

    #this check isn't really necessary...make it a warning -- left over from runAlignMerge.py
    #if not os.path.split(qrypath)[1].endswith("alignmol") :
    #    print "Warning: Query dir ("+qrypath+") does not end with 'alignmol'; please be sure this is a Pipeline alignmol dir\n"

    #RefAligner -- check for either path to RefAligner, or dir containing it, depending on cluster args
    rabin = ""  #need empty string for generateJobList even though no jobs are run
    if runaligns:
        rabin = result.RefAligner
        #replicate Pipeline behavior: RefAligner is always required
        if os.path.isdir(rabin):
            rabin = os.path.join(rabin, "RefAligner")
        if not util.checkExecutable(rabin):
            print "ERROR: RefAligner not found or not executable at", rabin, "\nPlease supply RefAligner dir or full path as -t arg."
            sys.exit(1)

    #optargs file
    optargs = None
    if runaligns and result.optArguments:  #supplied on command line
        optargs = result.optArguments
        if not util.checkFile(optargs, ".xml"):
            print "optArguments path is supplied (" + optargs + ") but not found or doesn't end in .xml, check -a argument."
            sys.exit(1)
    elif runaligns:  #load from Pipeline dir if running alignments
        optargs = os.path.join(cwd, "optArguments_human.xml")
        if not util.checkFile(optargs):
            print "optArguments.xml missing in Pipeline directory (" + cwd + "). Try supplying path explicitly using -a."
            sys.exit(1)

    #output dir
    if not result.outputDir:
        outdir = os.path.join(qrypath,
                              "merge")  #should be same as in AlignModule
    else:
        outdir = os.path.realpath(result.outputDir)
    if os.path.isdir(outdir):
        if not util.checkDir(outdir):  #check writeable
            print "\nERROR: Output dir is not writeable:\n", outdir, "\n"
            sys.exit(1)
        #this is ok here
        #elif outdir == contigdir :
        #    print "\nERROR: Output dir cannot be same as input dir:\n", outdir, "\n"
        #    sys.exit(1)
        print "\nWARNING: Output dir already exists, results will be overwritten:\n", outdir, "\n"
    elif not util.checkDir(
            outdir
    ):  #does not exist, make, if False, can't make or not writeable
        print "\nERROR: Output dir cannot be created or is not writeable:\n", outdir, "\n"
        sys.exit(1)

    #bnx file
    bnxfile = result.bnx
    if bnxfile:  #must check for empty string BEFORE you do realpath, or it returns cwd
        bnxfile = os.path.realpath(bnxfile)
        if not util.checkFile(bnxfile, ".bnx"):
            print "ERROR: bnx file supplied but not found or incorrect suffix:", bnxfile
            sys.exit(1)
    elif runaligns:
        print "ERROR: bnx file not supplied but running alignments; please supply bnx file as -b argument"
        sys.exit(1)

    #nthreads
    nthreads = result.numThreads
    if nthreads <= 0:
        print "ERROR: Number of threads value invalid (must be > 0): %i" % nthreads
        sys.exit(1)

    #maxthreads
    maxthreads = result.maxthreads
    if maxthreads <= 0:
        print "ERROR: Max threads value invalid (must be > 0): %i" % maxthreads
        sys.exit(1)
    elif nthreads < maxthreads:
        print "Warning: num threads (-T: %i) < max threads (-j: %i): increasing num threads to equal max threads\n" % (
            nthreads, maxthreads)
        nthreads = maxthreads

    #.errbin file
    errbinfile = result.errbinFile
    if errbinfile:
        errbinfile = os.path.realpath(result.errbinFile)
        if not util.checkFile(errbinfile, ".errbin"):
            print "ERROR: errbin file supplied but not found or incorrect suffix:", errbinfile
            sys.exit(1)

    #.err file
    errfile = result.errFile
    if errfile and errbinfile:
        print "Warning: .err and .errbin arguments supplied; ignoring .err file"
        errfile = ""
    elif errfile:
        errfile = os.path.realpath(result.errFile)
        if not util.checkFile(errfile, ".err"):
            print "err file supplied but not found or incorrect suffix:", errfile
            sys.exit(1)

    if errfile and not util.checkFile(os.path.join(cwd,
                                                   "SampleCharModule.py")):
        print "SampleCharModule.py missing in dir", cwd, "check -p argument, or run this script in Pipeline dir"
        sys.exit(1)
    elif errfile:
        import SampleCharModule as scm

    doref = result.ref

    #DONE checking arguments

    print "Using output dir", outdir
    if runaligns:
        print "Aligning", bnxfile, "\nTo", qrypath, "\n"
    else:
        print "Merging", qrypath, "\n"

    startTime = time.time()  #time since Epoch
    memory_log = os.path.join(outdir, "memory_log.txt")
    util.initMemoryLog(memory_log)

    varsP = Pipeline.varsPipeline()
    varsP.RefAlignerBin = rabin
    varsP.contigFolder = ""  #not used but needs to be an attr
    varsP.outputContigFolder = ""  #not used but needs to be a string attr
    varsP.pipeReportFile = os.path.join(outdir, "alignmol_jobs_log.txt")
    varsP.infoReportFile = os.path.join(outdir, "alignmol_log.txt")
    util.InitStatus(os.path.join(outdir, "status.xml"))

    if runaligns:
        varsP.optArgumentsFileIn = optargs
        varsP.latestMergedCmap = qrypath  #if !doref, need this one
        varsP.ref = qrypath  #and if doref, need this one
        varsP.nThreads = nthreads  #necessary otherwise job won't start -- max threads per node
        varsP.maxthreads = maxthreads  #threads per job
        p = os.path.split(qrypath)[1]
        varsP.outputContigPrefix = p[:p.rfind(".")]  #filename prefix
        varsP.stdoutlog = True  #use -stdout -stderr
        varsP.sorted_file = bnxfile[:bnxfile.rfind(
            ".")]  #enables the mol fraction align in AlignModule.getAlignStats
        if qrypath.endswith(".cmap"):  #enable the mol stats
            varsP.totAssemblyLenMb = mc.multiCmap(
                qrypath, lengthonly=True).totalLength / 1e6

        varsP.memoryLogpath = os.path.join(outdir, "memory_log.txt")
        varsP.parseArguments()  #parses optArgumentsFile
        varsP.checkDependencies()
        varsP.RefAlignerBinOrig = rabin
        varsP.prerunLog(
        )  #general information in log -- needed for refaligner_version

        noisep = {}
        if errbinfile:
            noisep = {"readparameters": errbinfile}
            #print "Using noise parameters from "+errbinfile+"\n" #move below
        elif errfile:
            noisep = scm.readNoiseParameters(errfile.replace(".err", ""))
            if noisep.has_key(
                    'readparameters'
            ):  #remove this because it's redundant, and it can cause problems with RefAligner compatibility
                del noisep['readparameters']
            if not noisep:  #readNoiseParameters returns empty dict on failure
                print "ERROR reading noise parameters, check .err file:", errfile
                sys.exit(1)
            #redundant with below?
            print "Using noise parameters from " + errfile + ":\n" + " ".join(
                ["-" + str(k) + " " + str(v)
                 for k, v in noisep.iteritems()]) + "\n"

        #some code from SampleCharModule to load args into noise0
        infoReport = "Loaded noise parameters:\n"
        klist = [
            "FP", "FN", "sf", "sd", "sr", "bpp", "readparameters"
        ]  #hardcoding parameters is kind of bad, but it fixes the order without using OrderedDict.
        #noiseargs = self.varsP.argsListed('noise0') #not necessary
        for v in klist:
            if not noisep.has_key(v):
                continue
            param = str(noisep[v])
            util.LogStatus("parameter", "auto_" + v, param)
            infoReport += v + ":" + param + "\n"
            varsP.replaceParam("noise0", "-" + v, param)
        varsP.updateInfoReport(infoReport + '\n', printalso=True)

    else:
        print "Getting file list from", qrypath
        outFileList = getOutFileList(util, qrypath)
        if not outFileList:
            print "ERROR: Query dir (" + qrypath + ") does not contain alignmol data. Check -q argument."
            sys.exit(1)
        else:
            print "Found", len(outFileList), "alignment results"
    #end if runaligns

    amod = alignmod.AlignModule(
        varsP, doref, outdir, bnxfile)  #constructor will call generateJobList

    if runaligns:
        amod.runJobs()
        amod.checkResults()
    else:
        amod.outFileList = outFileList
        p = os.path.split(outFileList[0])[1]
        if p.count("_") > 1:  #expect something like "EXP_REFINEFINAL1_4"
            #p = p[:p.rfind("_")+1] #remove integer suffix
            p = p[:p.rfind("_")]  #remove integer suffix (and underscore)
        #else :
        #    p += "_" #because mrgstr is appended
        varsP.outputContigPrefix = p

    if not runaligns or len(amod.jobList) > 0:
        amod.getAlignStats()

    if runaligns:
        print
        #copy from Pipeline.py
        if util.SummarizeErrors(varsP=varsP) == 0:
            varsP.updatePipeReport("Pipeline has successfully completed\n")
            util.LogStatus("progress", "pipeline", "success")
        else:
            varsP.updatePipeReport("Pipeline has completed with errors\n")
            util.LogStatus("progress", "pipeline", "failure")

    #BELOW OLD CODE

    return

    #in Pipeline, this is called first
    #print "Calling getAlignStats:" #but it won't work without varsP atm; skip it
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)
    #getAlignStats(self.varsP, self.outFileList, self.varsP.totAssemblyLenMb, isref=False, mergepath=self.mergedir)

    print "Calling mergeMap"
    print outFileList[0]  #, "\n", outputdir #moved above
    util.logMemory(memory_log, startTime, "mergeMap_start")
    #mergeMap(self.varsP, self.outFileList, mergepath=self.outputdir) #varsP is optional
    alignmod.mergeMap(None, outFileList, outputdir)
    util.logMemory(memory_log, startTime, "mergeMap_end")

    print "Calling mergeRcmaps"
    util.logMemory(memory_log, startTime, "mergeRcmaps_start")
    #mergeRcmaps(outFileList, outdir, varsP=None, splitByContig=None, stageName="alignmol") :
    alignmod.mergeRcmaps(outFileList,
                         outputdir,
                         splitByContig=True,
                         stageName=outprefix)
    util.logMemory(memory_log, startTime, "mergeRcmaps_end")

    print "Calling split_XMap_byContig"  #split_XMapQcmap_byContig"
    util.logMemory(memory_log, startTime, "split_XMap_byContig_start")
    #xmapdict = alignmod.split_XMap_byContig(outFileList, outputdir, stageName=outprefix) #old
    xmapdict = alignmod.split_XMap_byContig_new(outFileList,
                                                outputdir,
                                                stageName=outprefix)
    util.logMemory(memory_log, startTime, "split_XMap_byContig_end")

    print "Calling split_Qcmap_byContig"
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_start")
    #alignmod.split_Qcmap_byContig(outFileList, outputdir, xmapdict) #old
    alignmod.split_Qcmap_byContig_new(
        outFileList, outputdir, xmapdict,
        stageName=outprefix)  #new: better performance
    util.logMemory(memory_log, startTime, "split_Qcmap_byContig_end")

    print "AlignMerge successfully completed"
Code Example #21
    def multiThreadRunJobs(self,
                           nActiveThreads,
                           sleepTime=0.01,
                           threadControl=False,
                           background=False,
                           callLogStatus=True):
        """Main queue loop: start jobs and check them for completion."""

        #this is useful as a generic way to skip running a module--no jobs are submitted
        if len(self.jobList) == 0:
            self.varsP.updatePipeReport(
                " Warning in multiThreadRunJobs: number of jobs is 0, skipping stage: "
                + self.groupName + "\n")
            return

        if nActiveThreads == 0:
            self.varsP.updatePipeReport(
                " Error in multiThreadRunJobs: nActiveThreads must be > 0, skipping stage: "
                + self.groupName + "\n")
            return

        if background:
            start_new_thread(self.multiThreadRunJobs,
                             (nActiveThreads, sleepTime, threadControl, False))
            return

        utilities.logMemory(
            self.varsP.memoryLogpath, self.varsP.startTime,
            self.groupName)  #call at start and end of this method
        jobw = 30  #width of job name in printout
        print ' Starting Multi-Threaded Process:'
        print '  ' + self.groupName
        self.nThreads = nActiveThreads
        availableThreads = nActiveThreads
        startTime = time.time()
        activeJobList = []
        nActiveJobs = 0
        nFinishedJobs = 0
        nActiveThrottle = 0
        nJobs = len(self.jobList)
        nRemainingJobs = nJobs

        global cSession
        if self.onCluster and cSession is None:
            cSession = drmaa.Session()
            cSession.initialize()
        print '  Running ' + str(nJobs) + ' jobs with ' + str(
            nActiveThreads) + ' threads'
        if callLogStatus:
            utilities.LogStatus("progress", "jobs_outstanding", str(nJobs),
                                self.groupName)
            utilities.LogStatus("progress", "stage_pct_done", "0.0",
                                self.groupName)
        job_status = (0, nJobs)
        while True:
            if nRemainingJobs > 0:
                for i, sJob in enumerate(self.jobList):
                    if sJob.jobStarted or sJob.isRunning or sJob.isComplete:
                        continue
                    if sJob.hasContingentJob:
                        if not sJob.contingentJob.isComplete:
                            continue
                    if not (sJob.onCluster):
                        if nActiveJobs >= nActiveThreads:
                            continue
                    if not (sJob.onCluster):
                        if availableThreads < sJob.maxThreads:
                            continue
                    if self.throttle and sJob.throttleClass:
                        if nActiveThrottle >= self.throttleMax:
                            continue
                        nActiveThrottle += 1
                    activeJobList.append(sJob)
                    nActiveJobs += 1
                    sJob.startJob(cSession=cSession,
                                  clusterArgs=self.clusterArgs)
                    availableThreads -= sJob.maxThreads
                    nRemainingJobs -= 1
                    statusString = ('   START% 4d: % ' + str(jobw) +
                                    's,% 3dThr,% 4dR,% 4dT,% 4dF,% 4dQ') % (
                                        sJob.jobNum, sJob.jobName[:jobw],
                                        nActiveThreads, nActiveJobs, nJobs,
                                        nFinishedJobs, nRemainingJobs)
                    print statusString
                    sys.stdout.flush()
                    time.sleep(
                        sleepTime
                    )  #sleep between job submission, but wait to check status

            #The block below is error prone in the case of multiple jobWrapper objects running simultaneously,
            # which we have implemented for CharacterizeModule using threading. The problem is that the
            # characterize os.wait call can steal the pid of another job, say, refinement, and then the
            # refinement job will never be marked completed. Simplest is to just wait, and inside
            # CheckIfRunning, the poll will take care of each job individually.
            time.sleep(sleepTime)
            '''
	    (pid, rc)=(-1, -1) # Defaults so the statement works for cluster jobs
            if self.onCluster : #if on cluster, no os.wait call is needed; sleep instead, then check all jobs
                time.sleep(sleepTime)
            else :
                try : #if not on cluster, use os.wait to wait for child process to finish
                    global my_wait
                    (pid, rc)=my_wait() #any child?
                except OSError, e :
                    time.sleep(sleepTime)
                    #print e

            #Set the return code of the job which was stolen by the wait call above (see comment below).
            for sJob in activeJobList: 
                if sJob.markCompleted(pid, rc) :
                    break #skip rest once correct one found
                    '''

            #The old version of this loop was dangerous because it popped from the list being iterated over.
            #So, if you skip a job due to this and that job's return code was stolen by the wait above,
            # then the job is never marked complete.
            #Though the below fix is probably sufficient, do the above also just to be safe.
            #If you iterate backwards, using reversed, removing an element will not affect the loop on the remaining elements
            for sJob in reversed(activeJobList):
                #sJob.markCompleted(pid, rc) #this call moved into loop above (see above comments)
                if sJob.CheckIfRunning(cSession=cSession):
                    continue
                else:
                    #activeJobList.pop(i)
                    activeJobList.remove(sJob)
                    nActiveJobs -= 1
                    nFinishedJobs += 1
                    availableThreads += sJob.maxThreads
                    availableThreads = min(nActiveThreads, availableThreads)
                    if self.throttle and sJob.throttleClass:
                        nActiveThrottle -= 1
                    statusString = ('   STOP % 4d: % ' + str(jobw) +
                                    's,% 3dThr,% 4dR,% 4dT,% 4dF,% 4dQ') % (
                                        sJob.jobNum, sJob.jobName[:jobw],
                                        nActiveThreads, nActiveJobs, nJobs,
                                        nFinishedJobs, nRemainingJobs)
                    statusString += ' ' + timeFormat1(sJob.runTime)
                    print statusString

            #log status after above loop to calculate nFinishedJobs
            pct_done = (nFinishedJobs * 100.0 / nJobs if nJobs > 0 else 0)
            njr = nJobs - nFinishedJobs  #num jobs remaining
            new_status = (pct_done, njr)
            if job_status != new_status and callLogStatus:
                utilities.LogStatus("progress", "jobs_outstanding", "%d" % njr,
                                    self.groupName)
                utilities.LogStatus("progress", "stage_pct_done",
                                    "%.01f" % pct_done, self.groupName)
                job_status = new_status

            if nActiveJobs == 0 and nRemainingJobs == 0:
                break
            elif nActiveJobs < 0 or nRemainingJobs < 0:
                print "ERROR in multithreading: invalid: nActiveJobs:", nActiveJobs, "nRemainingJobs:", nRemainingJobs
                break

            #Note: you cannot check len(activeJobList) here because if one job takes all the threads,
            # it can finish, causing the list to be empty, but there are still more jobs to submit.
            # This is not an error.

            sys.stdout.flush()
        #end job submission - check loop

        #if self.onCluster:
        #   cSession.exit()
        self.elapsedTime = time.time() - startTime
        self.cpuTime = 0.
        for sJob in self.jobList:
            self.cpuTime += sJob.runTime
        print ' Finished Multi-Threaded Process:'
        print '  ' + self.groupName
        print  #extra newline for readability
        sys.stdout.flush()
        utilities.logMemory(
            self.varsP.memoryLogpath, self.varsP.startTime,
            self.groupName)  #call at start and end of this method
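The reversed() iteration above is a general pattern: removing the element just visited does not shift the indices of elements not yet visited. A minimal standalone demonstration:

    # Removing while iterating: reversed() keeps unvisited indices stable.
    jobs = ["a", "b", "c", "d"]
    for j in reversed(jobs):
        if j in ("b", "c"):      # pretend these jobs just finished
            jobs.remove(j)
    assert jobs == ["a", "d"]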
Code Example #22
 def checkResults(self):
     self.varsP.stageComplete = self.refineStage
     self.varsP.mergeIntoSingleCmap()
     self.doAllPipeReport()  #see Multithreading.jobWrapper
     utilities.LogStatus("progress", "stage_complete", self.refineStage)