Example #1
 def testScriptTree_Example2(self):
     """Tests that the global and local temp dirs of a job behave as expected.
     """
     for test in xrange(self.testNo):
         command = "scriptTreeTest_Wrapper2.py --jobTree %s --logLevel=INFO --retryCount=0" % self.jobTreeDir
         system(command)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Example #2
 def testScriptTree_Example(self):
     """Uses the jobTreeTest code to test the scriptTree Target wrapper.
     """
     for test in xrange(self.testNo):
         command = "scriptTreeTest_Wrapper.py --jobTree %s --logLevel=INFO --retryCount=10" % self.jobTreeDir
         system(command)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Example #3
 def execute(self):
     task_id = os.environ.get('SGE_TASK_ID')
     if task_id is None:
         raise RuntimeError("Multi-target launched without task id")
     task_id = int(task_id)
     if task_id < 1 or task_id > len(self.commands):
         raise RuntimeError("Task ID not within the array range 1 <= %i <= %i" % (task_id, len(self.commands)))
     (job, outfile) = self.commands[task_id - 1]
     if outfile is None:
         system(job)
     else:
         system("%s > %s" % (job, outfile))
Example #4
 def testSort(self):
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile1 = getTempFile(rootDir=tempDir)
         makeFileToSort(tempFile1)
         lines1 = loadFile(tempFile1)
         lines1.sort()
         sort(tempFile1)
         lines2 = loadFile(tempFile1)
         checkEqual(lines1, lines2)
         system("rm -rf %s" % tempDir)
Example #5
 def testGetMidPoint(self):
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile = getTempFile(rootDir=tempDir)
         makeFileToSort(tempFile)
         l = open(tempFile, 'r').read()
         fileSize = os.path.getsize(tempFile)
         midPoint = getMidPoint(tempFile, 0, fileSize)
         print "the mid point is %i of a file of %i bytes woth byte" % (midPoint, fileSize)
         assert midPoint < fileSize
         assert l[midPoint] == '\n'
         assert midPoint >= 0
         system("rm -rf %s" % tempDir)
Example #6
def runJobTree(command, jobTreeDir, logLevel="DEBUG", retryCount=0, batchSystem="single_machine", 
               rescueJobFrequency=None):
    """A convenience function for running job tree from within a python script.
    """
    if rescueJobFrequency != None:
        rescueJobFrequencyString = "--rescueJobsFrequency %s" % float(rescueJobFrequency)
    else:
        rescueJobFrequencyString = ""
    command = "jobTree --command \"%s\" --jobTree %s --logLevel %s \
--retryCount %i --batchSystem %s %s" % \
            (command, jobTreeDir,  logLevel, retryCount, batchSystem, rescueJobFrequencyString)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the jobtree apparently okay")
Example #7
 def run(self):
     while True:
         command, logFile, jobID = self.inputQueue.get()
         #fnull = open(os.devnull, 'w') #Pipe the output to dev/null (it is caught by the slave and will be reported if there is an error)
         tempLogFile = getTempFile()
         fileHandle = open(tempLogFile, 'w')
         process = subprocess.Popen(command, shell=True, stdout = fileHandle, stderr = fileHandle)
         sts = os.waitpid(process.pid, 0)
         fileHandle.close()
         #fnull.close()
         if os.path.exists(tempLogFile):
             system("mv %s %s" % (tempLogFile, logFile))
         self.outputQueue.put((command, sts[1], jobID))
         self.inputQueue.task_done()
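A hedged sketch of how such a worker might be wired up. The Worker class name, its constructor, and the thread count are assumptions inferred from the run() method above, not taken from the source.

    # Hypothetical wiring (Python 2; Worker is assumed to be a threading.Thread
    # subclass whose run() method is the one shown above).
    import Queue
    inputQueue = Queue.Queue()
    outputQueue = Queue.Queue()
    for i in xrange(4):  # arbitrary worker count, for illustration only
        worker = Worker(inputQueue, outputQueue)  # assumed constructor
        worker.setDaemon(True)  # so blocked workers do not prevent exit
        worker.start()
    inputQueue.put(("echo hello", "/tmp/job0.log", 0))  # (command, logFile, jobID)
    inputQueue.join()  # run() calls task_done() after each job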
Example #8
 def testCopySubRangeOfFile(self):
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile = getTempFile(rootDir=tempDir)
         outputFile = getTempFile(rootDir=tempDir)
         makeFileToSort(tempFile)
         fileSize = os.path.getsize(tempFile)
         assert fileSize > 0
         fileStart = random.choice(xrange(0, fileSize))
         fileEnd = random.choice(xrange(fileStart, fileSize))
         copySubRangeOfFile(tempFile, fileStart, fileEnd, outputFile)
         l = open(outputFile, 'r').read()
         l2 = open(tempFile, 'r').read()[fileStart:fileEnd]
         checkEqual(l, l2)
         system("rm -rf %s" % tempDir)
Example #9
def scriptTree_SortTest(testNo, batchSystem, lines=100000, maxLineLength=10, N=1000):
    """Tests scriptTree/jobTree by sorting a file in parallel.
    """
    for test in xrange(testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile = getTempFile(rootDir=tempDir)
        jobTreeDir = os.path.join(tempDir, "jobTree")
        makeFileToSort(tempFile, lines=lines, maxLineLength=maxLineLength)
        #First make our own sorted version
        fileHandle = open(tempFile, 'r')
        l = fileHandle.readlines()
        l.sort()
        fileHandle.close()
        #Sort the file
        command = "scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %i --batchSystem %s --jobTime 1.0" % (jobTreeDir, tempFile, N, batchSystem)
        system(command)
        #Now check the file is properly sorted..
        #Now get the sorted file
        fileHandle = open(tempFile, 'r')
        l2 = fileHandle.readlines()
        fileHandle.close()
        checkEqual(l, l2)
        system("rm -rf %s" % tempDir)
Example #10
 def testJobTreeStats_SortSimple(self):
     """Tests the jobTreeStats utility using the scriptTree_sort example.
     """
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile = getTempFile(rootDir=tempDir)
         outputFile = getTempFile(rootDir=tempDir)
         jobTreeDir = os.path.join(tempDir, "jobTree")
         lines=100000
         maxLineLength=10
         N=1000
         makeFileToSort(tempFile, lines, maxLineLength)
         #Sort the file
         command = "scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %s --stats --jobTime 0.5" % (jobTreeDir, tempFile, N)
         system(command)
         #Now get the stats
         system("jobTreeStats --jobTree %s --outputFile %s" % (jobTreeDir, outputFile))
         #Cleanup
         system("rm -rf %s" % tempDir)
Example #11
def processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats):
    from workflow.jobTree.lib.bioio import getTempFile
    from workflow.jobTree.lib.bioio import getTempDirectory
    from workflow.jobTree.lib.bioio import logger
    from workflow.jobTree.lib.bioio import system
    from workflow.jobTree.lib.bioio import getTotalCpuTime
    
    assert len(job.find("children").findall("child")) == 0
    assert int(job.attrib["child_count"]) == int(job.attrib["black_child_count"])
    command = jobToRun.attrib["command"]
    #Copy the job file to be edited
    
    tempJob = ET.Element("job")
    ET.SubElement(tempJob, "children")
    
    #Log for job
    tempJob.attrib["log_level"] = job.attrib["log_level"]
    
    #Time length of 'ideal' job before further parallelism is required
    tempJob.attrib["job_time"] = job.attrib["job_time"]
    
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()

    #Temp file dirs for job.
    localTempDir = getTempDirectory(rootDir=localSlaveTempDir)
    tempJob.attrib["local_temp_dir"] = localTempDir
    depth = len(job.find("followOns").findall("followOn"))
    tempJob.attrib["global_temp_dir"] = os.path.join(job.attrib["global_temp_dir"], str(depth))
    if not os.path.isdir(tempJob.attrib["global_temp_dir"]): #Ensures that the global temp dirs of each level are kept separate.
        os.mkdir(tempJob.attrib["global_temp_dir"])
        os.chmod(tempJob.attrib["global_temp_dir"], 0777)
    if os.path.isdir(os.path.join(job.attrib["global_temp_dir"], str(depth+1))):
        system("rm -rf %s" % os.path.join(job.attrib["global_temp_dir"], str(depth+1)))
    assert not os.path.isdir(os.path.join(job.attrib["global_temp_dir"], str(depth+2)))
    
    #Deal with memory and cpu requirements (this pass tells the running job how much cpu and memory it has,
    #according to the batch system)
    tempJob.attrib["available_memory"] = str(memoryAvailable)
    tempJob.attrib["available_cpu"] = str(cpuAvailable)
    if stats != None:
        tempJob.attrib["stats"] = getTempFile(rootDir=localSlaveTempDir)
        os.remove(tempJob.attrib["stats"])
    
    #Now write the temp job file
    tempFile = getTempFile(rootDir=localSlaveTempDir)
    fileHandle = open(tempFile, 'w') 
    tree = ET.ElementTree(tempJob)
    tree.write(fileHandle)
    fileHandle.close()
    logger.info("Copied the jobs files ready for the job")
    
    if "JOB_FILE" not in command:
        logger.critical("There is no 'JOB_FILE' string in the command to be run to take the job-file argument: %s" % command)
        job.attrib["colour"] = "red" #Update the colour
    else:
        #First load the environment for the job.
        fileHandle = open(job.attrib["environment_file"], 'r')
        environment = cPickle.load(fileHandle)
        fileHandle.close()
        logger.info("Loaded the environment for the process")
        
        #Run the actual command
        tempLogFile = getTempFile(suffix=".log", rootDir=localSlaveTempDir)
        fileHandle = open(tempLogFile, 'w')
        finalCommand = command.replace("JOB_FILE", tempFile)
        if stats != None:
            startTime = time.time()
            startClock = getTotalCpuTime()
        process = subprocess.Popen(finalCommand, shell=True, stdout=fileHandle, stderr=subprocess.STDOUT, env=environment)
            
        sts = os.waitpid(process.pid, 0)
        fileHandle.close()
        truncateFile(tempLogFile, int(job.attrib["max_log_file_size"]))
        
        #Copy across the log file
        system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
        i = sts[1]
        
        logger.info("Ran the job command=%s with exit status %i" % (finalCommand, i))
        
        if i == 0:
            logger.info("Passed the job, okay")
            
            if stats != None:
                jobTag = ET.SubElement(stats, "job", { "time":str(time.time() - startTime), "clock":str(getTotalCpuTime() - startClock) })
                if os.path.exists(tempJob.attrib["stats"]):
                    jobTag.append(ET.parse(tempJob.attrib["stats"]).getroot())
            
            tempJob = ET.parse(tempFile).getroot()
            job.attrib["colour"] = "black" #Update the colour
            
            #Update the runtime of the stack..
            totalRuntime = float(job.attrib["total_time"])  #This is the estimate runtime of the jobs on the followon stack
            runtime = float(jobToRun.attrib["time"])
            totalRuntime -= runtime
            if totalRuntime < 0.0:
                totalRuntime = 0.0
            
            #The children
            children = job.find("children")
            assert len(children.findall("child")) == 0 #The children
            assert tempJob.find("children") != None
            for child in tempJob.find("children").findall("child"):
                memory, cpu, compTime = getMemoryCpuAndTimeRequirements(job, child)
                ET.SubElement(children, "child", { "command":child.attrib["command"], 
                        "time":str(compTime), "memory":str(memory), "cpu":str(cpu) })
                logger.info("Making a child with command: %s" % (child.attrib["command"]))
            
            #The follow on command
            followOns = job.find("followOns")
            followOns.remove(followOns.findall("followOn")[-1]) #Remove the old job
            if tempJob.attrib.has_key("command"):
                memory, cpu, compTime = getMemoryCpuAndTimeRequirements(job, tempJob)
                ET.SubElement(followOns, "followOn", { "command":tempJob.attrib["command"], 
                        "time":str(compTime), "memory":str(memory), "cpu":str(cpu) })
                ##Add the runtime to the total runtime..
                totalRuntime += compTime
                logger.info("Making a follow on job with command: %s" % tempJob.attrib["command"])
                
            elif len(tempJob.find("children").findall("child")) != 0: #This is to keep the stack of follow on jobs consistent.
                ET.SubElement(followOns, "followOn", { "command":"echo JOB_FILE", "time":"0", "memory":"1000000", "cpu":"1" })
                logger.info("Making a stub follow on job")
            #Write back the runtime, after adding the follow on time and subtracting the time of the run job.
            job.attrib["total_time"] = str(totalRuntime)
        else:
            logger.info("Failed the job")
            job.attrib["colour"] = "red" #Update the colour
    
    #Clean up
    system("rm -rf %s" % (localSlaveTempDir))
    logger.info("Cleaned up by removing temp jobfile (the copy), and the temporary file directory for the job")
Example #12
def mainLoop(config, batchSystem):
    """This is the main loop from which jobs are issued and processed.
    """    
    waitDuration = float(config.attrib["wait_duration"])
    assert waitDuration >= 0
    rescueJobsFrequency = float(config.attrib["rescue_jobs_frequency"])
    maxJobDuration = float(config.attrib["max_job_duration"])
    assert maxJobDuration >= 0
    logger.info("Got parameters, wait duration %s, rescue jobs frequency: %s max job duration: %s" % \
                (waitDuration, rescueJobsFrequency, maxJobDuration))
    
    #Kill any jobs on the batch system queue from the last time.
    assert len(batchSystem.getIssuedJobIDs()) == 0 #Batch system must start with no active jobs!
    logger.info("Checked batch system has no running jobs and no updated jobs")
    
    jobFiles = config.attrib["job_file_dir"].listFiles()
    logger.info("Got a list of job files")
    
    #Repair the job tree using any .old files
    fixJobsList(config, jobFiles)
    logger.info("Fixed the job files using any .old files")
    
    #Get jobs that were running, or that had failed reset to 'white' status
    restartFailedJobs(config, jobFiles)
    logger.info("Reworked failed jobs")
    
    updatedJobFiles = set() #Jobs whose status needs updating, either because they have finished, or because they need to be started.
    for jobFile in jobFiles:
        job = ET.parse(jobFile).getroot()
        if job.attrib["colour"] not in ("grey", "blue"):
            updatedJobFiles.add(jobFile)
    logger.info("Got the active (non grey/blue) job files")
    
    totalJobFiles = len(jobFiles) #Total number of job files we have.
    jobIDsToJobsHash = {} #A hash of the currently running job ids, made by the batch system.
    
    idealJobTime = float(config.attrib["job_time"]) 
    assert idealJobTime > 0.0
    
    maxIssuedJobs = int(config.attrib["max_jobs"]) #The maximum number of jobs to issue to the batch system
    assert maxIssuedJobs >= 1
    
    stats = config.attrib.has_key("stats")
    if stats:
        startTime = time.time()
        startClock = getTotalCpuTime()
    
    logger.info("Starting the main loop")
    timeSinceJobsLastRescued = time.time() - rescueJobsFrequency + 100 #We hack it so that we rescue jobs after the first 100 seconds to get around an apparent parasol bug
    while True:
        if len(updatedJobFiles) > 0:
            logger.debug("Built the jobs list, currently have %i job files, %i jobs to update and %i jobs currently issued" % (totalJobFiles, len(updatedJobFiles), len(jobIDsToJobsHash)))
        
        jobsToIssue = []
        for jobFile in list(updatedJobFiles):
            job = ET.parse(jobFile).getroot()
            assert job.attrib["colour"] not in ("grey", "blue")
            
            if job.attrib["colour"] == "white": #Get ready to start the job
                if len(jobIDsToJobsHash) < maxIssuedJobs:
                    logger.debug("Job: %s is being started" % job.attrib["file"])
                    updatedJobFiles.remove(job.attrib["file"])
                    
                    #Reset the log files for the job.
                    open(job.attrib["slave_log_file"], 'w').close()
                    open(job.attrib["log_file"], 'w').close()
                    
                    job.attrib["colour"] = "grey"
                    #writeJobs([ job ]) #Check point, do this before issuing job, so state is not read until issued
                    
                    #issueJobs([ job ], jobIDsToJobsHash, batchSystem)
                    jobsToIssue.append(job)
                else:
                    logger.debug("Job: %s is not being issued yet because we have %i jobs issued" % (job.attrib["file"], len(jobIDsToJobsHash)))
            elif job.attrib["colour"] == "black": #Job has finished okay
                logger.debug("Job: %s has finished okay" % job.attrib["file"])
                #Deal with stats
                if stats:
                    system("cat %s >> %s" % (job.attrib["stats"], config.attrib["stats"]))
                    open(job.attrib["stats"], 'w').close() #Reset the stats file
                childCount = int(job.attrib["child_count"])
                blackChildCount = int(job.attrib["black_child_count"])
                assert childCount == blackChildCount #Has no currently running child jobs
                #Launch any unborn children
                unbornChildren = job.find("children")
                unbornChild = unbornChildren.find("child")
                if unbornChild != None: #We must give birth to the unborn children
                    logger.debug("Job: %s has children to schedule" % job.attrib["file"])
                    newChildren = []
                    while unbornChild != None:
                        cumulativeChildTime = float(unbornChild.attrib["time"])
                        
                        newJob = createJob(unbornChild.attrib.copy(), job.attrib["file"], config)
                        
                        totalJobFiles += 1
                        updatedJobFiles.add(newJob.attrib["file"])
                        
                        newChildren.append(newJob)
                        unbornChildren.remove(unbornChild)
                        unbornChild = unbornChildren.find("child")
                        
                        #This was code to aggregate groups of children into one job, but we don't do this now
                        #while cumulativeChildTime < idealJobTime and unbornChild != None: #Accumulate a series of children into each job as a stack of jobs (to balance the cost of parallelism against the cost of running jobs serially).
                        #    cumulativeChildTime += float(unbornChild.attrib["time"])
                        #    ET.SubElement(newJob.find("followOns"), "followOn", unbornChild.attrib.copy())
                        #    unbornChildren.remove(unbornChild)
                        #    unbornChild = unbornChildren.find("child")
                        newJob.attrib["total_time"] = str(cumulativeChildTime)
                    
                    updatedJobFiles.remove(job.attrib["file"])
                    job.attrib["child_count"] = str(childCount + len(newChildren))
                    job.attrib["colour"] = "blue" #Blue - has children running.
                    writeJobs([ job ] + newChildren ) #Check point
                    
                elif len(job.find("followOns").findall("followOn")) != 0: #Has another job
                    logger.debug("Job: %s has a new command that we can now issue" % job.attrib["file"])
                    ##Reset the job run info
                    job.attrib["remaining_retry_count"] = config.attrib["retry_count"]
                    job.attrib["colour"] = "white"
                    ##End resetting the job
                    writeJobs([ job ])
                    
                else: #Job has finished, so we can defer to any parent
                    logger.debug("Job: %s is now dead" % job.attrib["file"])
                    job.attrib["colour"] = "dead"
                    if job.attrib.has_key("parent"):
                        parent = ET.parse(job.attrib["parent"]).getroot()
                        assert parent.attrib["colour"] == "blue"
                        assert int(parent.attrib["black_child_count"]) < int(parent.attrib["child_count"])
                        parent.attrib["black_child_count"] = str(int(parent.attrib["black_child_count"]) + 1)
                        if int(parent.attrib["child_count"]) == int(parent.attrib["black_child_count"]):
                            parent.attrib["colour"] = "black"
                            assert parent.attrib["file"] not in updatedJobFiles
                            updatedJobFiles.add(parent.attrib["file"])
                        writeJobs([ job, parent ]) #Check point
                    else:
                        writeJobs([ job ])
                         
            elif job.attrib["colour"] == "red": #Job failed
                logger.critical("Job: %s failed" % job.attrib["file"])
                logger.critical("The log file of the failed job")
                logFile(job.attrib["log_file"], logger.critical)
                logger.critical("The log file of the slave for the failed job")
                logFile(job.attrib["slave_log_file"], logger.critical) #We log the job log file in the main loop
                #Checks
                assert len(job.find("children").findall("child")) == 0
                assert int(job.attrib["child_count"]) == int(job.attrib["black_child_count"])
                
                remainingRetryCount = int(job.attrib["remaining_retry_count"])
                if remainingRetryCount > 0: #Give it another try, maybe there is a bad node somewhere
                    job.attrib["remaining_retry_count"] = str(remainingRetryCount-1)
                    job.attrib["colour"] = "white"
                    logger.critical("Job: %s will be restarted, it has %s goes left" % (job.attrib["file"], job.attrib["remaining_retry_count"]))
                    writeJobs([ job ]) #Check point
                else:
                    assert remainingRetryCount == 0
                    updatedJobFiles.remove(job.attrib["file"])
                    logger.critical("Job: %s is completely failed" % job.attrib["file"])
                    
            else:
                logger.debug("Job: %s is already dead, we'll get rid of it" % job.attrib["file"])
                assert job.attrib["colour"] == "dead"
                updatedJobFiles.remove(job.attrib["file"])
                totalJobFiles -= 1
                deleteJob(job, config) #This could be done earlier, but I like it this way.

        ###End of for loop
        writeJobs(jobsToIssue) #Check point, do this before issuing job, so state is not read until issued
        issueJobs(jobsToIssue, jobIDsToJobsHash, batchSystem)

                
        if len(jobIDsToJobsHash) == 0 and len(updatedJobFiles) == 0:
            logger.info("Only failed jobs and their dependents (%i total) are remaining, so exiting." % totalJobFiles)
            break
        
        if len(updatedJobFiles) > 0:
            updatedJobs = batchSystem.getUpdatedJobs() #Asks the batch system what jobs have been completed.
        else:
            updatedJobs = pauseForUpdatedJobs(batchSystem.getUpdatedJobs) #Asks the batch system what jobs have been completed.
        for jobID in updatedJobs.keys(): #Runs through a map of updated jobs and their statuses
            result = updatedJobs[jobID]
            if jobIDsToJobsHash.has_key(jobID): 
                if result == 0:
                    logger.debug("Batch system is reporting that the job %s ended sucessfully" % jobIDsToJobsHash[jobID])   
                else:
                    logger.critical("Batch system is reporting that the job %s failed with exit value %i" % (jobIDsToJobsHash[jobID], result))  
                processFinishedJob(jobID, result, updatedJobFiles, jobIDsToJobsHash)
            else:
                logger.info("A result seems to already have been processed: %i" % jobID) #T
        
        if time.time() - timeSinceJobsLastRescued >= rescueJobsFrequency: #We only rescue jobs every N seconds
            reissueOverLongJobs(updatedJobFiles, jobIDsToJobsHash, config, batchSystem)
            logger.info("Reissued any over long jobs")
            
            reissueMissingJobs(updatedJobFiles, jobIDsToJobsHash, batchSystem)
            logger.info("Rescued any (long) missing jobs")
            timeSinceJobsLastRescued = time.time()
        #Going to sleep to let the job system catch up.
        time.sleep(waitDuration)
    
    if stats:
        fileHandle = open(config.attrib["stats"], 'a')
        fileHandle.write("<total_time time='%s' clock='%s'/></stats>" % (str(time.time() - startTime), str(getTotalCpuTime() - startClock)))
        fileHandle.close()
    
    logger.info("Finished the main loop")     
    
    return totalJobFiles #Returns number of failed jobs
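As a reading aid, here are the colour values that drive this loop's state machine, summarised from the branches above; the dict itself is illustrative and not part of the source.

    # Illustrative summary (not source code) of the job colours used by mainLoop:
    JOB_COLOURS = {
        "white": "ready to be issued to the batch system",
        "grey":  "issued to the batch system and running",
        "blue":  "waiting for running children to finish",
        "black": "finished okay; children or follow-ons may now be scheduled",
        "red":   "failed; retried while remaining_retry_count > 0",
        "dead":  "completely finished; the job file can be deleted",
    }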
Example #13
def runJobTreeStats(jobTree, outputFile):
    system("jobTreeStats --jobTree %s --outputFile %s" % (jobTree, outputFile))
    logger.info("Ran the job-tree stats command apparently okay")
Example #14
 def run(self):
     logger.info("At the end, this is the contents of the global temp dir...")
     system("ls -l %s" % self.getGlobalTempDir())
     logger.info("And done....")
     self.tempFileTree.destroyTempFiles()
Example #15
def runJobTreeStatusAndFailIfNotComplete(jobTreeDir):
    command = "jobTreeStatus --jobTree %s --failIfNotComplete --verbose" % jobTreeDir
    system(command)
Example #16
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir) #Cleanup the job tree in case it hasn't already been cleaned up.
Example #17
 def run(self):
     system("rm -rf %s" % self.getGlobalTempDir())
Example #18
 def execute(self, jobFile):
     job = ET.parse(jobFile).getroot()
     setLogLevel(job.attrib["log_level"])
     logger.info("Setup logging with level: %s" % job.attrib["log_level"])
     self.tempDirAccessed = False
     self.localTempDir = job.attrib["local_temp_dir"]
     self.globalTempDir = job.attrib["global_temp_dir"]
     maxTime = float(job.attrib["job_time"])
     memory = int(job.attrib["available_memory"])
     cpu = int(job.attrib["available_cpu"])
     
     if job.attrib.has_key("stats"):
         stats = ET.Element("stack")
         startTime = time.time()
         startClock = time.clock()
     else:
         stats = None
     
     newChildren = [] #List to add all the children to before we package them
     #off into stacks
     newChildCommands = [] #Ditto for the child commands
     newFollowOns = [] #Ditto for the follow-ons 
     while self.hasRemaining():
         if stats != None: #Getting the runtime of the stats module
             targetStartTime = time.time()
             targetStartClock = getTotalCpuTime()
             
         target = self.popTarget()
         target.setStack(self)
         #Debug check that we have the right amount of CPU and memory for the job in hand
         targetMemory = target.getMemory()
         if targetMemory != sys.maxint:
             assert targetMemory <= memory
         targetCpu = target.getCpu()
         if targetCpu != sys.maxint:
             assert targetCpu <= cpu
         #Run the target, first cleanup then run.
         target.run()
         #Cleanup after the target
         if self.tempDirAccessed:
             system("rm -rf %s/*" % self.localTempDir)
             self.tempDirAccessed = False
         #Handle the follow on
         followOn = target.getFollowOn()
         #if target.__class__ != CleanupGlobalTempDirTarget and followOn == None:
         #    followOn = CleanupGlobalTempDirTarget()
         if followOn != None: #Target to get rid of follow on when done.
             if target.isGlobalTempDirSet():
                 followOn.setGlobalTempDir(target.getGlobalTempDir())
             newFollowOns.append(followOn)
         
         #Now add the children to the newChildren stack
         newChildren += target.getChildren()
         
         #Now add the child commands to the newChildCommands stack
         newChildCommands += target.getChildCommands()
         
         if stats != None:
             ET.SubElement(stats, "target", { "time":str(time.time() - targetStartTime), 
                                             "clock":str(getTotalCpuTime() - targetStartClock),
                                             "class":".".join((target.__class__.__name__,)),
                                             "e_time":str(target.getRunTime())})
                 
     
     #######
     #Now build the new stacks and corresponding jobs
     #######
     
     #First add all the follow ons to the existing stack and make it a follow on job for job-tree
     assert not self.hasRemaining()
     
     #First sort out the follow on job
     if len(newFollowOns) > 0: #If we have follow ons
         followOnRuntime = sum([ followOn.getRunTime() for followOn in newFollowOns ])
         
         if followOnRuntime > maxTime: #We create a parallel list of follow ons
             followOnStack = Stack(ParallelFollowOnTarget(newFollowOns))
         else:
             followOnStack = Stack(newFollowOns.pop())
             while len(newFollowOns) > 0:
                 followOnStack.addTarget(newFollowOns.pop())
     
         job.attrib["command"] = followOnStack.makeRunnable(self.globalTempDir)
         job.attrib["time"] = str(followOnStack.getRunTime())
         followOnMemory = followOnStack.getMemory()
         assert not job.attrib.has_key("memory")
         if followOnMemory != sys.maxint:
             job.attrib["memory"] = str(followOnMemory)
         assert not job.attrib.has_key("cpu")
         followOnCpu = followOnStack.getCpu()
         if followOnCpu != sys.maxint:
             job.attrib["cpu"] = str(followOnCpu)
           
     #Now build stacks of children..
     childrenTag = job.find("children")
     while len(newChildren) > 0:
         childStack = Stack(newChildren.pop())
         while len(newChildren) > 0 and childStack.getRunTime() <= maxTime:
             childStack.addTarget(newChildren.pop())
         childJob = ET.SubElement(childrenTag, "child", { "command":childStack.makeRunnable(self.globalTempDir),
                                           "time":str(childStack.getRunTime()) })
         childMemory = childStack.getMemory()
         assert not childJob.attrib.has_key("memory")
         if childMemory != sys.maxint:
             childJob.attrib["memory"] = str(childMemory)
         assert not childJob.attrib.has_key("cpu")
         childCpu = childStack.getCpu()
         if childCpu != sys.maxint:
             childJob.attrib["cpu"] = str(childCpu)
     
     #Now build jobs for each child command
     for childCommand, runTime in newChildCommands:
         ET.SubElement(childrenTag, "child", { "command":str(childCommand),
                                           "time":str(runTime) })
 
     #Now write the updated job file
     fileHandle = open(jobFile, 'w')
     ET.ElementTree(job).write(fileHandle)
     fileHandle.close()
     
     #Finish up the stats
     if stats != None:
         stats.attrib["time"] = str(time.time() - startTime)
         stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
         fileHandle = open(job.attrib["stats"], 'w')
         ET.ElementTree(stats).write(fileHandle)
         fileHandle.close()
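For context, a minimal sketch of a target as this stack consumes one. The Target base class and the bookkeeping behind getChildren()/getFollowOn() are assumed from the accessors called above, so treat the subclass as illustrative rather than as the library's API.

    # Hypothetical target (illustrative): execute() above pops targets like
    # this one, runs them, then collects their children and follow-ons.
    class HelloTarget(Target):
        def run(self):
            logger.info("Hello from inside the stack")
            # children/follow-ons registered here are gathered by execute()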
Example #19
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     self.tempFileTree.destroyTempFiles()
     system("rm -rf %s %s" % (self.jobTreeDir, self.tempFileTreeDir)) #Cleanup the job tree in case it hasn't already been cleaned up.