def main():
    """Run the first phase of the jobTree test command.

    Reads the job XML (``--job``) and the tree pointer XML
    (``--treePointer``), then loads the tree XML named by the pointer's
    "file" attribute. For every child of that tree a child tree pointer is
    created and a child job running ``jobTreeTest_CommandFirst.py`` is added
    to the job. The job's own command is replaced with
    ``jobTreeTest_CommandSecond.py`` and the job file is rewritten in place.

    The function never returns normally: it exits with status 1 when a
    random draw exceeds 0.9 (a deliberately simulated failure) and with
    status 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    parser.add_option("--treePointer", dest="treePointerFile",
                      help="File containing pointer to the tree data",
                      default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")

    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")

    jobChildren = job.find("children")
    pointerChildren = treePointer.find("children")
    for child in tree.find("children").findall("child"):
        # Make the child tree pointer file
        childTreePointerFile = makeTreePointer(
            child.attrib["file"],
            getTempFile(rootDir=job.attrib["global_temp_dir"]))
        # Make the child command
        unbornChild = ET.SubElement(jobChildren, "child")
        unbornChild.attrib["command"] = \
            "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % \
            (childTreePointerFile,)
        # Only some of the children get a "time" attribute.
        if random.random() > 0.2:
            unbornChild.attrib["time"] = str(random.random() * 10)
        # Record the child pointer on the in-memory tree pointer.
        # NOTE(review): treePointer is never written back to disk in this
        # function, so these entries only exist in memory here - confirm
        # whether that is intended.
        ET.SubElement(pointerChildren, "child",
                      {"file": childTreePointerFile})

    job.attrib["command"] = \
        "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE" % \
        (options.treePointerFile,)
    logger.info("Made new command")

    # Close the handle even if serialising the XML fails.
    fileHandle = open(options.jobFile, 'w')
    try:
        ET.ElementTree(job).write(fileHandle)
    finally:
        fileHandle.close()
    logger.info("Updated the job file")

    # These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    sys.stdout.write("Checking that we can report to std out\n")

    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """Run the second phase of the jobTree test command.

    Reads the job XML (``--job``) and the tree pointer XML
    (``--treePointer``), loads the tree XML named by the pointer's "file"
    attribute and stores a "count" attribute on it: the sum of the "count"
    attributes of its children's tree files, or 1 when the tree has no
    children. The tree file is rewritten in place and every child pointer
    file listed in the tree pointer that still exists is deleted. The job
    file itself is not modified.

    The function never returns normally: it exits with status 1 when a
    random draw exceeds 0.9 (a deliberately simulated failure) and with
    status 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    parser.add_option("--treePointer", dest="treePointer",
                      help="File containing pointer to the tree data",
                      default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    # These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    sys.stdout.write("Checking that we can report to std out\n")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointer).getroot()
    logger.info("Parsed the tree pointer XML")

    treeFile = treePointer.attrib["file"]
    tree = ET.parse(treeFile).getroot()
    logger.info("Parsed the tree XML")

    children = tree.find("children").findall("child")
    if children:
        # Internal node: add up the counts stored in the children's files.
        i = 0
        for child in children:
            childTree = ET.parse(child.attrib["file"]).getroot()
            i += int(childTree.attrib["count"])
    else:
        # A node without children counts as one leaf.
        i = 1
    tree.attrib["count"] = str(i)
    logger.info("Calculated the leaf count: %i" % i)

    # Close the handle even if serialising the XML fails.
    fileHandle = open(treeFile, 'w')
    try:
        ET.ElementTree(tree).write(fileHandle)
    finally:
        fileHandle.close()
    logger.info("Updated the tree file: %s" % treeFile)

    for childPointer in treePointer.find("children").findall("child"):
        childPointerFile = childPointer.attrib["file"]
        if os.path.isfile(childPointerFile):
            os.remove(childPointerFile)
    logger.info("Removed the child pointer files")

    logger.info("No need to update the job file, as we didn't make anything new!")

    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def execute(self, jobFile):
    """Run every target on this stack and record the resulting work in jobFile.

    The job XML in ``jobFile`` supplies the log level, the local and global
    temp dirs, the time budget ("job_time") and the available memory and
    cpu. Each remaining target is popped and run; its follow-on, children,
    child commands and master logging messages are collected. Afterwards:

    * the follow-ons become the job's new "command" (wrapped in a
      ParallelFollowOnTarget when their summed run time exceeds the budget,
      otherwise chained onto a single stack),
    * the children are packed into stacks and added as "child" elements,
    * each child command is added as its own "child" element,
    * the job file is rewritten in place.

    When the job has a "stats" attribute, timing information is collected
    per target and written to the file that attribute names.

    Raises AssertionError if a target asks for more memory or cpu than the
    job advertises, or if the job/child element already carries a "memory"
    or "cpu" attribute when one is about to be set.
    """
    job = ET.parse(jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Setup logging with level: %s" % job.attrib["log_level"])
    self.tempDirAccessed = False
    self.localTempDir = job.attrib["local_temp_dir"]
    self.globalTempDir = job.attrib["global_temp_dir"]
    maxTime = float(job.attrib["job_time"])
    memory = int(job.attrib["available_memory"])
    cpu = int(job.attrib["available_cpu"])

    if "stats" in job.attrib:
        stats = ET.Element("stack")
        startTime = time.time()
        # The final "clock" value is computed against getTotalCpuTime(), so
        # the start reading has to come from the same source.
        startClock = getTotalCpuTime()
    else:
        stats = None

    newChildren = []  # Children collected before being packaged into stacks
    newChildCommands = []  # Ditto for the child commands
    newFollowOns = []  # Ditto for the follow-ons
    while self.hasRemaining():
        if stats is not None:  # Start the per-target timers
            targetStartTime = time.time()
            targetStartClock = getTotalCpuTime()

        target = self.popTarget()
        target.setStack(self)

        # Debug check that we have the right amount of CPU and memory for
        # the job in hand; sys.maxint means no explicit requirement.
        targetMemory = target.getMemory()
        if targetMemory != sys.maxint:
            assert targetMemory <= memory
        targetCpu = target.getCpu()
        if targetCpu != sys.maxint:
            assert targetCpu <= cpu

        target.run()

        # Clean up the local temp dir if it was used while running
        if self.tempDirAccessed:
            system("rm -rf %s/*" % self.localTempDir)
            self.tempDirAccessed = False

        # Handle the follow on; it inherits the target's global temp dir
        # when one has been set.
        followOn = target.getFollowOn()
        if followOn is not None:
            if target.isGlobalTempDirSet():
                followOn.setGlobalTempDir(target.getGlobalTempDir())
            newFollowOns.append(followOn)

        # Now add the children to the newChildren stack
        newChildren += target.getChildren()

        # Now add the child commands to the newChildCommands stack
        newChildCommands += target.getChildCommands()

        if stats is not None:
            ET.SubElement(stats, "target", {
                "time": str(time.time() - targetStartTime),
                "clock": str(getTotalCpuTime() - targetStartClock),
                "class": target.__class__.__name__,
                "e_time": str(target.getRunTime())})

        # The "messages" element is only created once there is a message.
        for message in target.getMasterLoggingMessages():
            messagesTag = job.find("messages")
            if messagesTag is None:
                messagesTag = ET.SubElement(job, "messages")
            ET.SubElement(messagesTag, "message", {"message": message})

    #######
    # Now build the new stacks and corresponding jobs
    #######

    assert not self.hasRemaining()

    # First sort out the follow on job
    if len(newFollowOns) > 0:
        followOnRuntime = sum(followOn.getRunTime()
                              for followOn in newFollowOns)

        if followOnRuntime > maxTime:
            # Too long for one job: create a parallel list of follow ons
            followOnStack = Stack(ParallelFollowOnTarget(newFollowOns))
        else:
            followOnStack = Stack(newFollowOns.pop())
            while len(newFollowOns) > 0:
                followOnStack.addTarget(newFollowOns.pop())

        job.attrib["command"] = followOnStack.makeRunnable(self.globalTempDir)
        job.attrib["time"] = str(followOnStack.getRunTime())
        followOnMemory = followOnStack.getMemory()
        assert "memory" not in job.attrib
        if followOnMemory != sys.maxint:
            job.attrib["memory"] = str(followOnMemory)
        assert "cpu" not in job.attrib
        followOnCpu = followOnStack.getCpu()
        if followOnCpu != sys.maxint:
            job.attrib["cpu"] = str(followOnCpu)

    # Now build stacks of children: keep adding children to a stack until
    # its run time exceeds the budget, then start a new one.
    childrenTag = job.find("children")
    while len(newChildren) > 0:
        childStack = Stack(newChildren.pop())
        while len(newChildren) > 0 and childStack.getRunTime() <= maxTime:
            childStack.addTarget(newChildren.pop())
        childJob = ET.SubElement(childrenTag, "child", {
            "command": childStack.makeRunnable(self.globalTempDir),
            "time": str(childStack.getRunTime())})
        childMemory = childStack.getMemory()
        assert "memory" not in childJob.attrib
        if childMemory != sys.maxint:
            childJob.attrib["memory"] = str(childMemory)
        assert "cpu" not in childJob.attrib
        childCpu = childStack.getCpu()
        if childCpu != sys.maxint:
            childJob.attrib["cpu"] = str(childCpu)

    # Now build jobs for each child command
    for childCommand, runTime in newChildCommands:
        ET.SubElement(childrenTag, "child", {
            "command": str(childCommand),
            "time": str(runTime)})

    # Now write the updated job file, closing the handle even on failure
    fileHandle = open(jobFile, 'w')
    try:
        ET.ElementTree(job).write(fileHandle)
    finally:
        fileHandle.close()

    # Finish up the stats
    if stats is not None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        try:
            ET.ElementTree(stats).write(fileHandle)
        finally:
            fileHandle.close()
def main():
    """Slave entry point: run a chain of jobs from one job file.

    ``sys.argv[1]`` is appended to ``sys.path`` (so the jobTree package can
    be imported) and removed from the argument list; the remaining options
    must contain ``--job`` and no positional arguments.

    The last "followOn" of the job is processed with ``processJob``. While
    the job's colour is "black" afterwards and it has fewer than two
    children, any child is moved onto the follow-on list and the next
    follow-on is run in the same process, provided it needs no more memory
    or cpu than the first one did. The job is checkpointed with
    ``writeJobs`` between iterations and once more at the end.

    When the job has a "stats" attribute, the total wall and CPU time are
    written to the file that attribute names.
    """
    sys.path.append(sys.argv[1])
    # Delete by position: remove() deletes the first equal value, which is
    # not necessarily the entry at index 1.
    del sys.argv[1]

    # Now we can import all the stuff..
    from jobTree.src.bioio import getBasicOptionParser
    from jobTree.src.bioio import parseBasicOptions
    from jobTree.src.bioio import logger
    from jobTree.src.bioio import addLoggingFileHandler
    from jobTree.src.bioio import setLogLevel
    from jobTree.src.bioio import getTotalCpuTime
    from jobTree.src.master import writeJobs

    ##########################################
    # Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    options, args = parseBasicOptions(parser)
    assert len(args) == 0

    ##########################################
    # Parse the job.
    ##########################################

    job = ET.parse(options.jobFile).getroot()

    ##########################################
    # Setup the logging
    ##########################################

    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")

    ##########################################
    # Setup the stats, if requested
    ##########################################

    if "stats" in job.attrib:
        startTime = time.time()
        # The final "clock" value is computed against getTotalCpuTime(), so
        # the start reading has to come from the same source.
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None

    ##########################################
    # Run the script.
    ##########################################

    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    jobToRun = job.find("followOns").findall("followOn")[-1]
    # The resources of the first job bound every later job run here.
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    while True:
        processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats)

        if job.attrib["colour"] != "black":
            logger.info("Exiting the slave because of a failed job")
            break

        # This is the estimated runtime of the jobs on the follow-on stack
        totalRuntime = float(job.attrib["total_time"])

        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")

        # With two or more children we have to return to the parent. (A
        # further limit based on totalRuntime plus the children's runtime
        # exceeding maxTime is currently disabled.)
        if len(childrenList) >= 2:
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break

        # Turn the (at most one) child into a follow-on of this job.
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0

        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break

        # Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break

        # Update the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([job])
        logger.info("Updated the status of the job to grey and starting the next job")

    # Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([job])
    logger.info("Written out an updated job file")

    logger.info("Finished running the chain of jobs on this node")

    ##########################################
    # Finish up the stats
    ##########################################

    if stats is not None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        # Close the handle even if serialising the XML fails.
        fileHandle = open(job.attrib["stats"], 'w')
        try:
            ET.ElementTree(stats).write(fileHandle)
        finally:
            fileHandle.close()