def testCopySubRangeOfFile(self):
    """Tests copySubRangeOfFile by copying random sub-ranges [fileStart, fileEnd)
    of a generated file and checking the copy equals the corresponding slice
    of the original file's contents.
    """
    for test in xrange(self.testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile = getTempFile(rootDir=tempDir)
        outputFile = getTempFile(rootDir=tempDir)
        makeFileToSort(tempFile)
        fileSize = os.path.getsize(tempFile)
        assert fileSize > 0
        #Pick a random sub-range of the file; fileEnd is exclusive (slice semantics).
        fileStart = random.choice(xrange(0, fileSize))
        fileEnd = random.choice(xrange(fileStart, fileSize))
        copySubRangeOfFile(tempFile, fileStart, fileEnd, outputFile)
        #Fix: close the file handles rather than leaking them (the original
        #used open(...).read() and never closed either handle).
        fileHandle = open(outputFile, 'r')
        l = fileHandle.read()
        fileHandle.close()
        fileHandle = open(tempFile, 'r')
        l2 = fileHandle.read()[fileStart:fileEnd]
        fileHandle.close()
        checkEqual(l, l2)
        system("rm -rf %s" % tempDir)
def killMasterAndParasol():
    """Randomly kills either a parasol process (paraNode/paraHub) or the
    jobTreeMaster process, then restarts parasol.

    Scans a `ps -a` listing; for each matching line a coin flip (p=0.5)
    decides whether to send it a kill signal.  At most one process is
    killed per call (the loop breaks after the first kill attempt).
    Presumably used to simulate failures in tests -- confirm against callers.
    """
    tempFile = getTempFile()
    #Capture the process listing to a temp file so it can be scanned line by line.
    popen("ps -a", tempFile)
    fileHandle = open(tempFile, 'r')
    line = fileHandle.readline()
    #Example parasol state lines:
    #67401 ttys002    0:00.06 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log -umask=002 -userPath=bin:bin/x86_64:bin/i
    #67403 ttys002    0:00.65 /Users/benedictpaten/kent/src/parasol/bin/paraHub -log=/tmp/hub.2009-07-08.log machineList subnet=127.0.0
    #68573 ttys002    0:00.00 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log -umask=002 -userPath=bin:bin/x86_64:bin/i
    while line != '':
        tokens = line.split()
        if 'paraNode' in line or 'paraHub' in line:
            #Kill this parasol process with probability 0.5; tokens[0] is the pid.
            if random.random() > 0.5:
                i = os.system("kill %i" % int(tokens[0]))
                logger.info("Tried to kill parasol process: %i, line: %s, exit value: %i" % (int(tokens[0]), line, i))
                break
        elif 'jobTreeMaster.py' in line:
            logger.info("Have job tree master line")
            #Kill the master process with probability 0.5.
            if random.random() > 0.5:
                i = os.system("kill %i" % int(tokens[0]))
                logger.info("Tried to kill master process: %i, line: %s, exit value: %i" % (int(tokens[0]), line, i))
                break
        line = fileHandle.readline()
    fileHandle.close()
    os.remove(tempFile)
    #Whether or not anything was killed, bring parasol back up.
    parasolRestart()
def parasolRestart():
    """Function starts the parasol hub and node.

    Stops any running parasol first, then loops: starts paraNode and paraHub,
    and polls `parasol status` until the "Nodes dead" count reported is 0.
    If the status check fails (RuntimeError from popen) or nodes are still
    dead, parasol is stopped and the start is retried after a 5s sleep.
    """
    parasolStop()
    while True:
        machineList = os.path.join(workflowRootPath(), "workflow", "jobTree", "machineList")
        #pathEnvVar = os.environ["PATH"]
        os.system("paraNode start -hub=localhost") #-umask=002 -userPath=%s -sysPath=%s" % (pathEnvVar, pathEnvVar))
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))
        tempFile = getTempFile()
        #Assume dead until the status output proves otherwise.
        dead = True
        try:
            popen("parasol status", tempFile)
            fileHandle = open(tempFile, 'r')
            line = fileHandle.readline()
            while line != '':
                if "Nodes dead" in line:
                    print line
                    #The last whitespace-separated token is the dead-node count.
                    if int(line.split()[-1]) == 0:
                        dead = False
                line = fileHandle.readline()
            fileHandle.close()
        except RuntimeError:
            #popen raises RuntimeError on failure; treat as "still dead" and retry.
            pass
        os.remove(tempFile)
        if not dead:
            break
        else:
            logger.info("Tried to restart the parasol process, but failed, will try again")
            parasolStop()
            time.sleep(5)
    logger.info("Restarted the parasol process")
def testMerge(self):
    """Tests merge() by merging two independently sorted files and checking
    the result against a sort of their combined lines.
    """
    for iteration in xrange(self.testNo):
        scratchDir = getTempDirectory(os.getcwd())
        inputA = getTempFile(rootDir=scratchDir)
        inputB = getTempFile(rootDir=scratchDir)
        mergedFile = getTempFile(rootDir=scratchDir)
        #Build and sort each input file independently.
        for inputFile in (inputA, inputB):
            makeFileToSort(inputFile)
            sort(inputFile)
        merge(inputA, inputB, mergedFile)
        #The merged output must equal the sorted union of both inputs.
        expected = loadFile(inputA) + loadFile(inputB)
        expected.sort()
        actual = loadFile(mergedFile)
        checkEqual(expected, actual)
        system("rm -rf %s" % scratchDir)
def run(self):
    """Recursively splits the byte range [fileStart, fileEnd) of the input
    file until a piece is no larger than N, then copies that piece out and
    sorts it; larger pieces spawn two child Down targets and an Up follow-on
    to merge their results.
    """
    span = self.fileEnd - self.fileStart
    assert span >= 0
    if span <= self.N:
        #Base case: small enough to copy out and sort directly.
        copySubRangeOfFile(self.inputFile, self.fileStart, self.fileEnd, self.outputFile)
        sort(self.outputFile)
        return
    #Recursive case: split at a point near the middle of the range.
    splitPoint = getMidPoint(self.inputFile, self.fileStart, self.fileEnd)
    assert splitPoint >= self.fileStart
    assert splitPoint+1 < self.fileEnd
    leftFile = getTempFile(rootDir=self.getGlobalTempDir())
    rightFile = getTempFile(rootDir=self.getGlobalTempDir())
    #splitPoint+1 steps past the newline at the split point.
    self.addChildTarget(Down(self.inputFile, self.fileStart, splitPoint+1, self.N, leftFile))
    self.addChildTarget(Down(self.inputFile, splitPoint+1, self.fileEnd, self.N, rightFile))
    #Merge the two sorted halves once the children are done.
    self.setFollowOnTarget(Up(leftFile, rightFile, self.outputFile))
def testJobTreeStats_SortSimple(self):
    """Tests the jobTreeStats utility using the scriptTree_sort example.
    """
    for iteration in xrange(self.testNo):
        scratchDir = getTempDirectory(os.getcwd())
        fileToSort = getTempFile(rootDir=scratchDir)
        statsFile = getTempFile(rootDir=scratchDir)
        jobTreeDir = os.path.join(scratchDir, "jobTree")
        lines, maxLineLength, N = 100000, 10, 1000
        makeFileToSort(fileToSort, lines, maxLineLength)
        #Sort the file with the scriptTree sort example, collecting stats.
        system("scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %s --stats --jobTime 0.5" % (jobTreeDir, fileToSort, N))
        #Now run jobTreeStats over the completed job tree.
        system("jobTreeStats --jobTree %s --outputFile %s" % (jobTreeDir, statsFile))
        #Cleanup
        system("rm -rf %s" % scratchDir)
def main():
    """Entry point for a jobTree test command: reads its job XML and a tree
    pointer XML, creates one child job (and child tree pointer) per child in
    the tree, rewrites its own command to the second-stage test command, then
    randomly exits non-zero ~10% of the time to exercise failure handling.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile", help="Job file containing command to run", default="None")
    parser.add_option("--treePointer", dest="treePointerFile", help="File containing pointer to the tree data", default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")
    #Load the job XML and configure logging from it.
    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")
    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")
    #The tree pointer's "file" attribute names the actual tree XML file.
    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")
    for child in tree.find("children").findall("child"):
        #Make the child tree pointer
        childTreePointerFile = makeTreePointer(child.attrib["file"], getTempFile(rootDir=job.attrib["global_temp_dir"]))
        #Make the child command
        unbornChild = ET.SubElement(job.find("children"), "child")
        command = "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % (childTreePointerFile,)
        unbornChild.attrib["command"] = command
        #Give ~80% of children a random runtime estimate.
        if random.random() > 0.2:
            unbornChild.attrib["time"] = str(random.random() * 10)
        #Record the child tree pointer in the parent tree pointer.
        ET.SubElement(treePointer.find("children"), "child", {"file": childTreePointerFile})
    #Replace this job's command with the second-stage command (run as the follow-on).
    job.attrib["command"] = "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE" % (options.treePointerFile,)
    logger.info("Made new command")
    fileHandle = open(options.jobFile, "w")
    ET.ElementTree(job).write(fileHandle)
    fileHandle.close()
    logger.info("Updated the job file")
    print >> sys.stderr, "Checking that we can report to std err" #These lines should end up in the logs
    print "Checking that we can report to std out"
    #Fail ~10% of the time to exercise jobTree's retry/failure machinery.
    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def makeRunnable(self, tempDir):
    """Pickles this object to a temp file and returns the shell command that
    scriptTree uses to run it.

    tempDir -- directory in which to create the pickle file.
    Returns the command string, with JOB_FILE left as a placeholder for the
    job file argument, followed by the import strings of every target on the
    stack.
    """
    pickleFile = getTempFile(".pickle", tempDir)
    #Fix: HIGHEST_PROTOCOL is a binary protocol, so the file must be opened
    #in binary mode ('wb', not 'w'); also ensure the handle is closed even
    #if dump raises.
    fileHandle = open(pickleFile, 'wb')
    try:
        cPickle.dump(self, fileHandle, cPickle.HIGHEST_PROTOCOL)
    finally:
        fileHandle.close()
    #Collect the (deduplicated) import strings needed by every stacked target.
    importStrings = set()
    for target in self.stack:
        importStrings.update(target.importStrings)
    classNames = " ".join(importStrings)
    return "scriptTree --job JOB_FILE --target %s %s" % (pickleFile, classNames)
def testGetMidPoint(self): for test in xrange(self.testNo): tempDir = getTempDirectory(os.getcwd()) tempFile = getTempFile(rootDir=tempDir) makeFileToSort(tempFile) l = open(tempFile, 'r').read() fileSize = os.path.getsize(tempFile) midPoint = getMidPoint(tempFile, 0, fileSize) print "the mid point is %i of a file of %i bytes woth byte" % (midPoint, fileSize) assert midPoint < fileSize assert l[midPoint] == '\n' assert midPoint >= 0 system("rm -rf %s" % tempDir)
def run(self):
    """Worker loop: repeatedly pulls (command, logFile, jobID) tuples from
    the input queue, runs each command in a shell with stdout and stderr
    captured to a temp log file, moves the log into place, and reports
    (command, exit status, jobID) on the output queue.

    Loops forever; presumably run on a daemon thread that dies with the
    process -- confirm against the caller.
    """
    while True:
        command, logFile, jobID = self.inputQueue.get()
        #fnull = open(os.devnull, 'w') #Pipe the output to dev/null (it is caught by the slave and will be reported if there is an error)
        tempLogFile = getTempFile()
        fileHandle = open(tempLogFile, 'w')
        #Both stdout and stderr go to the same temp log file.
        process = subprocess.Popen(command, shell=True, stdout = fileHandle, stderr = fileHandle)
        sts = os.waitpid(process.pid, 0)
        fileHandle.close()
        #fnull.close()
        #Move the captured log into its final location, if it was created.
        if os.path.exists(tempLogFile):
            system("mv %s %s" % (tempLogFile, logFile))
        #sts[1] is the 16-bit exit status word returned by os.waitpid.
        self.outputQueue.put((command, sts[1], jobID))
        self.inputQueue.task_done()
def scriptTree_SortTest(testNo, batchSystem, lines=100000, maxLineLength=10, N=1000):
    """Tests scriptTree/jobTree by sorting a file in parallel.
    """
    for iteration in xrange(testNo):
        scratchDir = getTempDirectory(os.getcwd())
        fileToSort = getTempFile(rootDir=scratchDir)
        jobTreeDir = os.path.join(scratchDir, "jobTree")
        makeFileToSort(fileToSort, lines=lines, maxLineLength=maxLineLength)
        #Compute our own sorted copy of the lines for later comparison.
        handle = open(fileToSort, 'r')
        expected = handle.readlines()
        handle.close()
        expected.sort()
        #Run the parallel sort via the scriptTree example script.
        system("scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %i --batchSystem %s --jobTime 1.0" % (jobTreeDir, fileToSort, N, batchSystem))
        #Read back the file (sorted in place) and check it matches.
        handle = open(fileToSort, 'r')
        actual = handle.readlines()
        handle.close()
        checkEqual(expected, actual)
        system("rm -rf %s" % scratchDir)
def processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats):
    """Runs one job's command on the slave and folds the results back into
    the job XML.

    Builds a temporary per-job XML file (log level, job time, temp dirs,
    available memory/cpu, optional stats file), substitutes JOB_FILE in the
    command with that file's path, runs the command in a shell with output
    captured to the job's log file, and then:
      - on success (exit status 0): re-reads the temp job file, copies its
        children onto the job's children list, replaces the last follow-on
        with the job's new command (or a stub follow-on when only children
        were produced), updates the estimated total runtime, and colours the
        job "black";
      - on failure (non-zero exit, or no JOB_FILE placeholder in the
        command): colours the job "red".
    Finally removes the slave-local temp directory.

    stats -- an ET element to append per-job time/clock stats to, or None
    to disable stats collection.
    """
    from workflow.jobTree.lib.bioio import getTempFile
    from workflow.jobTree.lib.bioio import getTempDirectory
    from workflow.jobTree.lib.bioio import logger
    from workflow.jobTree.lib.bioio import system
    from workflow.jobTree.lib.bioio import getTotalCpuTime
    #The job must have no outstanding children before it can be run.
    assert len(job.find("children").findall("child")) == 0
    assert int(job.attrib["child_count"]) == int(job.attrib["black_child_count"])
    command = jobToRun.attrib["command"]
    #Copy the job file to be edited
    tempJob = ET.Element("job")
    ET.SubElement(tempJob, "children")
    #Log for job
    tempJob.attrib["log_level"] = job.attrib["log_level"]
    #Time length of 'ideal' job before further parallelism is required
    tempJob.attrib["job_time"] = job.attrib["job_time"]
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    #Temp file dirs for job.
    localTempDir = getTempDirectory(rootDir=localSlaveTempDir)
    tempJob.attrib["local_temp_dir"] = localTempDir
    #The follow-on stack depth indexes the per-level global temp dir.
    depth = len(job.find("followOns").findall("followOn"))
    tempJob.attrib["global_temp_dir"] = os.path.join(job.attrib["global_temp_dir"], str(depth))
    if not os.path.isdir(tempJob.attrib["global_temp_dir"]): #Ensures that the global temp dirs of each level are kept separate.
        os.mkdir(tempJob.attrib["global_temp_dir"])
        os.chmod(tempJob.attrib["global_temp_dir"], 0777)
    #Remove any stale temp dir from a deeper (now finished) level.
    if os.path.isdir(os.path.join(job.attrib["global_temp_dir"], str(depth+1))):
        system("rm -rf %s" % os.path.join(job.attrib["global_temp_dir"], str(depth+1)))
    assert not os.path.isdir(os.path.join(job.attrib["global_temp_dir"], str(depth+2)))
    #Deal with memory and cpu requirements (this pass tells the running job
    #how much cpu and memory it has, according to the batch system).
    tempJob.attrib["available_memory"] = str(memoryAvailable)
    tempJob.attrib["available_cpu"] = str(cpuAvailable)
    if stats != None:
        #Reserve a unique path for the job to write its own stats into.
        tempJob.attrib["stats"] = getTempFile(rootDir=localSlaveTempDir)
        os.remove(tempJob.attrib["stats"])
    #Now write the temp job file
    tempFile = getTempFile(rootDir=localSlaveTempDir)
    fileHandle = open(tempFile, 'w')
    tree = ET.ElementTree(tempJob)
    tree.write(fileHandle)
    fileHandle.close()
    logger.info("Copied the jobs files ready for the job")
    if "JOB_FILE" not in command:
        #Without the placeholder we cannot pass the job file; mark as failed.
        logger.critical("There is no 'JOB_FILE' string in the command to be run to take the job-file argument: %s" % command)
        job.attrib["colour"] = "red" #Update the colour
    else:
        #First load the environment for the job.
        fileHandle = open(job.attrib["environment_file"], 'r')
        environment = cPickle.load(fileHandle)
        fileHandle.close()
        logger.info("Loaded the environment for the process")
        #Run the actual command
        tempLogFile = getTempFile(suffix=".log", rootDir=localSlaveTempDir)
        fileHandle = open(tempLogFile, 'w')
        finalCommand = command.replace("JOB_FILE", tempFile)
        if stats != None:
            startTime = time.time()
            startClock = getTotalCpuTime()
        #Both stdout and stderr are captured in the temp log file.
        process = subprocess.Popen(finalCommand, shell=True, stdout=fileHandle, stderr=subprocess.STDOUT, env=environment)
        sts = os.waitpid(process.pid, 0)
        fileHandle.close()
        #Cap the log at the configured maximum size.
        truncateFile(tempLogFile, int(job.attrib["max_log_file_size"]))
        #Copy across the log file
        system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
        i = sts[1]
        logger.info("Ran the job command=%s with exit status %i" % (finalCommand, i))
        if i == 0:
            logger.info("Passed the job, okay")
            if stats != None:
                #Record wall time and cpu time, plus any stats the job wrote itself.
                jobTag = ET.SubElement(stats, "job", { "time":str(time.time() - startTime), "clock":str(getTotalCpuTime() - startClock) })
                if os.path.exists(tempJob.attrib["stats"]):
                    jobTag.append(ET.parse(tempJob.attrib["stats"]).getroot())
            #Re-read the temp job file: the job may have added children / a new command.
            tempJob = ET.parse(tempFile).getroot()
            job.attrib["colour"] = "black" #Update the colour
            #Update the runtime of the stack..
            totalRuntime = float(job.attrib["total_time"]) #This is the estimate runtime of the jobs on the followon stack
            runtime = float(jobToRun.attrib["time"])
            totalRuntime -= runtime
            if totalRuntime < 0.0:
                totalRuntime = 0.0
            #The children
            children = job.find("children")
            assert len(children.findall("child")) == 0 #The children
            assert tempJob.find("children") != None
            #Copy each new child onto the job, with memory/cpu/time requirements.
            for child in tempJob.find("children").findall("child"):
                memory, cpu, compTime = getMemoryCpuAndTimeRequirements(job, child)
                ET.SubElement(children, "child", { "command":child.attrib["command"], "time":str(compTime), "memory":str(memory), "cpu":str(cpu) })
                logger.info("Making a child with command: %s" % (child.attrib["command"]))
            #The follow on command
            followOns = job.find("followOns")
            followOns.remove(followOns.findall("followOn")[-1]) #Remove the old job
            if tempJob.attrib.has_key("command"):
                memory, cpu, compTime = getMemoryCpuAndTimeRequirements(job, tempJob)
                ET.SubElement(followOns, "followOn", { "command":tempJob.attrib["command"], "time":str(compTime), "memory":str(memory), "cpu":str(cpu) })
                ##Add the runtime to the total runtime..
                totalRuntime += compTime
                logger.info("Making a follow on job with command: %s" % tempJob.attrib["command"])
            elif len(tempJob.find("children").findall("child")) != 0:
                #This is to keep the stack of follow on jobs consistent.
                ET.SubElement(followOns, "followOn", { "command":"echo JOB_FILE", "time":"0", "memory":"1000000", "cpu":"1" })
                logger.info("Making a stub follow on job")
            #Write back the runtime, after adding the follow on time and subtracting the time of the run job.
            job.attrib["total_time"] = str(totalRuntime)
        else:
            logger.info("Failed the job")
            job.attrib["colour"] = "red" #Update the colour
    #Clean up
    system("rm -rf %s" % (localSlaveTempDir))
    logger.info("Cleaned up by removing temp jobfile (the copy), and the temporary file directory for the job")
def run(self):
    """Kicks off the parallel sort: sorts the whole input file into a
    scratch file via a Down child target, then schedules a Cleanup
    follow-on over the result and the original input.
    """
    sortedFile = getTempFile(rootDir=self.getGlobalTempDir())
    totalSize = os.path.getsize(self.inputFile)
    #Sort the full byte range [0, totalSize) of the input into sortedFile.
    self.addChildTarget(Down(self.inputFile, 0, totalSize, self.N, sortedFile))
    self.setFollowOnTarget(Cleanup(sortedFile, self.inputFile))