def run(self):
    """Root jobTree target: queue the first stage of the pipeline.

    Dispatches to PreProcess when an input-info file was supplied,
    otherwise goes straight to Start.
    """
    setLogLevel("DEBUG")
    logger.info("Adding experiments to jobTree\n")
    # Pre-processing is only needed when input metadata was provided.
    firstStage = PreProcess(self.options) if self.options.inputInfo else Start(self.options)
    self.addChildTarget(firstStage)
def run(self):
    """Scan the input directory and spawn one Sample child per FASTQ file.

    A file qualifies when its name ends in '.fastq' or '.fq'; the sample
    name is the part of the base name before the first '.'.
    """
    setLogLevel("DEBUG")
    # 'fileName' instead of 'file' to avoid shadowing the builtin; the two
    # regex searches (r'\.fastq$' / r'\.fq$') are equivalent to endswith
    # with a tuple of suffixes.
    for fileName in os.listdir(self.options.indir):
        if fileName.endswith(('.fastq', '.fq')):
            sample = os.path.basename(fileName).split('.')[0]
            self.addChildTarget(Sample(sample, self.options))
def run(self):
    """Launch one RunExperiment child per experiment, plus per-sample mappings
    against each extra reference, then a Plots follow-on.

    Side effects: creates outdir, outdir/otherRefs and one directory per
    sample/reference pair via shell 'mkdir -p'.
    """
    setLogLevel("DEBUG")
    options = self.options
    system("mkdir -p %s" %(options.outdir))
    # Experiments and samples are parallel lists; pair them up by index.
    experiments, samples = getExperiments(options.cactusdir)
    for i, exp in enumerate(experiments):
        sample = samples[i]
        logger.info("Experiment %s, sample %s\n" %(exp, sample) )
        self.addChildTarget( RunExperiment(options, exp, sample) )
    #Map to other refs, the structure of the directories is going to be:
    #outdir/
    #   otherRefs/
    #       sampleNA*/
    #           hg19/
    #           apd/
    #           ...
    refdir = os.path.join(options.outdir, "otherRefs")
    system("mkdir -p %s" %refdir)
    for sample in samples:
        sampleDir = os.path.join(refdir, sample)
        # Reads for this sample live under options.readdir/<sample> —
        # presumably populated upstream; TODO confirm against the caller.
        readdir = os.path.join(self.options.readdir, sample)
        system("mkdir -p %s" %sampleDir)
        for ref in self.options.refs:
            rdir = os.path.join(sampleDir, ref)
            system("mkdir -p %s" %rdir)
            # One mapping child per (sample, reference) pair.
            self.addChildTarget( RunMapping(self.options, os.path.join(self.options.refdir, ref), rdir, readdir) )
    #Done mapping, now drawPlots
    self.setFollowOnTarget( Plots(options.outdir, os.path.join(options.outdir, "plots"), options.cleanup) )
def run(self):
    """Entry-point target: add either the pre-processing step or the
    main Start step as the first child job."""
    setLogLevel("DEBUG")
    logger.info("Adding experiments to jobTree\n")
    # Inverted guard: skip pre-processing when no input-info was given.
    if not self.options.inputInfo:
        self.addChildTarget(Start(self.options))
    else:
        self.addChildTarget(PreProcess(self.options))
def main():
    """Test command for jobTree: expand one level of a pointer tree.

    Reads the job XML and a tree-pointer XML, creates one child job (running
    this same script) per child of the tree node, then rewrites the job file
    so its follow-on runs jobTreeTest_CommandSecond.py.  Randomly fails ~10%
    of the time to exercise jobTree's retry machinery.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile", help="Job file containing command to run", default="None")
    parser.add_option("--treePointer", dest="treePointerFile", help="File containing pointer to the tree data", default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")
    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")
    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")
    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")
    for child in tree.find("children").findall("child"):
        #Make the child tree pointer file in the job's global temp dir.
        childTreePointerFile = makeTreePointer(child.attrib["file"], getTempFile(rootDir=job.attrib["global_temp_dir"]))
        #Make the child command
        unbornChild = ET.SubElement(job.find("children"), "child")
        command = "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % \
            (childTreePointerFile,)
        unbornChild.attrib["command"] = command
        # ~80% of children get an explicit (random) time estimate.
        if random.random() > 0.2:
            unbornChild.attrib["time"] = str(random.random() * 10)
        #Record the child tree pointer so the follow-on can find/clean it.
        ET.SubElement(treePointer.find("children"), "child", { "file":childTreePointerFile })
    # The follow-on phase for this node is the "second" command.
    job.attrib["command"] = "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE" % \
        (options.treePointerFile,)
    logger.info("Made new command")
    fileHandle = open(options.jobFile, 'w')
    ET.ElementTree(job).write(fileHandle)
    fileHandle.close()
    logger.info("Updated the job file")
    print >>sys.stderr, "Checking that we can report to std err" #These lines should end up in the logs
    print "Checking that we can report to std out"
    # Deliberate random failure (~10%) to test jobTree's retry handling.
    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def run(self):
    """Fan out one RunSampling child per replicate, then draw the
    nt-to-aa distribution over all replicates as a follow-on."""
    setLogLevel("DEBUG")
    tempRoot = self.getGlobalTempDir()
    for sampleIndex in xrange(self.options.samnum):
        # Each replicate writes into its own numbered subdirectory.
        replicateDir = os.path.join(tempRoot, "%d" % sampleIndex)
        system("mkdir -p %s" % replicateDir)
        self.addChildTarget(RunSampling(replicateDir, self.options))
    # Runs only after every sampling child has finished.
    self.setFollowOnTarget(RunDrawNt2aaDist(tempRoot, self.options))
def run(self):
    """Create the output directory, launch one child per region, and
    aggregate the results in a follow-on."""
    setLogLevel("DEBUG")
    outdir = self.options.outdir
    system("mkdir -p %s" % outdir)
    system("chmod ug+xrw %s" % outdir)
    regionNames = getList(self.options.regions)
    logger.info("Regions: %s\n" % (" ".join(regionNames)))
    for regionName in regionNames:
        self.addChildTarget(RunRegion(regionName, self.options))
    # Aggregation runs once all per-region children are done.
    self.setFollowOnTarget(AggregateResults(self.options))
def run(self):
    """Prepare the output directory, then run each region as its own
    child target with an aggregation follow-on."""
    setLogLevel("DEBUG")
    # Create the output dir and open up group permissions on it.
    for shellTemplate in ("mkdir -p %s", "chmod ug+xrw %s"):
        system(shellTemplate % self.options.outdir)
    regions = getList(self.options.regions)
    logger.info("Regions: %s\n" % (" ".join(regions)))
    for region in regions:
        self.addChildTarget(RunRegion(region, self.options))
    self.setFollowOnTarget(AggregateResults(self.options))
def run(self):
    """For each parameter set, launch a CactusTuningSimulationsWrapper child
    that runs cactus over the simulation data.

    Side effects: deletes any path matching '<outputDir>*', converts each
    simulation's true.mfa to MAF (if not already done), and creates one
    output + stats directory per parameter set.
    """
    #--------------------------------------------------------------------------------------
    #Get parameter sets. For each set, issue job to run cactus on different simulation data
    #--------------------------------------------------------------------------------------
    setLogLevel("DEBUG")
    # NOTE(review): this removes every path matching the outputDir prefix
    # (trailing '*'), not just the directory itself — confirm intended.
    system("rm -rf %s*" % self.options.outputDir)
    logger.info("Remove output directory if exists\n")
    #Convert true.mfa of each simulation to maf format
    #simTrueMafDir = os.path.join(self.options.outputDir, "sim")
    simTrueMafDir = self.options.simTrueMafDir
    check_dir(simTrueMafDir)
    for sim in self.options.sim:
        #convert mfa file of current simulation into MAF format:
        sim = modify_dirname(sim)
        simName = getRootDir(sim)
        trueMAF = os.path.join(simTrueMafDir, "%s_true.maf" % (simName))
        # Conversion is cached: skip when the MAF already exists.
        if not os.path.exists(trueMAF):
            trueMFA = os.path.join(sim, "true.mfa")
            runEvalMFAToMAF(trueMFA, trueMAF)
            logger.info("Converted true.mfa of simulation %s to %s\n" % (sim, trueMAF))
        else:
            logger.info("TrueMAF already exists: %s\n" % (trueMAF))
    for parameterFile, parameterName in getParameters(self.options.config):
        outDir = os.path.join(self.options.outputDir, parameterName)
        #system("rm -rf %s" % outDir)
        os.mkdir(outDir)
        # The parameter file is moved (not copied) into the output dir.
        system("mv %s %s/" % (parameterFile, outDir))
        logger.info(
            "Created output directory %s for parameter set %s and moved config file to that directory\n" % (outDir, parameterName))
        paraFile = os.path.join(outDir, 'param.xml')
        statsDir = os.path.join(outDir, "stats")
        os.mkdir(statsDir)
        logger.info("Created directory for stats files: %s\n" % (statsDir))
        self.addChildTarget(
            CactusTuningSimulationsWrapper(self.options, paraFile, outDir))
        logger.info(
            "Added CactusTuningSimulationsWrapper as child for parameter %s\n" % (parameterName))
    #Summarize results
    #self.setFollowOnTarget(CactusTuningSummary(self.options))
    logger.info("Added CactusTuningSummary\n")
def run(self):
    """Schedule one batch of simulations, recursing until numSim are done.

    Each batch (at most maxSimPerRun runs) gets its own
    outdir/sims/<firstSimIndex> directory; after the final batch a
    Summary follow-on is queued instead of another Setup recursion.
    """
    setLogLevel("DEBUG")
    totalSims = self.options.numSim
    batchSize = min([self.options.maxSimPerRun, totalSims - self.doneSims])
    batchDir = os.path.join(self.options.outdir, "sims", str(self.doneSims)) #outdir/sims/batchId
    system("mkdir -p %s" % batchDir)
    self.addChildTarget(SimulationBatch(self.options, batchDir, batchSize, self.doneSims))
    completed = self.doneSims + batchSize
    if completed < totalSims:
        # Recursive follow-on schedules the next batch.
        self.setFollowOnTarget(Setup(self.options, completed))
    else:
        summarydir = os.path.join(self.options.outdir, "summary") #outdir/summary
        statsdir = os.path.join(self.options.outdir, "stats") #outdir/stats
        system("mkdir -p %s" % statsdir)
        readPickle = True
        writePickle = False
        writeSummary = True
        self.setFollowOnTarget(Summary(summarydir, self.options.samSize,
                                       self.options.numSamples, statsdir,
                                       writePickle, writeSummary, readPickle))
def run(self):
    """For each parameter set, issue a CactusTuningSimulationsWrapper child
    to run cactus over the simulation data.

    Side effects: deletes any path matching '<outputDir>*', lazily converts
    each simulation's true.mfa to MAF, and creates per-parameter output and
    stats directories.
    """
    #--------------------------------------------------------------------------------------
    #Get parameter sets. For each set, issue job to run cactus on different simulation data
    #--------------------------------------------------------------------------------------
    setLogLevel("DEBUG")
    # NOTE(review): the trailing '*' deletes every sibling path sharing the
    # outputDir prefix, not just the directory — confirm intended.
    system("rm -rf %s*" % self.options.outputDir)
    logger.info("Remove output directory if exists\n")
    #Convert true.mfa of each simulation to maf format
    #simTrueMafDir = os.path.join(self.options.outputDir, "sim")
    simTrueMafDir = self.options.simTrueMafDir
    check_dir(simTrueMafDir)
    for sim in self.options.sim:
        #convert mfa file of current simulation into MAF format:
        sim = modify_dirname(sim)
        simName = getRootDir(sim)
        trueMAF = os.path.join(simTrueMafDir, "%s_true.maf" %(simName))
        # Cached: only convert when the target MAF is missing.
        if not os.path.exists(trueMAF):
            trueMFA = os.path.join(sim, "true.mfa")
            runEvalMFAToMAF(trueMFA, trueMAF)
            logger.info("Converted true.mfa of simulation %s to %s\n" % (sim, trueMAF))
        else:
            logger.info("TrueMAF already exists: %s\n" %(trueMAF))
    for parameterFile, parameterName in getParameters(self.options.config):
        outDir = os.path.join(self.options.outputDir, parameterName)
        #system("rm -rf %s" % outDir)
        os.mkdir(outDir)
        # The parameter file is moved (not copied) into the output dir.
        system("mv %s %s/" % (parameterFile, outDir))
        logger.info("Created output directory %s for parameter set %s and moved config file to that directory\n" % (outDir, parameterName))
        paraFile = os.path.join(outDir, 'param.xml')
        statsDir = os.path.join(outDir, "stats")
        os.mkdir(statsDir)
        logger.info("Created directory for stats files: %s\n" % (statsDir))
        self.addChildTarget(CactusTuningSimulationsWrapper(self.options, paraFile, outDir))
        logger.info("Added CactusTuningSimulationsWrapper as child for parameter %s\n" %(parameterName))
    #Summarize results
    #self.setFollowOnTarget(CactusTuningSummary(self.options))
    logger.info("Added CactusTuningSummary\n")
def run(self):
    """Queue one RunExperiment child for every entry in the experiment list."""
    setLogLevel("DEBUG")
    logger.info("Adding experiments to jobTree\n")
    for experiment in getList(self.options.experimentList):
        self.addChildTarget(RunExperiment(experiment, self.options))
header.start += len( subSequence ) + lenNs sequence = sequence[m.start() + lenNs: ] m = re.search( pattern, sequence ) i = fn2(header, searchedSeq + sequence) if i != None: yield i #=========== MAIN ==================== fH = open(sys.argv[1], 'r') fH2 = open(sys.argv[2], 'w') lengthOfNs = int(sys.argv[3]) lengthOfFragment = int(sys.argv[4]) if len(sys.argv) == 6: setLogLevel(sys.argv[5]) headers = set() for name, sequence in fastaRead(fH): header = Header( name.split()[0], len(sequence) ) logger.info("Got a sequence of length %i with header %s for processing" % (len(sequence), name.split()[0])) for newheader, subsequence in fn( header, sequence, lengthOfNs ): if len( subsequence ) > 0: logger.info("Writing out a sequence of length %i with header %s" % (len(subsequence), newheader)) assert newheader not in headers headers.add(newheader) fastaWrite(fH2, newheader, subsequence) fH.close() fH2.close()
def main():
    """jobTree slave entry point: run a chain of follow-on jobs on one node.

    Invocation (positional argv, after the leading path argument is consumed):
        argv[1] = path to add to sys.path (then removed from argv)
        argv[1] = jobTree path, argv[2] = job file   (post-removal)

    Redirects OS-level stdout/stderr (fds 1 and 2) into a per-slave log file,
    executes follow-on commands in series while time/memory/cpu budgets allow,
    records stats if configured, and restores the original descriptors on the
    way out.  On any exception the job is marked failed and the log is copied
    back for the master.
    """
    sys.path.append(sys.argv[1])
    sys.argv.remove(sys.argv[1])
    #Now we can import all the stuff..
    from sonLib.bioio import getBasicOptionParser
    from sonLib.bioio import parseBasicOptions
    from sonLib.bioio import logger
    from sonLib.bioio import addLoggingFileHandler, redirectLoggerStreamHandlers
    from sonLib.bioio import setLogLevel
    from sonLib.bioio import getTotalCpuTime, getTotalCpuTimeAndMemoryUsage
    from sonLib.bioio import getTempDirectory
    from sonLib.bioio import makeSubDir
    from jobTree.src.job import Job
    from jobTree.src.master import getEnvironmentFileName, getConfigFileName, listChildDirs, getTempStatsFile, setupJobAfterFailure
    from sonLib.bioio import system
    ##########################################
    #Input args
    ##########################################
    jobTreePath = sys.argv[1]
    jobFile = sys.argv[2]
    ##########################################
    #Load the environment for the job
    ##########################################
    #First load the environment for the job.
    fileHandle = open(getEnvironmentFileName(jobTreePath), 'r')
    environment = cPickle.load(fileHandle)
    fileHandle.close()
    # Replay the master's environment, except host/tmp-specific variables.
    for i in environment:
        if i not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"):
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)
    #os.environ = environment
    #os.putenv(key, value)
    ##########################################
    #Setup the temporary directories.
    ##########################################
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    localTempDir = makeSubDir(os.path.join(localSlaveTempDir, "localTempDir"))
    ##########################################
    #Setup the logging
    ##########################################
    #Setup the logging. This is mildly tricky because we don't just want to
    #redirect stdout and stderr for this Python process; we want to redirect it
    #for this process and all children. Consequently, we can't just replace
    #sys.stdout and sys.stderr; we need to mess with the underlying OS-level
    #file descriptors. See <http://stackoverflow.com/a/11632982/402891>
    #When we start, standard input is file descriptor 0, standard output is
    #file descriptor 1, and standard error is file descriptor 2.
    #What file do we want to point FDs 1 and 2 to?
    tempSlaveLogFile = os.path.join(localSlaveTempDir, "slave_log.txt")
    #Save the original stdout and stderr (by opening new file descriptors to the
    #same files)
    origStdOut = os.dup(1)
    origStdErr = os.dup(2)
    #Open the file to send stdout/stderr to.
    logDescriptor = os.open(tempSlaveLogFile, os.O_WRONLY | os.O_CREAT | os.O_APPEND)
    #Replace standard output with a descriptor for the log file
    os.dup2(logDescriptor, 1)
    #Replace standard error with a descriptor for the log file
    os.dup2(logDescriptor, 2)
    #Since we only opened the file once, all the descriptors duped from the
    #original will share offset information, and won't clobber each others'
    #writes. See <http://stackoverflow.com/a/5284108/402891>. This shouldn't
    #matter, since O_APPEND seeks to the end of the file before every write, but
    #maybe there's something odd going on...
    #Close the descriptor we used to open the file
    os.close(logDescriptor)
    for handler in list(logger.handlers): #Remove old handlers
        logger.removeHandler(handler)
    #Add the new handler. The sys.stderr stream has been redirected by swapping
    #the file descriptor out from under it.
    logger.addHandler(logging.StreamHandler(sys.stderr))
    #Put a message at the top of the log, just to make sure it's working.
    print "---JOBTREE SLAVE OUTPUT LOG---"
    sys.stdout.flush()
    #Log the number of open file descriptors so we can tell if we're leaking
    #them.
    logger.debug("Next available file descriptor: {}".format(
        nextOpenDescriptor()))
    ##########################################
    #Parse input files
    ##########################################
    config = ET.parse(getConfigFileName(jobTreePath)).getroot()
    setLogLevel(config.attrib["log_level"])
    job = Job.read(jobFile)
    job.messages = [] #This is the only way to stop messages logging twice, as are read only in the master
    job.children = [] #Similarly, this is where old children are flushed out.
    job.write() #Update status, to avoid reissuing children after running a follow on below.
    if os.path.exists(job.getLogFileName()): #This cleans the old log file
        os.remove(job.getLogFileName())
    logger.info("Parsed arguments and set up logging")
    #Try loop for slave logging
    ##########################################
    #Setup the stats, if requested
    ##########################################
    if config.attrib.has_key("stats"):
        startTime = time.time()
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None
    ##########################################
    #The max time
    ##########################################
    maxTime = float(config.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    ##########################################
    #Slave log file trapped from here on in
    ##########################################
    slaveFailed = False
    try:
        ##########################################
        #The next job
        ##########################################
        def globalTempDirName(job, depth):
            #Per-depth global temp dir name for this job.
            return job.getGlobalTempDirName() + str(depth)
        command, memoryAvailable, cpuAvailable, depth = job.followOnCommands[-1]
        defaultMemory = int(config.attrib["default_memory"])
        defaultCpu = int(config.attrib["default_cpu"])
        assert len(job.children) == 0
        startTime = time.time()
        while True:
            job.followOnCommands.pop()
            ##########################################
            #Global temp dir
            ##########################################
            globalTempDir = makeSubDir(globalTempDirName(job, depth))
            # Remove stale deeper-level temp dirs left by earlier attempts.
            i = 1
            while os.path.isdir(globalTempDirName(job, depth+i)):
                system("rm -rf %s" % globalTempDirName(job, depth+i))
                i += 1
            ##########################################
            #Old children, not yet deleted
            #
            #These may exist because of the lazy cleanup
            #we do
            ##########################################
            for childDir in listChildDirs(job.jobDir):
                logger.debug("Cleaning up old child %s" % childDir)
                system("rm -rf %s" % childDir)
            ##########################################
            #Run the job
            ##########################################
            if command != "": #Not a stub
                if command[:11] == "scriptTree ":
                    ##########################################
                    #Run the target
                    ##########################################
                    loadStack(command).execute(job=job, stats=stats,
                        localTempDir=localTempDir, globalTempDir=globalTempDir,
                        memoryAvailable=memoryAvailable, cpuAvailable=cpuAvailable,
                        defaultMemory=defaultMemory, defaultCpu=defaultCpu, depth=depth)
                else: #Is another command
                    system(command)
            ##########################################
            #Cleanup/reset a successful job/checkpoint
            ##########################################
            job.remainingRetryCount = int(config.attrib["try_count"])
            system("rm -rf %s/*" % (localTempDir))
            job.update(depth=depth, tryCount=job.remainingRetryCount)
            ##########################################
            #Establish if we can run another job
            ##########################################
            if time.time() - startTime > maxTime:
                logger.info("We are breaking because the maximum time the job should run for has been exceeded")
                break
            #Deal with children
            if len(job.children) >= 1: #We are going to have to return to the parent
                logger.info("No more jobs can run in series by this slave, its got %i children" % len(job.children))
                break
            if len(job.followOnCommands) == 0:
                logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
                break
            #Get the next job and see if we have enough cpu and memory to run it..
            command, memory, cpu, depth = job.followOnCommands[-1]
            if memory > memoryAvailable:
                logger.info("We need more memory for the next job, so finishing")
                break
            if cpu > cpuAvailable:
                logger.info("We need more cpus for the next job, so finishing")
                break
            logger.info("Starting the next job")
        ##########################################
        #Finish up the stats
        ##########################################
        if stats != None:
            totalCpuTime, totalMemoryUsage = getTotalCpuTimeAndMemoryUsage()
            stats.attrib["time"] = str(time.time() - startTime)
            stats.attrib["clock"] = str(totalCpuTime - startClock)
            stats.attrib["memory"] = str(totalMemoryUsage)
            tempStatsFile = getTempStatsFile(jobTreePath)
            fileHandle = open(tempStatsFile + ".new", "w")
            ET.ElementTree(stats).write(fileHandle)
            fileHandle.close()
            os.rename(tempStatsFile + ".new", tempStatsFile) #This operation is atomic
        logger.info("Finished running the chain of jobs on this node, we ran for a total of %f seconds" % (time.time() - startTime))
    ##########################################
    #Where slave goes wrong
    ##########################################
    except: #Case that something goes wrong in slave
        traceback.print_exc()
        logger.critical("Exiting the slave because of a failed job on host %s", socket.gethostname())
        job = Job.read(jobFile)
        setupJobAfterFailure(job, config)
        job.write()
        slaveFailed = True
    ##########################################
    #Cleanup
    ##########################################
    #Close the slave logging
    #Flush at the Python level
    sys.stdout.flush()
    sys.stderr.flush()
    #Flush at the OS level
    os.fsync(1)
    os.fsync(2)
    #Close redirected stdout and replace with the original standard output.
    os.dup2(origStdOut, 1)
    #Close redirected stderr and replace with the original standard error.
    #NOTE(review): this uses origStdOut for fd 2 — likely should be
    #origStdErr; as written, stderr is rebound to the saved stdout. Confirm.
    os.dup2(origStdOut, 2)
    #sys.stdout and sys.stderr don't need to be modified at all. We don't need
    #to call redirectLoggerStreamHandlers since they still log to sys.stderr
    #Close our extra handles to the original standard output and standard error
    #streams, so we don't leak file handles.
    os.close(origStdOut)
    os.close(origStdErr)
    #Now our file handles are in exactly the state they were in before.
    #Copy back the log file to the global dir, if needed
    if slaveFailed:
        truncateFile(tempSlaveLogFile)
        system("mv %s %s" % (tempSlaveLogFile, job.getLogFileName()))
    #Remove the temp dir
    system("rm -rf %s" % localSlaveTempDir)
    #This must happen after the log file is done with, else there is no place to put the log
    if (not slaveFailed) and len(job.followOnCommands) == 0 and len(job.children) == 0 and len(job.messages) == 0:
        ##########################################
        #Cleanup global files at the end of the chain
        ##########################################
        job.delete()
def main():
    """Test follow-on command for jobTree: propagate leaf counts up the tree.

    Sums the 'count' attribute of the node's children (or 1 for a leaf),
    writes the updated tree file, deletes the child pointer files created by
    the first command, and randomly fails ~10% of the time to exercise
    jobTree's retry machinery.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile", help="Job file containing command to run", default="None")
    parser.add_option("--treePointer", dest="treePointer", help="File containing pointer to the tree data", default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")
    print >>sys.stderr, "Checking that we can report to std err" #These lines should end up in the logs
    print "Checking that we can report to std out"
    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")
    treePointer = ET.parse(options.treePointer).getroot()
    logger.info("Parsed the tree pointer XML")
    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")
    # Leaf count: sum of children's counts, or 1 when this node is a leaf.
    i = 0
    children = tree.find("children").findall("child")
    if len(children) > 0:
        for child in children:
            #Parse the child XML tree
            childTree = ET.parse(child.attrib["file"]).getroot()
            i += int(childTree.attrib["count"])
    else:
        i = 1
    tree.attrib["count"] = str(i)
    logger.info("Calculated the leaf count: %i" % i)
    fileHandle = open(treePointer.attrib["file"], 'w')
    ET.ElementTree(tree).write(fileHandle)
    fileHandle.close()
    logger.info("Updated the tree file: %s" % treePointer.attrib["file"])
    # Remove the per-child pointer files created by the first command.
    for childPointer in treePointer.find("children").findall("child"):
        if os.path.isfile(childPointer.attrib["file"]):
            os.remove(childPointer.attrib["file"])
    logger.info("Removed the child pointer files")
    logger.info("No need to update the job file, as we didn't make anything new!")
    # Deliberate random failure (~10%) to test jobTree's retry handling.
    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """Old-style jobTree slave: run a chain of follow-on jobs on one node.

    argv[1] is a path to append to sys.path (then removed).  Reads the job
    XML, replays the saved environment, then repeatedly runs the job at the
    top of the followOns stack until a failure, a time/memory/cpu budget is
    exceeded, or the stack is exhausted; promotes a single child to a
    follow-on to keep the chain going.  Checkpoints the job file and writes
    stats on exit if requested.
    """
    sys.path.append(sys.argv[1])
    sys.argv.remove(sys.argv[1])
    #Now we can import all the stuff..
    from sonLib.bioio import getBasicOptionParser
    from sonLib.bioio import parseBasicOptions
    from sonLib.bioio import logger
    from sonLib.bioio import addLoggingFileHandler
    from sonLib.bioio import setLogLevel
    from sonLib.bioio import getTotalCpuTime
    from sonLib.bioio import getTempDirectory
    from jobTree.src.master import writeJobs
    from jobTree.src.master import readJob
    from sonLib.bioio import system
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile", help="Job file containing command to run", default="None")
    options, args = parseBasicOptions(parser)
    assert len(args) == 0
    ##########################################
    #Parse the job.
    ##########################################
    job = readJob(options.jobFile)
    ##########################################
    #Setup the logging
    ##########################################
    #Setup the logging
    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")
    ##########################################
    #Setup the stats, if requested
    ##########################################
    if job.attrib.has_key("stats"):
        startTime = time.time()
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None
    ##########################################
    #Load the environment for the job
    ##########################################
    #First load the environment for the job.
    fileHandle = open(job.attrib["environment_file"], 'r')
    environment = cPickle.load(fileHandle)
    fileHandle.close()
    # Replay the master's environment, except host/tmp-specific variables.
    for i in environment:
        if i not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"):
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)
    #os.environ = environment
    #os.putenv(key, value)
    logger.info("Loaded the environment for the process")
    ##########################################
    #Setup the temporary directories.
    ##########################################
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    localTempDir = os.path.join(localSlaveTempDir, "localTempDir")
    os.mkdir(localTempDir)
    os.chmod(localTempDir, 0777)
    ##########################################
    #Run the script.
    ##########################################
    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    # The job to run is the top of the followOns stack.
    jobToRun = job.find("followOns").findall("followOn")[-1]
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    startTime = time.time()
    while True:
        tempLogFile = processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats, environment, localSlaveTempDir, localTempDir)
        # Any colour other than "black" signals failure of the processed job.
        if job.attrib["colour"] != "black":
            logger.critical("Exiting the slave because of a failed job")
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"])) #Copy back the job log file, because we saw failure
            break
        elif job.attrib.has_key("reportAllJobLogFiles"):
            logger.info("Exiting because we've been asked to report all logs, and this involves returning to the master")
            #Copy across the log file
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
            break
        totalRuntime = float(job.attrib["total_time"]) #This is the estimate runtime of the jobs on the followon stack
        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")
        #childRuntime = sum([ float(child.attrib["time"]) for child in childrenList ])
        if len(childrenList) >= 2: # or totalRuntime + childRuntime > maxTime:
            #We are going to have to return to the parent
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break
        if time.time() - startTime > maxTime:
            logger.info("We are breaking because the maximum time the job should run for has been exceeded")
            break
        # Promote remaining (0 or 1) children onto the followOns stack so the
        # chain can continue on this node.
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        #assert totalRuntime <= maxTime + 1 #The plus one second to avoid unimportant rounding errors
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0
        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break
        #Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break
        ##Updated the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([ job ])
        logger.info("Updated the status of the job to grey and starting the next job")
    #Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([ job ])
    logger.info("Written out an updated job file")
    logger.info("Finished running the chain of jobs on this node, we ran for a total of %f seconds" % (time.time() - startTime))
    ##########################################
    #Cleanup the temporary directory
    ##########################################
    system("rm -rf %s" % localSlaveTempDir)
    ##########################################
    #Finish up the stats
    ##########################################
    if stats != None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        ET.ElementTree(stats).write(fileHandle)
        fileHandle.close()
def execute(self, job):
    """Run every target on this scriptTree stack, then rewrite the job XML.

    Pops and runs targets in turn (cleaning temp dirs and restoring the cwd
    between them), collecting their follow-ons, children and child commands.
    Afterwards packages the follow-ons into the job's own command and the
    children into new <child> elements, annotating time/memory/cpu and
    recording per-target stats when requested.
    """
    setLogLevel(job.attrib["log_level"])
    logger.info("Setup logging with level: %s" % job.attrib["log_level"])
    self.tempDirAccessed = False
    self.localTempDir = job.attrib["local_temp_dir"]
    self.globalTempDir = job.attrib["global_temp_dir"]
    maxTime = float(job.attrib["job_time"])
    memory = int(job.attrib["available_memory"])
    cpu = int(job.attrib["available_cpu"])
    if job.attrib.has_key("stats"):
        stats = ET.SubElement(job, "stack")
        startTime = time.time()
        startClock = getTotalCpuTime()
    else:
        stats = None
    newChildren = [] #List to add all the children to before we package them
    #off into stacks
    newChildCommands = [] #Ditto for the child commands
    newFollowOns = [] #Ditto for the follow-ons
    baseDir = os.getcwd()
    while self.hasRemaining():
        if stats is not None: #Getting the runtime of the stats module
            targetStartTime = time.time()
            targetStartClock = getTotalCpuTime()
        target = self.popTarget()
        target.setStack(self)
        #Debug check that we have the right amount of CPU and memory for the job in hand
        # (sys.maxint is the sentinel for "no requirement")
        targetMemory = target.getMemory()
        if targetMemory != sys.maxint:
            assert targetMemory <= memory
        targetCpu = target.getCpu()
        if targetCpu != sys.maxint:
            assert targetCpu <= cpu
        #Run the target, first cleanup then run.
        target.run()
        #Change dir back to cwd dir, if changed by target (this is a safety issue)
        if os.getcwd() != baseDir:
            os.chdir(baseDir)
        #Cleanup after the target
        if self.tempDirAccessed:
            system("rm -rf %s/*" % self.localTempDir)
            self.tempDirAccessed = False
        #Handle the follow on
        followOn = target.getFollowOn()
        #if target.__class__ != CleanupGlobalTempDirTarget and followOn == None:
        #    followOn = CleanupGlobalTempDirTarget()
        if followOn is not None: #Target to get rid of follow on when done.
            if target.isGlobalTempDirSet():
                followOn.setGlobalTempDir(target.getGlobalTempDir())
            newFollowOns.append(followOn)
        #Now add the children to the newChildren stack
        newChildren += target.getChildren()
        #Now add the child commands to the newChildCommands stack
        newChildCommands += target.getChildCommands()
        if stats is not None:
            ET.SubElement(stats, "target", { "time":str(time.time() - targetStartTime),
                "clock":str(getTotalCpuTime() - targetStartClock),
                "class":".".join((target.__class__.__name__,)),
                "e_time":str(target.getRunTime())})
        for message in target.getMasterLoggingMessages():
            if job.find("messages") is None:
                ET.SubElement(job, "messages")
            ET.SubElement(job.find("messages"), "message", { "message": message} )
    #######
    #Now build the new stacks and corresponding jobs
    #######
    #First add all the follow ons to the existing stack and make it a follow on job for job-tree
    assert not self.hasRemaining()
    #First sort out the follow on job
    if len(newFollowOns) > 0: #If we have follow ons
        followOnRuntime = sum([ followOn.getRunTime() for followOn in newFollowOns ])
        if followOnRuntime > maxTime: #We create a parallel list of follow ons
            followOnStack = Stack(ParallelFollowOnTarget(newFollowOns))
        else:
            followOnStack = Stack(newFollowOns.pop())
            while len(newFollowOns) > 0:
                followOnStack.addTarget(newFollowOns.pop())
        job.attrib["command"] = followOnStack.makeRunnable(self.globalTempDir)
        job.attrib["time"] = str(followOnStack.getRunTime())
        followOnMemory = followOnStack.getMemory()
        assert not job.attrib.has_key("memory")
        if followOnMemory != sys.maxint:
            job.attrib["memory"] = str(followOnMemory)
        assert not job.attrib.has_key("cpu")
        followOnCpu = followOnStack.getCpu()
        if followOnCpu != sys.maxint:
            job.attrib["cpu"] = str(followOnCpu)
    #Now build stacks of children..
    #Greedily pack children into stacks whose total runtime stays <= maxTime.
    childrenTag = job.find("children")
    while len(newChildren) > 0:
        childStack = Stack(newChildren.pop())
        while len(newChildren) > 0 and childStack.getRunTime() <= maxTime:
            childStack.addTarget(newChildren.pop())
        childJob = ET.SubElement(childrenTag, "child", { "command":childStack.makeRunnable(self.globalTempDir),
            "time":str(childStack.getRunTime()) })
        childMemory = childStack.getMemory()
        assert not childJob.attrib.has_key("memory")
        if childMemory != sys.maxint:
            childJob.attrib["memory"] = str(childMemory)
        assert not childJob.attrib.has_key("cpu")
        childCpu = childStack.getCpu()
        if childCpu != sys.maxint:
            childJob.attrib["cpu"] = str(childCpu)
    #Now build jobs for each child command
    for childCommand, runTime in newChildCommands:
        ET.SubElement(childrenTag, "child", { "command":str(childCommand),
            "time":str(runTime) })
    #Finish up the stats
    if stats is not None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
def main():
    """Entry point for a jobTree slave process.

    Command line: <script> <importDir> <jobTreePath> <jobFile>.  sys.argv[1]
    is a directory appended to sys.path (so the deferred imports below can
    resolve) and then removed from argv; after removal sys.argv[1] is the
    jobTree path and sys.argv[2] the job file to run.  The slave restores the
    master's pickled environment, redirects logging and stdout/stderr into a
    per-slave log file, then runs the job's follow-on commands in a chain for
    as long as the configured time/memory/cpu budget allows, recording stats
    if requested.  On any failure the job is reset for retry and the slave log
    is copied back for the master to inspect.
    """
    sys.path.append(sys.argv[1])
    sys.argv.remove(sys.argv[1])

    #Now we can import all the stuff..
    from sonLib.bioio import getBasicOptionParser
    from sonLib.bioio import parseBasicOptions
    from sonLib.bioio import logger
    from sonLib.bioio import addLoggingFileHandler, redirectLoggerStreamHandlers
    from sonLib.bioio import setLogLevel
    from sonLib.bioio import getTotalCpuTime, getTotalCpuTimeAndMemoryUsage
    from sonLib.bioio import getTempDirectory
    from sonLib.bioio import makeSubDir
    from jobTree.src.job import Job
    from jobTree.src.master import getEnvironmentFileName, getConfigFileName, listChildDirs, getTempStatsFile, setupJobAfterFailure
    from sonLib.bioio import system

    ##########################################
    #Input args
    ##########################################

    jobTreePath = sys.argv[1]
    jobFile = sys.argv[2]

    ##########################################
    #Load the environment for the job
    ##########################################

    #First load the environment for the job.
    fileHandle = open(getEnvironmentFileName(jobTreePath), 'r')
    environment = cPickle.load(fileHandle)
    fileHandle.close()
    #Copy the master's environment, but keep this host's own temp-dir and host identity settings.
    for i in environment:
        if i not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"):
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)
    #os.environ = environment
    #os.putenv(key, value)

    ##########################################
    #Setup the temporary directories.
    ##########################################

    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    localTempDir = makeSubDir(os.path.join(localSlaveTempDir, "localTempDir"))

    ##########################################
    #Setup the logging
    ##########################################

    #Setup the logging
    tempSlaveLogFile = os.path.join(localSlaveTempDir, "slave_log.txt")
    slaveHandle = open(tempSlaveLogFile, 'w')
    for handler in list(logger.handlers): #Remove old handlers
        logger.removeHandler(handler)
    logger.addHandler(logging.StreamHandler(slaveHandle))
    #Capture everything the job prints so it ends up in the slave log file.
    origStdErr = sys.stderr
    origStdOut = sys.stdout
    sys.stderr = slaveHandle
    sys.stdout = slaveHandle

    ##########################################
    #Parse input files
    ##########################################

    config = ET.parse(getConfigFileName(jobTreePath)).getroot()
    setLogLevel(config.attrib["log_level"])
    job = Job.read(jobFile)
    job.messages = [] #This is the only way to stop messages logging twice, as are read only in the master
    job.children = []
    if os.path.exists(job.getLogFileName()): #This cleans the old log file
        os.remove(job.getLogFileName())
    logger.info("Parsed arguments and set up logging")

    #Try loop for slave logging

    ##########################################
    #Setup the stats, if requested
    ##########################################

    if config.attrib.has_key("stats"):
        startTime = time.time()
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None

    ##########################################
    #The max time
    ##########################################

    maxTime = float(config.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint

    ##########################################
    #Slave log file trapped from here on in
    ##########################################

    slaveFailed = False
    try:

        ##########################################
        #The next job
        ##########################################

        def globalTempDirName(job, depth):
            #Name of the job's global temp dir for the given chain depth.
            return job.getGlobalTempDirName() + str(depth)

        command, memoryAvailable, cpuAvailable, depth = job.followOnCommands[-1]
        defaultMemory = int(config.attrib["default_memory"])
        defaultCpu = int(config.attrib["default_cpu"])
        assert len(job.children) == 0

        startTime = time.time()
        while True:
            job.followOnCommands.pop()

            ##########################################
            #Global temp dir
            ##########################################

            globalTempDir = makeSubDir(globalTempDirName(job, depth))
            #Remove any global temp dirs that exist at greater depths.
            i = 1
            while os.path.isdir(globalTempDirName(job, depth+i)):
                system("rm -rf %s" % globalTempDirName(job, depth+i))
                i += 1

            ##########################################
            #Old children, not yet deleted
            #
            #These may exist because of the lazy cleanup
            #we do
            ##########################################

            for childDir in listChildDirs(job.jobDir):
                logger.debug("Cleaning up old child %s" % childDir)
                system("rm -rf %s" % childDir)

            ##########################################
            #Run the job
            ##########################################

            if command != "": #Not a stub
                if command[:11] == "scriptTree ":
                    ##########################################
                    #Run the target
                    ##########################################

                    #NOTE(review): loadStack is presumably a module-level import not visible in this chunk — confirm.
                    loadStack(command).execute(job=job, stats=stats,
                                    localTempDir=localTempDir, globalTempDir=globalTempDir,
                                    memoryAvailable=memoryAvailable, cpuAvailable=cpuAvailable,
                                    defaultMemory=defaultMemory, defaultCpu=defaultCpu, depth=depth)
                else: #Is another command
                    system(command)

            ##########################################
            #Cleanup/reset a successful job/checkpoint
            ##########################################

            job.remainingRetryCount = int(config.attrib["try_count"])
            system("rm -rf %s/*" % (localTempDir))
            job.update(depth=depth, tryCount=job.remainingRetryCount)

            ##########################################
            #Establish if we can run another job
            ##########################################

            if time.time() - startTime > maxTime:
                logger.info("We are breaking because the maximum time the job should run for has been exceeded")
                break

            #Deal with children
            if len(job.children) >= 1: #We are going to have to return to the parent
                logger.info("No more jobs can run in series by this slave, its got %i children" % len(job.children))
                break

            if len(job.followOnCommands) == 0:
                logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
                break

            #Get the next job and see if we have enough cpu and memory to run it..
            command, memory, cpu, depth = job.followOnCommands[-1]

            if memory > memoryAvailable:
                logger.info("We need more memory for the next job, so finishing")
                break
            if cpu > cpuAvailable:
                logger.info("We need more cpus for the next job, so finishing")
                break

            logger.info("Starting the next job")

        ##########################################
        #Finish up the stats
        ##########################################

        if stats != None:
            totalCpuTime, totalMemoryUsage = getTotalCpuTimeAndMemoryUsage()
            stats.attrib["time"] = str(time.time() - startTime)
            stats.attrib["clock"] = str(totalCpuTime - startClock)
            stats.attrib["memory"] = str(totalMemoryUsage)
            tempStatsFile = getTempStatsFile(jobTreePath)
            #Write to a ".new" file first, then rename into place.
            fileHandle = open(tempStatsFile + ".new", "w")
            ET.ElementTree(stats).write(fileHandle)
            fileHandle.close()
            os.rename(tempStatsFile + ".new", tempStatsFile) #This operation is atomic

        logger.info("Finished running the chain of jobs on this node, we ran for a total of %f seconds" % (time.time() - startTime))

    ##########################################
    #Where slave goes wrong
    ##########################################
    except: #Case that something goes wrong in slave
        traceback.print_exc(file = slaveHandle)
        logger.critical("Exiting the slave because of a failed job on host %s", socket.gethostname())
        #Re-read the job from disk and mark it failed so the master can retry it.
        job = Job.read(jobFile)
        setupJobAfterFailure(job, config)
        job.write()
        slaveFailed = True

    ##########################################
    #Cleanup
    ##########################################

    #Close the slave logging
    slaveHandle.flush()
    sys.stderr = origStdErr
    sys.stdout = origStdOut
    redirectLoggerStreamHandlers(slaveHandle, sys.stderr)
    slaveHandle.close()

    #Copy back the log file to the global dir, if needed
    if slaveFailed:
        #NOTE(review): truncateFile is presumably a module-level import not visible in this chunk — confirm.
        truncateFile(tempSlaveLogFile)
        system("mv %s %s" % (tempSlaveLogFile, job.getLogFileName()))

    #Remove the temp dir
    system("rm -rf %s" % localSlaveTempDir)

    #This must happen after the log file is done with, else there is no place to put the log
    if (not slaveFailed) and len(job.followOnCommands) == 0 and len(job.children) == 0 and len(job.messages) == 0:
        ##########################################
        #Cleanup global files at the end of the chain
        ##########################################
        job.delete()
def run(self):
    """Read the sequence-index file and add one child RunSample target per sample."""
    setLogLevel("INFO")
    # Maps each sample name to its record from the sequence.index file.
    file2info = readSeqIndex(self.options.seqIndexFile, self.options.samples)
    opts = self.options
    for sampleName in file2info:
        info = file2info[sampleName]
        self.addChildTarget(RunSample(opts, info))