def __init__(self, config, batchSystem):
    """Capture scheduler state: the job-tree location from *config*, the
    batch system to issue jobs through, and an (initially empty) mapping
    of issued job IDs.
    """
    # Location of the job tree on disk, as configured by the caller.
    self.jobTree = config.attrib["job_tree"]
    # Batch system backend used to actually run jobs.
    self.batchSystem = batchSystem
    # jobID -> job bookkeeping for jobs currently issued; none yet.
    self.jobIDsToJobsHash = {}
    self.jobsIssued = 0
    # Paths derived from the workflow installation root.
    self.jobTreeSlavePath = os.path.join(workflowRootPath(), "src", "jobTreeSlave.py")
    self.rootPath = os.path.split(workflowRootPath())[0]
def makeRunnable(self, tempDir):
    """Serialise this object to a pickle file in *tempDir* and return the
    shell command line that re-runs it via the multijob executable.

    tempDir -- project temp-dir object providing getTempFile(suffix);
    returns a string "multijob pickleFile jobTreeParentDir".
    """
    from sonLib.bioio import getTempFile
    from jobTree.src.bioio import workflowRootPath
    pickleFile = tempDir.getTempFile(".pickle")
    # BUGFIX: cPickle.HIGHEST_PROTOCOL is a binary protocol, so the file
    # must be opened in binary mode ('wb', not 'w'); text mode corrupts
    # the stream on platforms that translate newlines. try/finally
    # guarantees the handle is closed even if dump() raises.
    fileHandle = open(pickleFile, 'wb')
    try:
        cPickle.dump(self, fileHandle, cPickle.HIGHEST_PROTOCOL)
    finally:
        fileHandle.close()
    multijobexec = os.path.join(workflowRootPath(), "bin", "multijob")
    # Parent directory of the workflow root, passed through to multijob.
    jtPath = os.path.split(workflowRootPath())[0]
    return "%s %s %s" % (multijobexec, pickleFile, jtPath)
def parasolStop():
    """Function stops the parasol hub and node.

    Returns the (node, hub) exit statuses from os.system.
    """
    machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
    nodeStatus = os.system("paraNodeStop %s" % machineList)
    hubStatus = os.system("paraHubStop now")
    return nodeStatus, hubStatus
def parasolRestart():
    """Function starts the parasol hub and node.

    Stops any running parasol processes, then repeatedly (re)starts the
    node and hub until "parasol status" reports zero dead nodes.
    """
    parasolStop()
    while True:
        machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
        #pathEnvVar = os.environ["PATH"]
        os.system("paraNode start -hub=localhost") #-umask=002 -userPath=%s -sysPath=%s" % (pathEnvVar, pathEnvVar))
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))
        tempFile = getTempFile()
        # Assume the cluster is unhealthy until "parasol status" proves
        # otherwise; popen (project helper) presumably captures the
        # command's output into tempFile — TODO confirm.
        dead = True
        try:
            popen("parasol status", tempFile)
            fileHandle = open(tempFile, 'r')
            line = fileHandle.readline()
            while line != '':
                # Look for the "Nodes dead" status line; a trailing count
                # of 0 means every node is alive.
                if "Nodes dead" in line:
                    print line
                    if int(line.split()[-1]) == 0:
                        dead = False
                line = fileHandle.readline()
            fileHandle.close()
        except RuntimeError:
            # Best effort: a failed status check just triggers another
            # restart attempt below.
            pass
        os.remove(tempFile)
        if not dead:
            break
        else:
            logger.info("Tried to restart the parasol process, but failed, will try again")
            parasolStop()
            time.sleep(5)
    logger.info("Restarted the parasol process")
def issueJobs(jobs, jobIDsToJobsHash, batchSystem, queueingJobs, maxJobs, cpusUsed):
    """Issues jobs to the batch system.

    New *jobs* are appended to *queueingJobs*; queued jobs are then drained
    from the tail while the cpu budget (*maxJobs*) allows, issued through
    *batchSystem*, recorded in *jobIDsToJobsHash*, and the updated cpu
    usage is returned.
    """
    queueingJobs.extend(jobs)
    jobCommands = {}
    # Drain the queue from the back while cpu budget remains.
    while queueingJobs:
        job = queueingJobs[-1]
        jobCommand = os.path.join(workflowRootPath(), "bin", "jobTreeSlave")
        followOnJob = job.find("followOns").findall("followOn")[-1]
        memory = int(followOnJob.attrib["memory"])
        cpu = int(followOnJob.attrib["cpu"])
        # A single job demanding more cpus than the global cap can never run.
        if cpu > maxJobs:
            raise RuntimeError("A request was made for %i cpus by the maxJobs parameters is set to %i, try increasing max jobs or lowering cpu demands" % (cpu, maxJobs))
        # Out of budget for now; leave the rest queued.
        if cpu + cpusUsed > maxJobs:
            break
        cpusUsed += cpu
        commandLine = "%s -E %s %s --job %s" % (sys.executable, jobCommand, os.path.split(workflowRootPath())[0], job.attrib["file"])
        jobCommands[commandLine] = (job.attrib["file"], memory, cpu, job.attrib["slave_log_file"])
        queueingJobs.pop()
    issuedJobs = batchSystem.issueJobs([(key, jobCommands[key][1], jobCommands[key][2], jobCommands[key][3]) for key in jobCommands.keys()])
    assert len(issuedJobs.keys()) == len(jobCommands.keys())
    for jobID in issuedJobs.keys():
        command = issuedJobs[jobID]
        jobFile, _, cpu, _ = jobCommands[command]
        assert jobID not in jobIDsToJobsHash
        jobIDsToJobsHash[jobID] = (jobFile, cpu)
        logger.debug("Issued the job: %s with job id: %i and cpus: %i" % (jobFile, jobID, cpu))
    return cpusUsed
def issueJobs(jobs, jobIDsToJobsHash, batchSystem):
    """Issues jobs to the batch system.

    Builds a slave command line for each job, issues the batch through
    *batchSystem*, and records each returned job ID against its job file
    in *jobIDsToJobsHash*.
    """
    jobCommands = {}
    for job in jobs:
        slaveScript = os.path.join(workflowRootPath(), "bin", "jobTreeSlave")
        # The last follow-on carries the resource requirements.
        followOnJob = job.find("followOns").findall("followOn")[-1]
        commandLine = "%s -E %s %s --job %s" % (sys.executable, slaveScript, os.path.split(workflowRootPath())[0], job.attrib["file"])
        jobCommands[commandLine] = (job.attrib["file"], int(followOnJob.attrib["memory"]), int(followOnJob.attrib["cpu"]), job.attrib["slave_log_file"])
    issuedJobs = batchSystem.issueJobs([(command, memory, cpu, logFile) for command, (jobFile, memory, cpu, logFile) in jobCommands.items()])
    assert len(issuedJobs.keys()) == len(jobCommands.keys())
    for jobID, command in issuedJobs.items():
        jobFile = jobCommands[command][0]
        assert jobID not in jobIDsToJobsHash
        jobIDsToJobsHash[jobID] = jobFile
        logger.debug("Issued the job: %s with job id: %i " % (jobFile, jobID))