Example #1
    def __init__(self, config, batchSystem):
        # Assumes module-level "import os" and workflowRootPath from
        # jobTree.src.bioio, as in the later examples.
        self.jobTree = config.attrib["job_tree"]
        self.jobIDsToJobsHash = {}
        self.batchSystem = batchSystem
        self.jobsIssued = 0
        self.jobTreeSlavePath = os.path.join(workflowRootPath(), "src",
                                             "jobTreeSlave.py")
        self.rootPath = os.path.split(workflowRootPath())[0]
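The constructor reads everything it needs from the attrib dictionary of a parsed XML element, so a suitable config can be built with the standard library alone. A minimal sketch, with a made-up path:

import xml.etree.ElementTree as ET

# The constructor only needs an element whose "job_tree" attribute names
# the workflow's state directory.
config = ET.Element("config", attrib={"job_tree": "/tmp/jobTree"})
assert config.attrib["job_tree"] == "/tmp/jobTree"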
Example #2
    def makeRunnable(self, tempDir):
        # Requires module-level "import os" and "import cPickle"; tempDir is
        # assumed to be a sonLib TempFileTree-style object with getTempFile().
        from jobTree.src.bioio import workflowRootPath

        # Serialise this target. HIGHEST_PROTOCOL is a binary pickle protocol,
        # so the file must be opened in binary mode.
        pickleFile = tempDir.getTempFile(".pickle")
        fileHandle = open(pickleFile, 'wb')
        cPickle.dump(self, fileHandle, cPickle.HIGHEST_PROTOCOL)
        fileHandle.close()
        # Build the shell command that re-executes this target via multijob.
        multijobexec = os.path.join(workflowRootPath(), "bin", "multijob")
        jtPath = os.path.split(workflowRootPath())[0]
        return "%s %s %s" % (multijobexec, pickleFile, jtPath)
Example #3
def parasolStop():
    """Function stops the parasol hub and node.
    """
    machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
    i = os.system("paraNodeStop %s" % machineList)
    j = os.system("paraHubStop now")
    return i, j
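parasolStop() returns the two raw os.system statuses rather than interpreting them, leaving the policy to the caller. A small usage sketch in the style of the surrounding code:

i, j = parasolStop()
if i != 0 or j != 0:
    # Nonzero statuses usually just mean parasol was not running to begin with.
    logger.info("paraNodeStop/paraHubStop exited with %i and %i" % (i, j))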
Example #4
def parasolRestart():
    """Stops then restarts the parasol hub and node, retrying until parasol
    reports zero dead nodes.
    """
    parasolStop()
    while True:
        machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
        os.system("paraNode start -hub=localhost")
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))
        tempFile = getTempFile()
        # Assume the cluster is dead until "parasol status" proves otherwise.
        dead = True
        try:
            popen("parasol status", tempFile)
            fileHandle = open(tempFile, 'r')
            line = fileHandle.readline()
            while line != '':
                if "Nodes dead" in line:
                    print line
                    if int(line.split()[-1]) == 0:
                        dead = False
                line = fileHandle.readline()
            fileHandle.close()
        except RuntimeError:
            pass
        os.remove(tempFile)
        if not dead:
            break
        else:
            logger.info("Tried to restart the parasol process, but failed, will try again")
            parasolStop()
            time.sleep(5)
    logger.info("Restarted the parasol process")
Example #5
def issueJobs(jobs, jobIDsToJobsHash, batchSystem, queueingJobs, maxJobs, cpusUsed):
    """Issues jobs to the batch system.
    """
    for job in jobs:
        queueingJobs.append(job)
    jobCommands = {}
    while len(queueingJobs) > 0:
        # Peek at the next queued job; it is only popped once actually issued.
        job = queueingJobs[-1]
        jobCommand = os.path.join(workflowRootPath(), "bin", "jobTreeSlave")
        followOnJob = job.find("followOns").findall("followOn")[-1]
        memory = int(followOnJob.attrib["memory"])
        cpu = int(followOnJob.attrib["cpu"])
        if cpu > maxJobs:
            raise RuntimeError("A request was made for %i cpus, but the maxJobs "
                               "parameter is set to %i; try increasing maxJobs "
                               "or lowering the job's cpu demand" % (cpu, maxJobs))
        if cpu + cpusUsed > maxJobs:
            break
        cpusUsed += cpu
        jobCommands["%s -E %s %s --job %s" % (sys.executable, jobCommand, os.path.split(workflowRootPath())[0], job.attrib["file"])] = (job.attrib["file"], memory, cpu, job.attrib["slave_log_file"])
        queueingJobs.pop()
    issuedJobs = batchSystem.issueJobs([ (key, jobCommands[key][1], jobCommands[key][2], jobCommands[key][3]) for key in jobCommands.keys() ])
    assert len(issuedJobs.keys()) == len(jobCommands.keys())
    for jobID in issuedJobs.keys():
        command = issuedJobs[jobID]
        jobFile = jobCommands[command][0]
        cpu = jobCommands[command][2]
        assert jobID not in jobIDsToJobsHash
        jobIDsToJobsHash[jobID] = (jobFile, cpu)
        logger.debug("Issued the job: %s with job id: %i and cpus: %i" % (jobFile, jobID, cpu))
    return cpusUsed
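issueJobs() only ever increments cpusUsed, so the count has to be released elsewhere when a job finishes, otherwise the "cpu + cpusUsed > maxJobs" gate would eventually block all queued work. A hypothetical counterpart sketch (the function name is illustrative, not from the source):

def processFinishedJob(jobID, jobIDsToJobsHash, cpusUsed):
    # jobIDsToJobsHash maps jobID -> (jobFile, cpu), as built above.
    jobFile, cpu = jobIDsToJobsHash.pop(jobID)
    logger.debug("Job %s with id %i finished, releasing %i cpus" % (jobFile, jobID, cpu))
    return cpusUsed - cpu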
Example #6
def issueJobs(jobs, jobIDsToJobsHash, batchSystem):
    """Issues jobs to the batch system.
    """
    jobCommands = {}
    for job in jobs:
        jobCommand = os.path.join(workflowRootPath(), "bin", "jobTreeSlave")
        followOnJob = job.find("followOns").findall("followOn")[-1]
        jobCommands["%s -E %s %s --job %s" % (sys.executable, jobCommand, os.path.split(workflowRootPath())[0], job.attrib["file"])] = (job.attrib["file"], int(followOnJob.attrib["memory"]), int(followOnJob.attrib["cpu"]), job.attrib["slave_log_file"])
    issuedJobs = batchSystem.issueJobs([ (key, jobCommands[key][1], jobCommands[key][2], jobCommands[key][3]) for key in jobCommands.keys() ])
    assert len(issuedJobs.keys()) == len(jobCommands.keys())
    for jobID in issuedJobs.keys():
        command = issuedJobs[jobID]
        jobFile = jobCommands[command][0]
        assert jobID not in jobIDsToJobsHash
        jobIDsToJobsHash[jobID] = jobFile
        logger.debug("Issued the job: %s with job id: %i " % (jobFile, jobID))