Beispiel #1
0
 def testCopySubRangeOfFile(self):
     """Checks copySubRangeOfFile against a plain string slice of the input.

     For each of self.testNo rounds a random file is generated, a random
     range (fileStart, fileEnd) is copied out of it into a second file and the
     copy is compared with contents[fileStart:fileEnd] of the original.
     """
     def readAll(path):
         #Read a whole file, closing the handle even if the read fails
         fileHandle = open(path, 'r')
         try:
             return fileHandle.read()
         finally:
             fileHandle.close()

     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile = getTempFile(rootDir=tempDir)
         outputFile = getTempFile(rootDir=tempDir)
         makeFileToSort(tempFile)
         fileSize = os.path.getsize(tempFile)
         assert fileSize > 0
         #fileStart lies in [0, fileSize) and fileEnd in [fileStart, fileSize): the range
         #may be empty, and fileEnd never equals fileSize.
         #NOTE(review): a range ending exactly at the end of the file is therefore never tested - confirm this is intended
         fileStart = random.choice(xrange(0, fileSize))
         fileEnd = random.choice(xrange(fileStart, fileSize))
         copySubRangeOfFile(tempFile, fileStart, fileEnd, outputFile)
         copied = readAll(outputFile)
         expected = readAll(tempFile)[fileStart:fileEnd]
         checkEqual(copied, expected)
         system("rm -rf %s" % tempDir)
def killMasterAndParasol():
    """Randomly kills one parasol or job tree master process, then restarts parasol.

    The output of "ps -a" is scanned line by line. Each line mentioning
    paraNode or paraHub, and otherwise each line mentioning jobTreeMaster.py,
    is a candidate: with a random draw above 0.5 the process id in its first
    column is passed to "kill" and scanning stops. parasolRestart() is called
    at the end whether or not anything was killed.
    """
    psListing = getTempFile()
    popen("ps -a", psListing)
    psHandle = open(psListing, 'r')
    #Example parasol lines in the listing:
    #67401 ttys002    0:00.06 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log ...
    #67403 ttys002    0:00.65 /Users/benedictpaten/kent/src/parasol/bin/paraHub -log=/tmp/hub.2009-07-08.log machineList subnet=127.0.0
    for psLine in psHandle:
        columns = psLine.split()
        if 'paraNode' in psLine or 'paraHub' in psLine:
            report = "Tried to kill parasol process: %i, line: %s, exit value: %i"
        elif 'jobTreeMaster.py' in psLine:
            logger.info("Have job tree master line")
            report = "Tried to kill master process: %i, line: %s, exit value: %i"
        else:
            #Not a process we are interested in
            continue
        if random.random() > 0.5:
            exitValue = os.system("kill %i" % int(columns[0]))
            logger.info(report % (int(columns[0]), psLine, exitValue))
            #At most one kill attempt per call
            break
    psHandle.close()
    os.remove(psListing)
    parasolRestart()
def parasolRestart():
    """Function starts the parasol hub and node.

    Stops parasol, then repeatedly launches paraNode and paraHub until the
    output of "parasol status" contains a "Nodes dead" line whose last field
    is 0. After a failed attempt parasol is stopped again and the function
    sleeps for five seconds before retrying.
    """
    parasolStop()
    #The machine list path is the same for every attempt, so work it out once
    machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
    while True:
        #pathEnvVar = os.environ["PATH"]
        os.system("paraNode start -hub=localhost")
        #-umask=002 -userPath=%s -sysPath=%s" % (pathEnvVar, pathEnvVar))
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))
        tempFile = getTempFile()
        dead = True
        try:
            try:
                popen("parasol status", tempFile)
                fileHandle = open(tempFile, 'r')
                try:
                    for line in fileHandle:
                        if "Nodes dead" in line:
                            print(line)
                            if int(line.split()[-1]) == 0:
                                dead = False
                finally:
                    #Close the status file even if a line cannot be parsed
                    fileHandle.close()
            except RuntimeError:
                #NOTE(review): presumably popen raises RuntimeError when "parasol status"
                #fails (e.g. the hub is not up yet); that counts as a failed attempt - confirm
                pass
        finally:
            #Never leave the status temp file behind, whatever happened above
            os.remove(tempFile)
        if not dead:
            break
        logger.info("Tried to restart the parasol process, but failed, will try again")
        parasolStop()
        time.sleep(5)
    logger.info("Restarted the parasol process")
Beispiel #4
0
 def testMerge(self):
     """Checks that merging two sorted files matches sorting all of their lines together.

     Repeated self.testNo times with freshly generated input files.
     """
     for test in xrange(self.testNo):
         workDir = getTempDirectory(os.getcwd())
         firstInput = getTempFile(rootDir=workDir)
         secondInput = getTempFile(rootDir=workDir)
         mergedOutput = getTempFile(rootDir=workDir)
         #Generate both inputs, then sort each of them in place
         for inputFile in (firstInput, secondInput):
             makeFileToSort(inputFile)
         for inputFile in (firstInput, secondInput):
             sort(inputFile)
         merge(firstInput, secondInput, mergedOutput)
         #The expected result is every line of both inputs, in sorted order
         expected = sorted(loadFile(firstInput) + loadFile(secondInput))
         observed = loadFile(mergedOutput)
         checkEqual(expected, observed)
         system("rm -rf %s" % workDir)
def main():
    """Entry point of the first test command of the job tree test.

    Parses the job XML (--job) and the tree pointer XML (--treePointer), loads
    the tree the pointer refers to and, for every child of that tree, adds a
    child command to the job. The job's own command is then set to the second
    test command and the job file is rewritten. Finally a line is written to
    stderr and to stdout, and the process exits with status 1 when a random
    draw exceeds 0.9 and with status 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile", default="None",
                      help="Job file containing command to run")
    parser.add_option("--treePointer", dest="treePointerFile", default="None",
                      help="File containing pointer to the tree data")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")

    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")

    for treeChild in tree.find("children").findall("child"):
        #Make the pointer file for the child tree, in the job's global temp dir
        pointerFile = makeTreePointer(treeChild.attrib["file"], getTempFile(rootDir=job.attrib["global_temp_dir"]))
        #Register the command that will process this child
        jobChild = ET.SubElement(job.find("children"), "child")
        jobChild.attrib["command"] = "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % (pointerFile,)
        #Only some of the children are given a (random) time estimate
        if random.random() > 0.2:
            jobChild.attrib["time"] = str(random.random() * 10)
        #Record the child pointer on the in-memory tree pointer
        #NOTE(review): treePointer is not written back to disk anywhere in this function - confirm that is intended
        ET.SubElement(treePointer.find("children"), "child", { "file":pointerFile })

    job.attrib["command"] = "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE" % (options.treePointerFile,)
    logger.info("Made new command")

    jobHandle = open(options.jobFile, 'w')
    ET.ElementTree(job).write(jobHandle)
    jobHandle.close()
    logger.info("Updated the job file")

    #These lines should end up in the logs
    stderrMessage = "Checking that we can report to std err"
    sys.stderr.write(stderrMessage + "\n")
    print("Checking that we can report to std out")

    #Fail at random so that the caller's handling of failed jobs gets exercised
    failJob = random.random() > 0.9
    if failJob:
        logger.info("Going to fail the job")
    else:
        logger.info("Going to pass the job done okay")
    sys.exit(int(failJob))
Beispiel #6
0
 def run(self):
     """Sorts the range of the input file directly, or splits it between two child targets.

     A range longer than self.N is cut just after the position returned by
     getMidPoint; each part is handed to a Down child target writing to its own
     temporary file, and an Up follow-on target is given both temporary files
     and self.outputFile. Any other range is copied to self.outputFile, which
     is then sorted.
     """
     length = self.fileEnd - self.fileStart
     self.logToMaster("Am running a down target with length: %i from input file: %s" % (length, self.inputFile))
     assert length >= 0
     if length <= self.N:
         #We can sort this bit of the file
         copySubRangeOfFile(self.inputFile, self.fileStart, self.fileEnd, self.outputFile)
         sort(self.outputFile)
         return
     #We will subdivide the file
     midPoint = getMidPoint(self.inputFile, self.fileStart, self.fileEnd)
     assert midPoint >= self.fileStart
     assert midPoint+1 < self.fileEnd
     #The second part starts one past the mid point, to avoid the newline
     splitPoint = midPoint+1
     lowerOutput = getTempFile(rootDir=self.getGlobalTempDir())
     upperOutput = getTempFile(rootDir=self.getGlobalTempDir())
     self.addChildTarget(Down(self.inputFile, self.fileStart, splitPoint, self.N, lowerOutput))
     self.addChildTarget(Down(self.inputFile, splitPoint, self.fileEnd, self.N, upperOutput))
     self.setFollowOnTarget(Up(lowerOutput, upperOutput, self.outputFile))
Beispiel #7
0
 def testJobTreeStats_SortSimple(self):
     """Tests the jobTreeStats utility using the scriptTree_sort example.

     Each round sorts a generated file with the --stats option switched on and
     then runs jobTreeStats on the resulting job tree. The content of the
     stats output file is not inspected here.
     """
     sortTemplate = "scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %s --stats --jobTime 0.5"
     statsTemplate = "jobTreeStats --jobTree %s --outputFile %s"
     lineCount, longestLine, chunkSize = 100000, 10, 1000
     for test in xrange(self.testNo):
         workDir = getTempDirectory(os.getcwd())
         inputFile = getTempFile(rootDir=workDir)
         statsFile = getTempFile(rootDir=workDir)
         jobTreeDir = os.path.join(workDir, "jobTree")
         makeFileToSort(inputFile, lineCount, longestLine)
         #Sort the file
         system(sortTemplate % (jobTreeDir, inputFile, chunkSize))
         #Now get the stats
         system(statsTemplate % (jobTreeDir, statsFile))
         #Cleanup
         system("rm -rf %s" % workDir)
Beispiel #8
0
 def makeRunnable(self, tempDir):
     """Pickles this object into tempDir and returns the command that runs it.

     tempDir -- directory in which the ".pickle" file is created.

     Returns a "scriptTree" command line naming the pickle file, followed by
     the distinct import strings of all targets on self.stack.
     """
     pickleFile = getTempFile(".pickle", tempDir)
     #HIGHEST_PROTOCOL is a binary pickle format, so the file has to be opened in binary mode
     fileHandle = open(pickleFile, 'wb')
     try:
         cPickle.dump(self, fileHandle, cPickle.HIGHEST_PROTOCOL)
     finally:
         fileHandle.close()
     #Collect each import string once, however many targets share it
     importStrings = set()
     for target in self.stack:
         importStrings.update(target.importStrings)
     classNames = " ".join(importStrings)
     return "scriptTree --job JOB_FILE --target %s %s" % (pickleFile, classNames)
Beispiel #9
0
 def testGetMidPoint(self):
     """Checks that getMidPoint returns the position of a newline inside the file.

     Repeated self.testNo times on freshly generated files, asking for the mid
     point of the whole file each time.
     """
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile = getTempFile(rootDir=tempDir)
         makeFileToSort(tempFile)
         fileHandle = open(tempFile, 'r')
         try:
             contents = fileHandle.read()
         finally:
             fileHandle.close()
         fileSize = os.path.getsize(tempFile)
         midPoint = getMidPoint(tempFile, 0, fileSize)
         print("the mid point is %i of a file of %i bytes woth byte" % (midPoint, fileSize))
         #Range checks come first: a negative mid point would otherwise silently index from the end of the string
         assert midPoint >= 0
         assert midPoint < fileSize
         assert contents[midPoint] == '\n'
         system("rm -rf %s" % tempDir)
Beispiel #10
0
 def run(self):
     """Runs commands taken from self.inputQueue, forever.

     Each queue item is a (command, logFile, jobID) tuple. The command is run
     through the shell with stdout and stderr captured in a temporary file,
     which is moved to logFile afterwards. A (command, status, jobID) tuple,
     status being the value reported by os.waitpid, is put on self.outputQueue
     and the input item is marked as done.
     """
     while True:
         command, logFile, jobID = self.inputQueue.get()
         #The output is caught in a scratch file and only moved to the real log file once the command is over
         scratchLog = getTempFile()
         logHandle = open(scratchLog, 'w')
         child = subprocess.Popen(command, shell=True, stdout=logHandle, stderr=logHandle)
         childPid, childStatus = os.waitpid(child.pid, 0)
         logHandle.close()
         if os.path.exists(scratchLog):
             system("mv %s %s" % (scratchLog, logFile))
         self.outputQueue.put((command, childStatus, jobID))
         self.inputQueue.task_done()
Beispiel #11
0
def scriptTree_SortTest(testNo, batchSystem, lines=100000, maxLineLength=10, N=1000):
    """Tests scriptTree/jobTree by sorting a file in parallel.

    For each of testNo rounds a file is generated with the given lines and
    maxLineLength, its lines are sorted in memory as the reference, and the
    file is then sorted by scriptTreeTest_Sort.py using the given batchSystem
    and N. The lines of the file afterwards must equal the reference.
    """
    sortTemplate = "scriptTreeTest_Sort.py --jobTree %s --logLevel=DEBUG --fileToSort=%s --N %i --batchSystem %s --jobTime 1.0"
    for test in xrange(testNo):
        workDir = getTempDirectory(os.getcwd())
        fileToSort = getTempFile(rootDir=workDir)
        jobTreeDir = os.path.join(workDir, "jobTree")
        makeFileToSort(fileToSort, lines=lines, maxLineLength=maxLineLength)
        #First make our own sorted version
        inputHandle = open(fileToSort, 'r')
        expected = sorted(inputHandle.readlines())
        inputHandle.close()
        #Sort the file
        system(sortTemplate % (jobTreeDir, fileToSort, N, batchSystem))
        #Now read the file back and check it is properly sorted
        resultHandle = open(fileToSort, 'r')
        observed = resultHandle.readlines()
        resultHandle.close()
        checkEqual(expected, observed)
        system("rm -rf %s" % workDir)
Beispiel #12
0
 def run(self):
     """Schedules a Down child target over the whole input file, then a Cleanup follow-on.

     The child covers the range from 0 to the size of self.inputFile and
     writes to a temporary file in the global temp dir; the follow-on is given
     that temporary file and the input file.
     """
     sortedOutput = getTempFile(rootDir=self.getGlobalTempDir())
     wholeFile = Down(self.inputFile, 0, os.path.getsize(self.inputFile), self.N, sortedOutput)
     self.addChildTarget(wholeFile)
     finish = Cleanup(sortedOutput, self.inputFile)
     self.setFollowOnTarget(finish)
Beispiel #13
0
def processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats):
    """Runs a job.

    A temporary job file is written, its path is substituted for the
    "JOB_FILE" placeholder in the command of jobToRun, and the command is run
    through the shell with its output captured in the log file of the job.
    The job element is then updated in place: on a zero status it is coloured
    "black" and receives the messages, children and follow-on issued by the
    command; otherwise (or when the command has no placeholder) it is
    coloured "red".

    job -- XML element of the job; must have no children left to run.
    jobToRun -- element providing the "command" and "time" of this run.
    memoryAvailable, cpuAvailable -- passed on to the command through the
        temporary job file.
    stats -- None, or an element that receives a "job" sub element holding
        the time.time() and getTotalCpuTime() differences of a successful run.
    """
    import stat
    from jobTree.src.bioio import getTempFile
    from jobTree.src.bioio import getTempDirectory
    from jobTree.src.bioio import logger
    from jobTree.src.bioio import system
    from jobTree.src.bioio import getTotalCpuTime
    
    assert len(job.find("children").findall("child")) == 0
    assert int(job.attrib["child_count"]) == int(job.attrib["black_child_count"])
    command = jobToRun.attrib["command"]
    
    #The command does not edit the job itself but a fresh temporary job element
    tempJob = ET.Element("job")
    ET.SubElement(tempJob, "children")
    
    #Log for job
    tempJob.attrib["log_level"] = job.attrib["log_level"]
    
    #Time length of 'ideal' job before further parallelism is required
    tempJob.attrib["job_time"] = job.attrib["job_time"]
    
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()

    #Temp file dirs for job.
    localTempDir = getTempDirectory(rootDir=localSlaveTempDir)
    tempJob.attrib["local_temp_dir"] = localTempDir
    depth = len(job.find("followOns").findall("followOn"))
    globalTempDir = os.path.join(job.attrib["global_temp_dir"], str(depth))
    tempJob.attrib["global_temp_dir"] = globalTempDir
    if not os.path.isdir(globalTempDir): #Ensures that the global temp dirs of each level are kept separate.
        os.mkdir(globalTempDir)
        #Read, write and execute for everyone (octal 777)
        os.chmod(globalTempDir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    #A directory one level deeper is removed before this run; two levels deeper must not exist
    deeperTempDir = os.path.join(job.attrib["global_temp_dir"], str(depth+1))
    if os.path.isdir(deeperTempDir):
        system("rm -rf %s" % deeperTempDir)
    assert not os.path.isdir(os.path.join(job.attrib["global_temp_dir"], str(depth+2)))
    
    #Deal with memory and cpu requirements (this pass tells the running job how much cpu and memory they have,
    #according to the batch system
    tempJob.attrib["available_memory"] = str(memoryAvailable)
    tempJob.attrib["available_cpu"] = str(cpuAvailable)
    if stats is not None:
        #Only the name is reserved: the file is removed again so that its existence
        #after the run shows that the command wrote stats
        tempJob.attrib["stats"] = getTempFile(rootDir=localSlaveTempDir)
        os.remove(tempJob.attrib["stats"])
    
    #Now write the temp job file
    tempFile = getTempFile(rootDir=localSlaveTempDir)
    fileHandle = open(tempFile, 'w') 
    tree = ET.ElementTree(tempJob)
    tree.write(fileHandle)
    fileHandle.close()
    logger.info("Copied the jobs files ready for the job")
    
    if "JOB_FILE" not in command:
        logger.critical("There is no 'JOB_FILE' string in the command to be run to take the job-file argument: %s" % command)
        job.attrib["colour"] = "red" #Update the colour
    else:
        #First load the environment for the job.
        fileHandle = open(job.attrib["environment_file"], 'r')
        environment = cPickle.load(fileHandle)
        fileHandle.close()
        logger.info("Loaded the environment for the process")
        
        #Run the actual command
        tempLogFile = getTempFile(suffix=".log", rootDir=localSlaveTempDir)
        finalCommand = command.replace("JOB_FILE", tempFile)
        fileHandle = open(tempLogFile, 'w')
        try:
            if stats is not None:
                startTime = time.time()
                startClock = getTotalCpuTime()
            process = subprocess.Popen(finalCommand, shell=True, stdout=fileHandle, stderr=subprocess.STDOUT, env=environment)
            sts = os.waitpid(process.pid, 0)
        finally:
            #Close the log even if the command could not be started or waited for
            fileHandle.close()
        truncateFile(tempLogFile, int(job.attrib["max_log_file_size"]))
        
        #Copy across the log file
        system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
        i = sts[1] #The status value reported by os.waitpid
        
        logger.info("Ran the job command=%s with exit status %i" % (finalCommand, i))
        
        if i == 0:
            logger.info("Passed the job, okay")
            
            if stats is not None:
                jobTag = ET.SubElement(stats, "job", { "time":str(time.time() - startTime), "clock":str(getTotalCpuTime() - startClock) })
                if os.path.exists(tempJob.attrib["stats"]):
                    jobTag.append(ET.parse(tempJob.attrib["stats"]).getroot())
            
            #Reload the temp job file, which the command may have edited
            tempJob = ET.parse(tempFile).getroot()
            job.attrib["colour"] = "black" #Update the colour
            
            #Deal with any logging messages directed at the master
            newMessages = tempJob.find("messages")
            if newMessages is not None:
                messages = job.find("messages")
                if messages is None:
                    messages = ET.SubElement(job, "messages")
                for messageTag in newMessages.findall("message"):
                    messages.append(messageTag)
            
            #Update the runtime of the stack..
            totalRuntime = float(job.attrib["total_time"])  #This is the estimate runtime of the jobs on the followon stack
            runtime = float(jobToRun.attrib["time"])
            totalRuntime -= runtime
            if totalRuntime < 0.0:
                totalRuntime = 0.0
            
            #The children
            children = job.find("children")
            assert len(children.findall("child")) == 0 #The children
            newChildren = tempJob.find("children")
            assert newChildren is not None
            for child in newChildren.findall("child"):
                memory, cpu, compTime = getMemoryCpuAndTimeRequirements(job, child)
                ET.SubElement(children, "child", { "command":child.attrib["command"], 
                        "time":str(compTime), "memory":str(memory), "cpu":str(cpu) })
                logger.info("Making a child with command: %s" % (child.attrib["command"]))
            
            #The follow on command
            followOns = job.find("followOns")
            followOns.remove(followOns.findall("followOn")[-1]) #Remove the old job
            if "command" in tempJob.attrib:
                memory, cpu, compTime = getMemoryCpuAndTimeRequirements(job, tempJob)
                ET.SubElement(followOns, "followOn", { "command":tempJob.attrib["command"], 
                        "time":str(compTime), "memory":str(memory), "cpu":str(cpu) })
                ##Add the runtime to the total runtime..
                totalRuntime += compTime
                logger.info("Making a follow on job with command: %s" % tempJob.attrib["command"])
                
            elif len(newChildren.findall("child")) != 0: #This is to keep the stack of follow on jobs consistent.
                ET.SubElement(followOns, "followOn", { "command":"echo JOB_FILE", "time":"0", "memory":"1000000", "cpu":"1" })
                logger.info("Making a stub follow on job")
            #Write back the runtime, after adding the follow on time and subtracting the time of the run job.
            job.attrib["total_time"] = str(totalRuntime)
        else:
            logger.info("Failed the job")
            job.attrib["colour"] = "red" #Update the colour
    
    #Clean up
    system("rm -rf %s" % (localSlaveTempDir))
    logger.info("Cleaned up by removing temp jobfile (the copy), and the temporary file directory for the job")