def main():
    """Draw the chain and block scatter plots for a set of stats files.

    The positional arguments must alternate between a stats XML file and the
    region name that goes with it. Each file is parsed and paired with its
    region name, and the resulting list is passed to chainScatterPlots and
    blockScatterPlots.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    usage = "usage: %prog [options] treeStatsFiles"
    parser = getBasicOptionParser(usage, "%prog 0.1")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    ##########################################
    #Get the input data etc.
    ##########################################
    #Arguments come in (statsFile, regionName) pairs, so there must be an even number.
    assert len(args) % 2 == 0
    statsFiles = args[::2]
    regionNames = args[1::2]
    stats = []
    for statsFile, regionName in zip(statsFiles, regionNames):
        stats.append((ET.parse(statsFile).getroot(), regionName))

    ##########################################
    #Make the scatter plots
    ##########################################
    chainScatterPlots(stats)
    blockScatterPlots(stats)
def main():
    """Kill the jobs issued to the batch system of a job tree.

    The job tree directory can be supplied with --jobTree or as the single
    positional argument (the positional argument wins when both are given).
    When the program is started without any command line arguments the help
    text is printed and the process exits with status 0.
    """
    usage = "usage: %prog [--jobTree] JOB_TREE_DIR [more options]"
    parser = getBasicOptionParser(usage, "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree to kill")
    options, args = parseBasicOptions(parser)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    #Only the jobtree may be specified as a positional argument.
    assert len(args) <= 1
    if args:
        options.jobTree = args[0]
    logger.info("Parsed arguments")

    jobTreeDir = options.jobTree
    assert jobTreeDir != None #The jobtree should not be null
    assert os.path.isdir(jobTreeDir) #The job tree must exist if we are going to kill it.
    logger.info("Starting routine to kill running jobs in the jobTree: %s" % jobTreeDir)

    configRoot = ET.parse(getConfigFileName(jobTreeDir)).getroot()
    #Original author's note: loading the batch system should already kill the
    #existing jobs; the loop below repeats the kill just in case.
    batchSystem = loadTheBatchSystem(configRoot)
    for issuedJobID in batchSystem.getIssuedJobIDs():
        batchSystem.killJobs(issuedJobID)
    logger.info("All jobs SHOULD have been killed")
def main():
    """First-stage test command: create a child job for every child of a tree node.

    Reads the job XML (--job) and the tree pointer XML (--treePointer), then
    for each <child> of the pointed-to tree:
      * makes a tree pointer file for the child in the job's global temp dir,
      * appends a <child> element to the job whose command re-runs this script
        on that pointer (with a random "time" attribute about 80% of the time),
      * appends a <child> element to the in-memory tree pointer.
    The job's own command is replaced by the second-stage script and the job
    file is rewritten. Finally the process exits with status 1 roughly 10% of
    the time (random failure), and with 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    parser.add_option("--treePointer", dest="treePointerFile",
                      help="File containing pointer to the tree data",
                      default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")

    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")

    for child in tree.find("children").findall("child"):
        #Make the child tree pointer
        childTreePointerFile = makeTreePointer(child.attrib["file"],
                                               getTempFile(rootDir=job.attrib["global_temp_dir"]))
        #Make the child command
        unbornChild = ET.SubElement(job.find("children"), "child")
        unbornChild.attrib["command"] = \
            "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % (childTreePointerFile,)
        if random.random() > 0.2:
            unbornChild.attrib["time"] = str(random.random() * 10)
        #Record the child tree pointer.
        #NOTE(review): treePointer is only modified in memory here; this
        #function never writes it back to options.treePointerFile - confirm
        #whether that is intended.
        ET.SubElement(treePointer.find("children"), "child", { "file":childTreePointerFile })

    job.attrib["command"] = "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE" % \
        (options.treePointerFile,)
    logger.info("Made new command")

    #Close the job file even if serialising the XML fails.
    fileHandle = open(options.jobFile, 'w')
    try:
        ET.ElementTree(job).write(fileHandle)
    finally:
        fileHandle.close()
    logger.info("Updated the job file")

    #These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    print("Checking that we can report to std out")

    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """ Reports stats on the job-tree, use with --stats option to jobTree.

    Parses and checks the command line, reuses the collated stats returned by
    cacheAvailable when there are any, otherwise builds them from the settings
    via processData, and then reports and packs the result.
    """
    usage = "usage: %prog [--jobTree] JOB_TREE_DIR [options]"
    parser = getBasicOptionParser(usage, "%prog 0.1")
    initializeOptions(parser)
    options, args = parseBasicOptions(parser)
    checkOptions(options, args, parser)

    collated = cacheAvailable(options)
    if collated is None:
        #Nothing cached: collate the stats from scratch.
        config, stats = getSettings(options)
        collated = processData(config, stats, options)

    reportData(collated, options)
    packData(collated, options)
def main():
    """Kill the jobs issued to the batch system of the job tree given by --jobTree.

    This variant takes no positional arguments; the job tree directory must be
    passed with --jobTree and must contain a config.xml file.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree to kill")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    jobTreeDir = options.jobTree
    assert len(args) == 0 # This program takes no arguments
    assert jobTreeDir != None # The jobtree should not be null
    assert os.path.isdir(jobTreeDir) # The job tree must exist if we are going to kill it.
    logger.info("Starting routine to kill running jobs in the jobTree: %s" % jobTreeDir)

    configPath = os.path.join(jobTreeDir, "config.xml")
    configRoot = ET.parse(configPath).getroot()
    # Original author's note: loading the batch system should already kill the
    # existing jobs; the loop below repeats the kill just in case.
    batchSystem = loadTheBatchSystem(configRoot)
    for issuedJob in batchSystem.getIssuedJobIDs():
        batchSystem.killJobs(issuedJob)
    logger.info("All jobs SHOULD have been killed")
def main():
    """Write a latex document of tables built from a set of stats files.

    The positional arguments must alternate between a stats XML file and the
    region name that goes with it; --outputFile names the file the latex is
    written to. The output file is closed even when one of the table writers
    raises.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [options] treeStatsFiles", "%prog 0.1")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File to put the latex tables in.")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert options.outputFile != None

    ##########################################
    #Get the input data etc.
    ##########################################
    #Arguments come in (statsFile, regionName) pairs, so there must be an even number.
    assert len(args) % 2 == 0
    stats = [ (ET.parse(statsFile).getroot(), regionName)
              for statsFile, regionName in zip(args[::2], args[1::2]) ]

    ##########################################
    #Make the document
    ##########################################
    fileHandle = open(options.outputFile, "w")
    try:
        writeDocumentPreliminaries(fileHandle)
        writeFlowerTable(stats, fileHandle)
        writeBlocksTable(stats, fileHandle)
        writeChainsTable(stats, fileHandle)
        writeTerminalGroupsTable(stats, fileHandle)
        writeFacesTable(stats, fileHandle)
        writeReferenceTable(stats, fileHandle)
        writeDocumentEnd(fileHandle)
    finally:
        ##########################################
        #Cleanup
        ##########################################
        fileHandle.close()
def main():
    """Write a latex document of tables built from a set of stats files.

    The positional arguments must alternate between a stats XML file and the
    region name that goes with it; --outputFile names the file the latex is
    written to. The output file is closed even when one of the table writers
    raises.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [options] treeStatsFiles", "%prog 0.1")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File to put the latex tables in.")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert options.outputFile != None

    ##########################################
    #Get the input data etc.
    ##########################################
    #Arguments come in (statsFile, regionName) pairs, so there must be an even number.
    assert len(args) % 2 == 0
    stats = [
        (ET.parse(statsFile).getroot(), regionName)
        for statsFile, regionName in zip(args[::2], args[1::2])
    ]

    ##########################################
    #Make the document
    ##########################################
    #The table writers are called in document order.
    tableWriters = (
        writeFlowerTable,
        writeBlocksTable,
        writeChainsTable,
        writeTerminalGroupsTable,
        writeFacesTable,
        writeReferenceTable,
    )
    fileHandle = open(options.outputFile, "w")
    try:
        writeDocumentPreliminaries(fileHandle)
        for writeTable in tableWriters:
            writeTable(stats, fileHandle)
        writeDocumentEnd(fileHandle)
    finally:
        ##########################################
        #Cleanup
        ##########################################
        fileHandle.close()
def main():
    """Draw the chain and block scatter plots for a set of stats files.

    Positional arguments are read as alternating stats XML file / region name
    pairs. Every stats file is parsed and stored next to its region name, and
    the list of pairs is given to both plotting routines.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [options] treeStatsFiles",
                                  "%prog 0.1")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    ##########################################
    #Get the input data etc.
    ##########################################
    assert len(args) % 2 == 0 #Arguments must come in (file, region) pairs.
    stats = []
    for index in range(0, len(args), 2):
        statsRoot = ET.parse(args[index]).getroot()
        stats.append((statsRoot, args[index + 1]))

    ##########################################
    #Make the scatter plots
    ##########################################
    chainScatterPlots(stats)
    blockScatterPlots(stats)
def main():
    """Run a chain of jobs from a job file on this slave.

    The first command line argument is a directory that is appended to
    sys.path (and removed from sys.argv) so that the sonLib / jobTree modules
    can be imported. The job file given by --job is then read, logging and
    optional stats are set up, the pickled environment is loaded, and the
    last follow-on of the job is run with processJob. While a run leaves the
    job "black", a single child (if any) is moved onto the follow-on stack and
    the next follow-on is run, until one of the stop conditions in the loop
    is met. The job file is written back, the local temp directory is removed
    and, if requested, the stats are written.
    """
    #The extra module path is argv[1]; delete it by position so that an equal
    #value earlier in argv can never be removed by mistake.
    sys.path.append(sys.argv[1])
    del sys.argv[1]

    #Now we can import all the stuff..
    from sonLib.bioio import getBasicOptionParser
    from sonLib.bioio import parseBasicOptions
    from sonLib.bioio import logger
    from sonLib.bioio import addLoggingFileHandler
    from sonLib.bioio import setLogLevel
    from sonLib.bioio import getTotalCpuTime
    from sonLib.bioio import getTempDirectory
    from jobTree.src.master import writeJobs
    from jobTree.src.master import readJob
    from sonLib.bioio import system

    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    options, args = parseBasicOptions(parser)
    assert len(args) == 0

    ##########################################
    #Parse the job.
    ##########################################
    job = readJob(options.jobFile)

    ##########################################
    #Setup the logging
    ##########################################
    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")

    ##########################################
    #Setup the stats, if requested
    ##########################################
    if "stats" in job.attrib:
        #NOTE(review): startTime is assigned again just before the job loop,
        #so the "time" stat below is measured from that later point.
        startTime = time.time()
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None

    ##########################################
    #Load the environment for the job
    ##########################################
    #NOTE(review): unpickling executes arbitrary code if the environment file
    #is not trusted - confirm that only the master writes this file.
    fileHandle = open(job.attrib["environment_file"], 'r')
    try:
        environment = cPickle.load(fileHandle)
    finally:
        fileHandle.close()
    for variable in environment:
        if variable not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"):
            os.environ[variable] = environment[variable]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for pathEntry in environment["PYTHONPATH"].split(':'):
            if pathEntry != '':
                sys.path.append(pathEntry)
    logger.info("Loaded the environment for the process")

    ##########################################
    #Setup the temporary directories.
    ##########################################
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    localTempDir = os.path.join(localSlaveTempDir, "localTempDir")
    os.mkdir(localTempDir)
    os.chmod(localTempDir, int("777", 8)) #Octal 0777, spelt portably.

    ##########################################
    #Run the script.
    ##########################################
    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    jobToRun = job.find("followOns").findall("followOn")[-1]
    #The memory and cpu of the first job bound what later jobs in the chain may use.
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    startTime = time.time()
    while True:
        tempLogFile = processJob(job, jobToRun, memoryAvailable, cpuAvailable,
                                 stats, environment, localSlaveTempDir, localTempDir)

        if job.attrib["colour"] != "black":
            logger.critical("Exiting the slave because of a failed job")
            #Copy back the job log file, because we saw failure
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
            break
        elif "reportAllJobLogFiles" in job.attrib:
            logger.info("Exiting because we've been asked to report all logs, and this involves returning to the master")
            #Copy across the log file
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
            break

        #This is the estimate runtime of the jobs on the followon stack
        totalRuntime = float(job.attrib["total_time"])

        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")

        if len(childrenList) >= 2:
            #We are going to have to return to the parent
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break

        if time.time() - startTime > maxTime:
            logger.info("We are breaking because the maximum time the job should run for has been exceeded")
            break

        #Move the (at most one) remaining child onto the follow-on stack.
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0

        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break

        #Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break

        ##Updated the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([ job ])
        logger.info("Updated the status of the job to grey and starting the next job")

    #Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([ job ])
    logger.info("Written out an updated job file")

    logger.info("Finished running the chain of jobs on this node, we ran for a total of %f seconds" % (time.time() - startTime))

    ##########################################
    #Cleanup the temporary directory
    ##########################################
    system("rm -rf %s" % localSlaveTempDir)

    ##########################################
    #Finish up the stats
    ##########################################
    if stats is not None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        try:
            ET.ElementTree(stats).write(fileHandle)
        finally:
            fileHandle.close()
def main():
    """Reports the state of the job tree.

    The job tree directory comes from --jobTree (default './jobTree') or from
    the single positional argument. A summary line with the job counts is
    printed; with --verbose the log file of every totally failed job is
    reported as well. With --failIfNotComplete the exit status is 1 while
    there are still active jobs or parent jobs with children.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [--jobTree] JOB_TREE_DIR [options]", "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree", default='./jobTree',
                      help="Directory containing the job tree. The jobTree location can also be specified as the argument to the script. default=%default")
    parser.add_option("--verbose", dest="verbose", action="store_true", default=False,
                      help="Print loads of information, particularly all the log files of jobs that failed. default=%default")
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true", default=False,
                      help="Return exit value of 1 if job tree jobs not all completed. default=%default")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1 #Only jobtree may be specified as argument
    if args: #Allow jobTree directory as arg
        options.jobTree = args[0]

    ##########################################
    #Do some checks.
    ##########################################
    logger.info("Checking if we have files for job tree")
    jobTreeDir = options.jobTree
    assert jobTreeDir != None
    assert os.path.isdir(jobTreeDir) #The given job dir tree must exist.
    assert os.path.isfile(getConfigFileName(jobTreeDir)) #A valid job tree must contain the config file
    assert os.path.isdir(getJobFileDirName(jobTreeDir)) #A job tree must have a directory of jobs.

    ##########################################
    #Survey the status of the job and report.
    ##########################################
    childJobFileToParentJob = {}
    childCounts = {}
    updatedJobFiles = set()
    shellJobs = set()
    parseJobFiles(getJobFileDirName(jobTreeDir), updatedJobFiles,
                  childJobFileToParentJob, childCounts, shellJobs)

    #A job with no retries left is counted as totally failed.
    failedJobs = []
    for candidate in updatedJobFiles | set(childCounts.keys()):
        if candidate.remainingRetryCount == 0:
            failedJobs.append(candidate)

    print("There are %i active jobs, %i parent jobs with children, %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) currently in job tree: %s" % \
          (len(updatedJobFiles), len(childCounts), len(failedJobs), len(shellJobs), jobTreeDir))

    if options.verbose: #Verbose currently means outputting the files that have failed.
        for failedJob in failedJobs:
            if os.path.isfile(failedJob.getLogFileName()):
                print("Log file of failed job: %s" % failedJob.getLogFileName())
                logFile(failedJob.getLogFileName(), logger.critical)
            else:
                print("Log file for job %s is not present" % failedJob.getJobFileName())
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    unfinishedCount = len(updatedJobFiles) + len(childCounts)
    if unfinishedCount != 0 and options.failIfNotComplete:
        sys.exit(1)
def main():
    """Reports the state of the job tree.

    Counts the jobs in the job tree by colour and prints the totals. With
    --verbose the log file of every red job is printed. With
    --failIfNotComplete the exit status is 1 while any job files remain.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    usage = (
        "usage: %prog [options] \nThe colours returned indicate the state of the job.\n"
        "\twhite: job has not been started yet\n"
        "\tgrey: job is issued to batch system\n"
        "\tred: job failed\n"
        "\tblue: job has children currently being processed\n"
        "\tblack: job has finished and will be processed (transient state)\n"
        "\tdead: job is totally finished and is awaiting deletion (transient state)"
    )
    parser = getBasicOptionParser(usage, "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree", help="Directory containing the job tree")
    parser.add_option(
        "--verbose",
        dest="verbose",
        action="store_true",
        default=False,
        help="Print loads of information, particularly all the log files of errors. default=%default",
    )
    parser.add_option(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        default=False,
        help="Return exit value of 1 if job tree jobs not all completed. default=%default",
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for job tree")
    jobTreeDir = options.jobTree
    assert jobTreeDir != None
    assert os.path.isdir(jobTreeDir)  # The given job dir tree must exist.
    configPath = os.path.join(jobTreeDir, "config.xml")
    assert os.path.isfile(configPath)  # A valid job tree must contain the config file
    # A job tree must have directories for the jobs, the temporary directories
    # (for jobs to make temp files in), the log files and the slave log files.
    for requiredDir in ("jobs", "tempDirDir", "logFileDir", "slaveLogFileDir"):
        assert os.path.isdir(os.path.join(jobTreeDir, requiredDir))

    ##########################################
    # Read the total job number
    ##########################################
    config = ET.parse(configPath).getroot()

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    colours = {}
    jobFiles = TempFileTree(config.attrib["job_file_dir"]).listFiles()
    if len(jobFiles) == 0:
        logger.info("There are no jobs to collate")
    else:
        logger.info("Collating the colours of the job tree")
        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            if job is None:
                continue
            colour = job.attrib["colour"]
            colours[colour] = colours.get(colour, 0) + 1

    print("There are %i jobs currently in job tree: %s" % (len(jobFiles), jobTreeDir))
    for colour in colours.keys():
        print("\tColour: %s, number of jobs: %s" % (colour, colours[colour]))

    if options.verbose:  # Verbose currently means outputting the files that have failed.

        def echo(string):
            # Callback handed to logFile; prints its argument to standard out.
            print(string)

        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            if job is None or job.attrib["colour"] != "red":
                continue
            if os.path.isfile(job.attrib["log_file"]):
                logFile(job.attrib["log_file"], echo)
            else:
                logger.info("Log file for job %s is not present" % job.attrib["file"])

    if len(jobFiles) != 0 and options.failIfNotComplete:
        sys.exit(1)
def main():
    """Reports the state of the job tree.

    The job tree directory comes from --jobTree or from the single positional
    argument. A summary line with the job counts is printed; with --verbose
    the log file of every totally failed job is reported as well. With
    --failIfNotComplete the exit status is 1 while there are still active
    jobs or parent jobs with children.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [--jobTree] JOB_TREE_DIR [options]", "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree")
    parser.add_option("--verbose", dest="verbose", action="store_true", default=False,
                      help="Print loads of information, particularly all the log files of errors. default=%default")
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true", default=False,
                      help="Return exit value of 1 if job tree jobs not all completed. default=%default")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1 #Only jobtree may be specified as argument
    if args: #Allow jobTree directory as arg
        options.jobTree = args[0]

    ##########################################
    #Do some checks.
    ##########################################
    logger.info("Checking if we have files for job tree")
    assert options.jobTree != None
    assert os.path.isdir(options.jobTree) #The given job dir tree must exist.
    assert os.path.isfile(getConfigFileName(options.jobTree)) #A valid job tree must contain the config file
    jobFileDir = getJobFileDirName(options.jobTree)
    assert os.path.isdir(jobFileDir) #A job tree must have a directory of jobs.

    ##########################################
    #Survey the status of the job and report.
    ##########################################
    childJobFileToParentJob, childCounts = {}, {}
    updatedJobFiles, shellJobs = set(), set()
    parseJobFiles(getJobFileDirName(options.jobTree), updatedJobFiles,
                  childJobFileToParentJob, childCounts, shellJobs)

    #A job with no retries left is counted as totally failed.
    allJobs = updatedJobFiles | set(childCounts.keys())
    failedJobs = [ job for job in allJobs if job.remainingRetryCount == 0 ]

    summary = "There are %i active jobs, %i parent jobs with children, %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) currently in job tree: %s" % \
        (len(updatedJobFiles), len(childCounts), len(failedJobs), len(shellJobs), options.jobTree)
    print(summary)

    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if not os.path.isfile(job.getLogFileName()):
                print("Log file for job %s is not present" % job.getJobFileName())
            else:
                print("Log file of failed job: %s" % job.getLogFileName())
                logFile(job.getLogFileName(), logger.critical)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if options.failIfNotComplete and (len(updatedJobFiles) + len(childCounts)) != 0:
        sys.exit(1)
def main():
    """Second-stage test command: compute the leaf count of a tree node.

    Reads the job XML (--job) and the tree pointer XML (--treePointer). The
    "count" of the pointed-to tree is set to the sum of the "count" attributes
    of its children's tree files, or to 1 when it has no children, and the
    tree file is rewritten. The child pointer files listed in the tree
    pointer are then deleted. Finally the process exits with status 1 roughly
    10% of the time (random failure), and with 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")
    parser.add_option("--treePointer", dest="treePointer",
                      help="File containing pointer to the tree data",
                      default="None")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    #These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    print("Checking that we can report to std out")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointer).getroot()
    logger.info("Parsed the tree pointer XML")

    treeFile = treePointer.attrib["file"]
    tree = ET.parse(treeFile).getroot()
    logger.info("Parsed the tree XML")

    children = tree.find("children").findall("child")
    if len(children) > 0:
        #An internal node has as many leaves as its children have in total.
        leafCount = 0
        for child in children:
            #Parse the child XML tree
            childTree = ET.parse(child.attrib["file"]).getroot()
            leafCount += int(childTree.attrib["count"])
    else:
        #A node without children is itself a leaf.
        leafCount = 1
    tree.attrib["count"] = str(leafCount)
    logger.info("Calculated the leaf count: %i" % leafCount)

    #Close the tree file even if serialising the XML fails.
    fileHandle = open(treeFile, 'w')
    try:
        ET.ElementTree(tree).write(fileHandle)
    finally:
        fileHandle.close()
    logger.info("Updated the tree file: %s" % treeFile)

    for childPointer in treePointer.find("children").findall("child"):
        if os.path.isfile(childPointer.attrib["file"]):
            os.remove(childPointer.attrib["file"])
    logger.info("Removed the child pointer files")

    logger.info("No need to update the job file, as we didn't make anything new!")
    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """Merge several XML formatted MAF comparison result files into one.

    The inputs are taken, in order of precedence, from --inputDir (every
    *.xml file in the directory), --inputFile (one path per line) or the pair
    --results1 / --results2. The last input is used as the base tree; every
    other input is merged into it: "time" attributes are added up, aggregate
    counts are summed, tests for sequence pairs present in both trees are
    combined and tests only present in the other tree are inserted. The
    pretty-printed result is written to --outputFile.

    Exits with status -1 (after printing an error to standard error) when the
    input directory or list file cannot be opened or no input is found.
    """
    def addCounts(total, extra):
        #Add the test counts of extra onto total and refresh total's average.
        for key in ("totalTests", "totalTrue", "totalFalse"):
            total.attrib[key] = str(int(total.attrib[key]) + int(extra.attrib[key]))
        if int(total.attrib["totalTests"]) != 0:
            total.attrib["average"] = str(float(total.attrib["totalTrue"]) / float(total.attrib["totalTests"]))

    def mergeAggregateResults(tag1, tag2):
        #Sum every category of the aggregate_results tag2 into tag1.
        assert tag1.tag == "aggregate_results"
        assert tag2.tag == "aggregate_results"
        for category in ("all", "both", "A", "B", "neither"):
            addCounts(tag1.find(category), tag2.find(category))

    def sameSequencePair(test1, test2):
        #True if both homology tests are about the same (sequenceA, sequenceB) pair.
        return test1.attrib["sequenceA"] == test2.attrib["sequenceA"] and \
               test1.attrib["sequenceB"] == test2.attrib["sequenceB"]

    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option("--results1", dest="results1",
                      help="File containing the first XML formatted MAF comparsion results.")
    parser.add_option("--results2", dest="results2",
                      help="File containing the second XML formatted MAF comparsion results.")
    parser.add_option("--inputFile", dest="inputFile",
                      help="File containing a list of XML formatted MAF comparison results.")
    parser.add_option("--inputDir", dest="inputDir",
                      help="Directory containing the XML formatted MAF comparison results.")
    parser.add_option("--outputFile", dest="outputFile", type="string",
                      help="The file to put the aggregated results in (can be the same as either of the two inputs)")
    options, args = parseBasicOptions(parser)
    assert len(args) == 0
    logger.info("Parsed arguments")
    assert (options.results1 != None and options.results2 != None) or (options.inputDir != None) or (options.inputFile != None)
    assert options.outputFile != None

    ##########################################
    #Collect the input files.
    ##########################################
    xmlList = []
    if options.inputDir is not None:
        try:
            fileList = os.listdir(options.inputDir)
        except (IOError, OSError):
            sys.stderr.write("Error: Can't open dir '%s'\n" % options.inputDir)
            sys.exit(-1)
        #Relative directories are resolved against the current working directory.
        if options.inputDir.startswith("/"):
            dirPrefix = options.inputDir + "/"
        else:
            dirPrefix = os.getcwd() + "/" + options.inputDir + "/"
        for fileName in fileList:
            if fileName.endswith(".xml"):
                xmlList.append(dirPrefix + fileName)
    elif options.inputFile is not None:
        try:
            listHandle = open(options.inputFile)
        except (IOError, OSError):
            sys.stderr.write("Error: Can't open file '%s'\n" % options.inputFile)
            sys.exit(-1)
        try:
            for line in listHandle:
                line = line.rstrip()
                if line != "": #Blank lines do not name a file.
                    xmlList.append(line)
        finally:
            listHandle.close()
    elif options.results1 is not None and options.results2 is not None:
        xmlList.append(options.results1)
        xmlList.append(options.results2)
    else:
        sys.stderr.write("Error: Need to specify input\n")
        sys.exit(-1)

    if len(xmlList) == 0:
        sys.stderr.write("Error: No input XML files found\n")
        sys.exit(-1)

    ##########################################
    #Do the merging.
    ##########################################
    baseFile = xmlList.pop()
    resultsTree1 = ET.parse(baseFile).getroot()
    homologyTestsList1 = resultsTree1.findall("homology_tests")
    #NOTE(review): this tagging looks for homology_test / single_homology_test
    #as direct children, while the merge below reaches them through
    #homology_pair_tests / single_homology_tests - confirm which layout the
    #result files use.
    for homologyTests1 in homologyTestsList1:
        for homologyTest1 in homologyTests1.findall("homology_test"):
            for singleHomologyTest1 in homologyTest1.findall("single_homology_test"):
                singleHomologyTest1.attrib["srcFile"] = str(baseFile)

    for xmlFile in xmlList:
        resultsTree2 = ET.parse(xmlFile).getroot()
        #Add the time to the previous time
        if "time" in resultsTree2.attrib and "time" in resultsTree1.attrib:
            resultsTree1.attrib["time"] = str(float(resultsTree1.attrib["time"]) + float(resultsTree2.attrib["time"]))

        homologyTestsList2 = resultsTree2.findall("homology_tests")
        assert len(homologyTestsList1) == len(homologyTestsList2)
        for homologyTests1, homologyTests2 in zip(homologyTestsList1, homologyTestsList2):
            assert homologyTests1.attrib["near"] == homologyTests2.attrib["near"]

            pairTests1 = homologyTests1.find("homology_pair_tests")
            tests1 = pairTests1.findall("homology_test")
            tests2 = homologyTests2.find("homology_pair_tests").findall("homology_test")

            #Do merge for tests in both sets
            for homologyTest1 in tests1:
                for homologyTest2 in tests2:
                    if sameSequencePair(homologyTest1, homologyTest2):
                        mergeAggregateResults(homologyTest1.find("aggregate_results"),
                                              homologyTest2.find("aggregate_results"))
                        for singleHomologyTest2 in homologyTest2.find("single_homology_tests").findall("single_homology_test"):
                            homologyTest1.find("single_homology_tests").insert(0, singleHomologyTest2)

            #Now add in tests not in the intersection of the results
            unmatchedTests = []
            for homologyTest2 in tests2:
                for homologyTest1 in tests1:
                    if sameSequencePair(homologyTest1, homologyTest2):
                        break
                else:
                    unmatchedTests.append(homologyTest2)
            for homologyTest2 in unmatchedTests:
                pairTests1.insert(0, homologyTest2)

            #Now recalculate the totals
            mergeAggregateResults(homologyTests1.find("aggregate_results"),
                                  homologyTests2.find("aggregate_results"))

    #Write to the results file.
    fileHandle = open(options.outputFile, 'w')
    try:
        fileHandle.write(prettify(resultsTree1))
    finally:
        fileHandle.close()
def main():
    """Reports stats on the job-tree, use in conjunction with --stats options to jobTree.

    Reads config.xml and stats.xml from the directory given by --jobTree and
    builds a "collated_stats" element holding, for the slaves, jobs and
    targets (overall and per target class), the number of items and the
    total / median / average / min / max of their time, clock and wait
    (time minus clock) values. The pretty-printed XML is written to
    --outputFile when given, and always printed to standard out.

    Raises RuntimeError if the job tree is not specified, does not exist, or
    lacks its config.xml or stats.xml file.
    """
    ##########################################
    #Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog", "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree")
    parser.add_option("--outputFile", dest="outputFile", default=None,
                      help="File in which to write results")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################
    logger.info("Checking if we have files for job tree")
    if options.jobTree is None:
        raise RuntimeError("You did not specify the job-tree")
    if not os.path.isdir(options.jobTree):
        raise RuntimeError("The given job dir tree does not exist: %s" % options.jobTree)
    configFile = os.path.join(options.jobTree, "config.xml")
    statsFile = os.path.join(options.jobTree, "stats.xml")
    if not os.path.isfile(configFile):
        raise RuntimeError("A valid job tree must contain the config file")
    if not os.path.isfile(statsFile):
        raise RuntimeError("The job-tree was run without the --stats flag, so no stats were created")

    ##########################################
    #Read the stats and config
    ##########################################
    config = ET.parse(configFile).getroot()
    stats = ET.parse(statsFile).getroot()

    ##########################################
    #Collate the stats and report
    ##########################################
    def clampNegative(value):
        #Negative values are logged and replaced by 0.0.
        if value < 0:
            logger.debug("I got a less than 0 value: %s" % value)
            return 0.0
        return value

    def addSummary(element, items, itemName):
        #Append a child called itemName to element that summarises the time,
        #clock and wait values of items, and return the new child.
        itemTimes = sorted([ clampNegative(float(item.attrib["time"])) for item in items ])
        itemClocks = sorted([ clampNegative(float(item.attrib["clock"])) for item in items ])
        itemWaits = sorted([ clampNegative(clampNegative(float(item.attrib["time"])) -
                                           clampNegative(float(item.attrib["clock"])))
                             for item in items ])
        assert len(itemClocks) == len(itemTimes)
        assert len(itemClocks) == len(itemWaits)
        if len(itemTimes) == 0:
            #Use a single zero so that the summary values below are defined.
            itemTimes.append(0)
            itemClocks.append(0)
            itemWaits.append(0)
        attributes = { "total_number":str(len(items)) }
        for suffix, values in (("time", itemTimes), ("clock", itemClocks), ("wait", itemWaits)):
            attributes["total_" + suffix] = str(sum(values))
            attributes["median_" + suffix] = str(values[len(values) // 2])
            attributes["average_" + suffix] = str(sum(values) / len(values))
            attributes["min_" + suffix] = str(min(values))
            attributes["max_" + suffix] = str(max(values))
        return ET.SubElement(element, itemName, attributes)

    def addCountsPerContainer(element, containingItems, containingItemName, getFn):
        #Record on element how many items getFn returns per containing item.
        itemCounts = sorted([ len(getFn(containingItem)) for containingItem in containingItems ])
        if len(itemCounts) == 0:
            itemCounts.append(0)
        element.attrib["median_number_per_%s" % containingItemName] = str(itemCounts[len(itemCounts) // 2])
        element.attrib["average_number_per_%s" % containingItemName] = str(float(sum(itemCounts)) / len(itemCounts))
        element.attrib["min_number_per_%s" % containingItemName] = str(min(itemCounts))
        element.attrib["max_number_per_%s" % containingItemName] = str(max(itemCounts))

    def jobsOfSlave(slave):
        return slave.findall("job")

    def targetsOfJob(job):
        jobTargets = []
        for stack in job.findall("stack"):
            jobTargets += stack.findall("target")
        return jobTargets

    def prettify(elem):
        """Return a pretty-printed XML string for the Element.
        """
        rough_string = ET.tostring(elem, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent=" ")

    if stats.find("total_time") is None: #Hack to allow it to work on unfinished jobtrees.
        ET.SubElement(stats, "total_time", { "time":"0.0", "clock":"0.0"})

    totalTime = stats.find("total_time")
    collatedStatsTag = ET.Element("collated_stats", {
        "total_run_time":totalTime.attrib["time"],
        "total_clock":totalTime.attrib["clock"],
        "batch_system":config.attrib["batch_system"],
        "job_time":config.attrib["job_time"],
        "default_memory":config.attrib["default_memory"],
        "default_cpu":config.attrib["default_cpu"],
        "max_jobs":config.attrib["max_jobs"],
        "max_threads":config.attrib["max_threads"] })

    #Add slave info
    slaves = stats.findall("slave")
    addSummary(collatedStatsTag, slaves, "slave")

    #Add job info
    jobs = []
    for slave in slaves:
        jobs += jobsOfSlave(slave)
    addCountsPerContainer(addSummary(collatedStatsTag, jobs, "job"), slaves, "slave", jobsOfSlave)

    #Add aggregated target info
    targets = []
    for job in jobs:
        targets += targetsOfJob(job)
    addCountsPerContainer(addSummary(collatedStatsTag, targets, "target"), jobs, "job", targetsOfJob)

    #Get info for each target
    targetNames = set([ target.attrib["class"] for target in targets ])
    targetTypesTag = ET.SubElement(collatedStatsTag, "target_types")
    #Sorted so that the target types are reported in a reproducible order.
    for targetName in sorted(targetNames):
        targetTypes = [ target for target in targets if target.attrib["class"] == targetName ]
        targetTypeTag = addSummary(targetTypesTag, targetTypes, targetName)
        estimatedRunTimes = [ float(target.attrib["e_time"]) for target in targetTypes ]
        targetTypeTag.attrib["estimated_time"] = str(sum(estimatedRunTimes)/len(estimatedRunTimes))

    prettyXml = prettify(collatedStatsTag)

    #Now dump it all out to file
    if options.outputFile is not None:
        fileHandle = open(options.outputFile, 'w')
        try:
            fileHandle.write(prettyXml)
        finally:
            fileHandle.close()

    #Now dump onto the screen
    print(prettyXml)