Example #1
0
def main():
    """Build the chain and block scatter plots from the command line.

    The positional arguments are read as alternating
    ``statsFile regionName`` pairs.  Each stats file is parsed as XML and its
    root element is paired with the region name; the resulting list is passed
    to ``chainScatterPlots`` and then to ``blockScatterPlots``.
    """
    usage = "usage: %prog [options] treeStatsFiles"
    parser = getBasicOptionParser(usage, "%prog 0.1")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    #Arguments come in (statsFile, regionName) pairs, so the count must be even.
    assert len(args) % 2 == 0

    stats = []
    for index in range(0, len(args), 2):
        statsFile, regionName = args[index], args[index + 1]
        stats.append((ET.parse(statsFile).getroot(), regionName))

    chainScatterPlots(stats)
    blockScatterPlots(stats)
Example #2
0
def main():
    """Kill the jobs that a job tree's batch system still lists as issued.

    The job tree directory comes from ``--jobTree`` or, when exactly one
    positional argument is given, from that argument (which overrides the
    option).  Running the script with no command-line arguments at all prints
    the help text and exits with status 0.
    """
    usage = "usage: %prog [--jobTree] JOB_TREE_DIR [more options]"
    parser = getBasicOptionParser(usage, "%prog 0.1")
    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree to kill")
    options, args = parseBasicOptions(parser)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    #At most one positional argument is accepted: the job tree directory.
    assert len(args) <= 1
    if len(args) == 1:
        options.jobTree = args[0]
    jobTreeDir = options.jobTree

    logger.info("Parsed arguments")
    assert jobTreeDir is not None #The jobtree should not be null
    assert os.path.isdir(jobTreeDir) #The job tree must exist if we are going to kill it.
    logger.info("Starting routine to kill running jobs in the jobTree: %s" % jobTreeDir)

    configRoot = ET.parse(getConfigFileName(jobTreeDir)).getroot()
    #The original author notes that loading the batch system should already
    #kill the existing jobs; the loop below is a second pass "just in case".
    batchSystem = loadTheBatchSystem(configRoot)
    for issuedJobID in batchSystem.getIssuedJobIDs():
        #NOTE(review): killJobs is handed one ID at a time - confirm that this
        #matches the argument the batch system expects.
        batchSystem.killJobs(issuedJobID)
    logger.info("All jobs SHOULD have been killed")
def main():
    """First-phase test command: schedule a child job per child of a tree node.

    Reads the job XML named by ``--job`` and the tree pointer XML named by
    ``--treePointer``, then loads the tree XML that the pointer's ``file``
    attribute refers to.  For every ``children/child`` element of that tree a
    new child tree pointer file is created and a matching ``child`` element
    (carrying the command to run) is appended to the job's ``children``.  The
    job's own command is then replaced by the second-phase script and the job
    file is rewritten in place.

    The process exits with status 1 when ``random.random() > 0.9`` and with
    status 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")

    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")

    parser.add_option("--treePointer", dest="treePointerFile",
                      help="File containing pointer to the tree data",
                      default="None")

    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointerFile).getroot()
    logger.info("Parsed the tree pointer XML")

    tree = ET.parse(treePointer.attrib["file"]).getroot()
    logger.info("Parsed the tree XML")

    for child in tree.find("children").findall("child"):
        #Make the child tree pointer file
        childTreePointerFile = makeTreePointer(
            child.attrib["file"],
            getTempFile(rootDir=job.attrib["global_temp_dir"]))
        #Make the child command
        unbornChild = ET.SubElement(job.find("children"), "child")
        unbornChild.attrib["command"] = \
            "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % \
            (childTreePointerFile,)
        #Only some of the children are given a "time" attribute.
        if random.random() > 0.2:
            unbornChild.attrib["time"] = str(random.random() * 10)
        #Record the child pointer in the in-memory tree pointer.
        #NOTE(review): treePointer is modified here but never written back to
        #disk in this function - confirm whether that is intended.
        ET.SubElement(treePointer.find("children"), "child",
                      { "file":childTreePointerFile })

    job.attrib["command"] = \
        "jobTreeTest_CommandSecond.py --treePointer %s --job JOB_FILE" % \
        (options.treePointerFile,)
    logger.info("Made new command")

    #Close the handle even if serialising the XML fails part-way.
    fileHandle = open(options.jobFile, 'w')
    try:
        ET.ElementTree(job).write(fileHandle)
    finally:
        fileHandle.close()

    logger.info("Updated the job file")

    #These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    print("Checking that we can report to std out")

    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
Example #4
0
def main():
    """ Reports stats on the job-tree, use with --stats option to jobTree.

    If ``cacheAvailable`` returns something other than None, that value is
    reported directly; otherwise the settings are loaded and the data is
    processed first.  Either way the result is passed to ``reportData`` and
    then ``packData``.
    """
    usage = "usage: %prog [--jobTree] JOB_TREE_DIR [options]"
    optionParser = getBasicOptionParser(usage, "%prog 0.1")
    initializeOptions(optionParser)
    options, positionalArgs = parseBasicOptions(optionParser)
    checkOptions(options, positionalArgs, optionParser)

    collated = cacheAvailable(options)
    if collated is None:
        #Nothing reusable was returned, so build the collated stats now.
        settings = getSettings(options)
        config, stats = settings
        collated = processData(config, stats, options)

    reportData(collated, options)
    packData(collated, options)
Example #5
0
def main():
    """ Reports stats on the job-tree, use with --stats option to jobTree.

    Steps: build and check the options, try ``cacheAvailable``, fall back to
    ``getSettings`` + ``processData`` when that yields None, then report and
    pack the collated stats.
    """
    cmdLineParser = getBasicOptionParser(
        "usage: %prog [--jobTree] JOB_TREE_DIR [options]",
        "%prog 0.1",
    )
    initializeOptions(cmdLineParser)
    options, args = parseBasicOptions(cmdLineParser)
    checkOptions(options, args, cmdLineParser)

    statsTag = cacheAvailable(options)
    needsProcessing = statsTag is None
    if needsProcessing:
        config, rawStats = getSettings(options)
        statsTag = processData(config, rawStats, options)

    #Both output steps receive the same collated tag.
    for emit in (reportData, packData):
        emit(statsTag, options)
Example #6
0
def main():
    """Kill the jobs that the batch system of ``--jobTree`` lists as issued.

    No positional arguments are accepted.  The job tree directory must be
    given, must exist, and its ``config.xml`` is parsed to load the batch
    system.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    parser.add_option(
        "--jobTree",
        dest="jobTree",
        help="Directory containing the job tree to kill",
    )

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    jobTreeDir = options.jobTree
    assert len(args) == 0  # This program takes no arguments
    assert jobTreeDir is not None  # The jobtree should not be null
    assert os.path.isdir(jobTreeDir)  # The job tree must exist if we are going to kill it.
    logger.info("Starting routine to kill running jobs in the jobTree: %s" % jobTreeDir)

    configPath = os.path.join(jobTreeDir, "config.xml")
    configRoot = ET.parse(configPath).getroot()
    # The original author notes that loading the batch system should already
    # kill the existing jobs; the explicit pass below repeats it just in case.
    batchSystem = loadTheBatchSystem(configRoot)
    issuedJobIDs = batchSystem.getIssuedJobIDs()
    for issuedJobID in issuedJobIDs:
        batchSystem.killJobs(issuedJobID)
    logger.info("All jobs SHOULD have been killed")
def main():
    """Write a document of latex tables built from tree stats files.

    The positional arguments are alternating ``statsFile regionName`` pairs;
    each stats file is parsed as XML and paired with its region name.  The
    document is written to the file named by ``--outputFile`` (required).
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [options] treeStatsFiles",
                                  "%prog 0.1")

    parser.add_option("--outputFile",
                      dest="outputFile",
                      help="File to put the latex tables in.")

    options, args = parseBasicOptions(parser)

    logger.info("Parsed arguments")

    assert options.outputFile is not None

    ##########################################
    #Get the input data etc.
    ##########################################

    #Arguments come in (statsFile, regionName) pairs.
    assert len(args) % 2 == 0
    stats = [(ET.parse(statsFile).getroot(), regionName)
             for statsFile, regionName in zip(args[::2], args[1::2])]
    fileHandle = open(options.outputFile, "w")

    ##########################################
    #Make the document
    ##########################################

    #try/finally guarantees the output file is closed even when one of the
    #table writers raises part-way through the document.
    try:
        writeDocumentPreliminaries(fileHandle)
        writeFlowerTable(stats, fileHandle)
        writeBlocksTable(stats, fileHandle)
        writeChainsTable(stats, fileHandle)
        writeTerminalGroupsTable(stats, fileHandle)
        writeFacesTable(stats, fileHandle)
        writeReferenceTable(stats, fileHandle)
        writeDocumentEnd(fileHandle)
    finally:
        ##########################################
        #Cleanup
        ##########################################
        fileHandle.close()
def main():
    """Write a document of latex tables built from tree stats files.

    Positional arguments are read as ``statsFile regionName`` pairs; every
    stats file is parsed as XML.  ``--outputFile`` names the file that
    receives the document and must be supplied.
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [options] treeStatsFiles", "%prog 0.1")

    parser.add_option("--outputFile", dest="outputFile",
                      help="File to put the latex tables in.")

    options, args = parseBasicOptions(parser)

    logger.info("Parsed arguments")

    assert options.outputFile is not None

    ##########################################
    #Get the input data etc.
    ##########################################

    #An odd argument count would leave a stats file without a region name.
    assert len(args) % 2 == 0
    stats = [ (ET.parse(statsFile).getroot(), regionName) for statsFile, regionName in zip(args[::2], args[1::2]) ]
    fileHandle = open(options.outputFile, "w")

    ##########################################
    #Make the document (always closing the file, even if a writer fails)
    ##########################################

    try:
        writeDocumentPreliminaries(fileHandle)
        writeFlowerTable(stats, fileHandle)
        writeBlocksTable(stats, fileHandle)
        writeChainsTable(stats, fileHandle)
        writeTerminalGroupsTable(stats, fileHandle)
        writeFacesTable(stats, fileHandle)
        writeReferenceTable(stats, fileHandle)
        writeDocumentEnd(fileHandle)
    finally:
        ##########################################
        #Cleanup
        ##########################################
        fileHandle.close()
def main():
    """Draw the chain and block scatter plots for a set of stats files.

    Positional arguments alternate between a stats file and the region name
    that goes with it.  Every stats file is parsed as XML; the list of
    ``(root element, region name)`` tuples is passed to both plot routines.
    """
    parser = getBasicOptionParser(
        "usage: %prog [options] treeStatsFiles",
        "%prog 0.1",
    )
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    #Each stats file needs a matching region name.
    assert len(args) % 2 == 0

    statsFiles = args[::2]
    regionNames = args[1::2]
    stats = []
    for statsFile, regionName in zip(statsFiles, regionNames):
        root = ET.parse(statsFile).getroot()
        stats.append((root, regionName))

    chainScatterPlots(stats)
    blockScatterPlots(stats)
Example #10
0
def main():
    """Run a chain of jobs from a job file on this node (the "slave").

    ``sys.argv[1]`` is a directory that is appended to ``sys.path`` (and then
    removed from ``sys.argv``) so that the sonLib / jobTree imports below can
    be resolved.  The remaining command line must supply ``--job``.

    Outline:
      1. Read the job, configure logging from its attributes.
      2. If the job has a "stats" attribute, record start times so a stats
         XML file can be written at the end.
      3. Load the pickled environment named by the job and copy it into
         ``os.environ`` / ``sys.path``.
      4. Create temporary directories.
      5. Repeatedly call ``processJob`` on the last ``followOn`` element,
         promoting a lone child to a follow-on, until one of the stop
         conditions in the loop is met.
      6. Write the job back, delete the temporary directory, write stats.
    """
    #The first argument is a path needed to import the modules below; drop it
    #from argv afterwards so the option parser does not see it.
    sys.path.append(sys.argv[1])
    sys.argv.remove(sys.argv[1])
    
    #Now we can import all the stuff..
    from sonLib.bioio import getBasicOptionParser
    from sonLib.bioio import parseBasicOptions
    from sonLib.bioio import logger
    from sonLib.bioio import addLoggingFileHandler
    from sonLib.bioio import setLogLevel
    from sonLib.bioio import getTotalCpuTime
    from sonLib.bioio import getTempDirectory
    from jobTree.src.master import writeJobs
    from jobTree.src.master import readJob
    from sonLib.bioio import system
    
    ##########################################
    #Construct the arguments.
    ##########################################
    
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")
    
    #Note the default is the string "None", not the None object.
    parser.add_option("--job", dest="jobFile", 
                      help="Job file containing command to run",
                      default="None")
    
    options, args = parseBasicOptions(parser)
    assert len(args) == 0

    ##########################################
    #Parse the job.
    ##########################################
    
    job = readJob(options.jobFile)
    
    ##########################################
    #Setup the logging
    ##########################################
    
    #Setup the logging
    setLogLevel(job.attrib["log_level"])
    addLoggingFileHandler(job.attrib["slave_log_file"], rotatingLogging=False)
    logger.info("Parsed arguments and set up logging")
    
    ##########################################
    #Setup the stats, if requested
    ##########################################
    
    #stats stays None unless the job carries a "stats" attribute; the final
    #section of this function only writes stats when it is not None.
    if job.attrib.has_key("stats"):
        startTime = time.time()
        startClock = getTotalCpuTime()
        stats = ET.Element("slave")
    else:
        stats = None
    
    ##########################################
    #Load the environment for the job
    ##########################################
    
    #First load the environment for the job.
    #NOTE(review): unpickling can execute arbitrary code; this assumes the
    #environment file is trusted - confirm.
    fileHandle = open(job.attrib["environment_file"], 'r')
    environment = cPickle.load(fileHandle)
    fileHandle.close()
    #Copy every variable except the temp-dir and host ones into os.environ.
    for i in environment:
        if i not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"):
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)
    #os.environ = environment
    #os.putenv(key, value)
    logger.info("Loaded the environment for the process")
        
    ##########################################
    #Setup the temporary directories.
    ##########################################
        
    #Dir to put all the temp files in.
    localSlaveTempDir = getTempDirectory()
    #A sub-directory of it, made accessible to everyone (mode 0777).
    localTempDir = os.path.join(localSlaveTempDir, "localTempDir") 
    os.mkdir(localTempDir)
    os.chmod(localTempDir, 0777)
    
    ##########################################
    #Run the script.
    ##########################################
    
    maxTime = float(job.attrib["job_time"])
    assert maxTime > 0.0
    assert maxTime < sys.maxint
    #The job to run is always the last followOn element.
    jobToRun = job.find("followOns").findall("followOn")[-1]
    #The memory/cpu of the first job are the budget for every later job run
    #in this loop (see the comparisons near the end of the loop body).
    memoryAvailable = int(jobToRun.attrib["memory"])
    cpuAvailable = int(jobToRun.attrib["cpu"])
    #NOTE: this rebinds startTime, replacing the value recorded in the stats
    #section above, so the stats "time" written at the end is measured from here.
    startTime = time.time()
    while True:
        tempLogFile = processJob(job, jobToRun, memoryAvailable, cpuAvailable, stats, environment, localSlaveTempDir, localTempDir)
        
        #Any colour other than "black" after processJob is treated as a failure.
        if job.attrib["colour"] != "black": 
            logger.critical("Exiting the slave because of a failed job")
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"])) #Copy back the job log file, because we saw failure
            break
        elif job.attrib.has_key("reportAllJobLogFiles"):
            logger.info("Exiting because we've been asked to report all logs, and this involves returning to the master")
            #Copy across the log file
            system("mv %s %s" % (tempLogFile, job.attrib["log_file"]))
            break
   
        totalRuntime = float(job.attrib["total_time"])  #This is the estimate runtime of the jobs on the followon stack
        
        childrenNode = job.find("children")
        childrenList = childrenNode.findall("child")
        #childRuntime = sum([ float(child.attrib["time"]) for child in childrenList ])
            
        #Two or more children cannot be run in series here, so stop.
        if len(childrenList) >= 2: # or totalRuntime + childRuntime > maxTime: #We are going to have to return to the parent
            logger.info("No more jobs can run in series by this slave, its got %i children" % len(childrenList))
            break
        
        #Stop once the wall-clock time spent in this loop exceeds job_time.
        if time.time() - startTime > maxTime:
            logger.info("We are breaking because the maximum time the job should run for has been exceeded")
            break
        
        #Move the remaining child (at most one, given the break above) from
        #the children node onto the follow-on stack, adding its "time" to the
        #running total.
        followOns = job.find("followOns")
        while len(childrenList) > 0:
            child = childrenList.pop()
            childrenNode.remove(child)
            totalRuntime += float(child.attrib["time"])
            ET.SubElement(followOns, "followOn", child.attrib.copy())
        #assert totalRuntime <= maxTime + 1 #The plus one second to avoid unimportant rounding errors
        job.attrib["total_time"] = str(totalRuntime)
        assert len(childrenNode.findall("child")) == 0
        
        if len(followOns.findall("followOn")) == 0:
            logger.info("No more jobs can run by this slave as we have exhausted the follow ons")
            break
        
        #Get the next job and see if we have enough cpu and memory to run it..
        jobToRun = job.find("followOns").findall("followOn")[-1]
        if int(jobToRun.attrib["memory"]) > memoryAvailable:
            logger.info("We need more memory for the next job, so finishing")
            break
        if int(jobToRun.attrib["cpu"]) > cpuAvailable:
            logger.info("We need more cpus for the next job, so finishing")
            break
        
        ##Updated the job so we can start the next loop cycle
        job.attrib["colour"] = "grey"
        writeJobs([ job ])
        logger.info("Updated the status of the job to grey and starting the next job")
    
    #Write back the job file with the updated jobs, using the checkpoint method.
    writeJobs([ job ])
    logger.info("Written out an updated job file")
    
    logger.info("Finished running the chain of jobs on this node, we ran for a total of %f seconds" % (time.time() - startTime))
    
    ##########################################
    #Cleanup the temporary directory
    ##########################################
    
    system("rm -rf %s" % localSlaveTempDir)
    
    ##########################################
    #Finish up the stats
    ##########################################
    
    #Only reached with stats set when the job had a "stats" attribute, in
    #which case startClock was also defined above.
    if stats != None:
        stats.attrib["time"] = str(time.time() - startTime)
        stats.attrib["clock"] = str(getTotalCpuTime() - startClock)
        fileHandle = open(job.attrib["stats"], 'w')
        ET.ElementTree(stats).write(fileHandle)
        fileHandle.close()
Example #11
0
def main():
    """Reports the state of the job tree.

    Prints a one-line summary of the job counts collected by
    ``parseJobFiles``.  With ``--verbose`` the log file of every job whose
    ``remainingRetryCount`` is 0 is also reported.  With
    ``--failIfNotComplete`` the process exits with status 1 when any active
    or parent job remains.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    usage = "usage: %prog [--jobTree] JOB_TREE_DIR [options]"
    parser = getBasicOptionParser(usage, "%prog 0.1")

    parser.add_option(
        "--jobTree", dest="jobTree", default='./jobTree',
        help="Directory containing the job tree. The jobTree location can also be specified as the argument to the script. default=%default")

    parser.add_option(
        "--verbose", dest="verbose", action="store_true", default=False,
        help="Print loads of information, particularly all the log files of jobs that failed. default=%default")

    parser.add_option(
        "--failIfNotComplete", dest="failIfNotComplete", action="store_true", default=False,
        help="Return exit value of 1 if job tree jobs not all completed. default=%default")

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    #No command-line arguments at all: show the help and stop.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  #Only jobtree may be specified as argument
    if len(args) == 1:  #Allow jobTree directory as arg
        options.jobTree = args[0]

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    jobTreeDir = options.jobTree
    assert jobTreeDir is not None
    #The given job dir tree must exist.
    assert os.path.isdir(jobTreeDir)
    #A valid job tree must contain the config file
    assert os.path.isfile(getConfigFileName(jobTreeDir))
    #A job tree must have a directory of jobs.
    assert os.path.isdir(getJobFileDirName(jobTreeDir))

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    #These four containers are filled in by parseJobFiles.
    childJobFileToParentJob = {}
    childCounts = {}
    updatedJobFiles = set()
    shellJobs = set()
    parseJobFiles(getJobFileDirName(jobTreeDir), updatedJobFiles,
                  childJobFileToParentJob, childCounts, shellJobs)

    #A job with no retries left counts as totally failed.
    failedJobs = []
    for candidate in updatedJobFiles | set(childCounts.keys()):
        if candidate.remainingRetryCount == 0:
            failedJobs.append(candidate)

    summary = "There are %i active jobs, %i parent jobs with children, %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) currently in job tree: %s" % \
        (len(updatedJobFiles), len(childCounts), len(failedJobs), len(shellJobs), jobTreeDir)
    print(summary)

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for failedJob in failedJobs:
            if os.path.isfile(failedJob.getLogFileName()):
                print("Log file of failed job: %s" % failedJob.getLogFileName())
                logFile(failedJob.getLogFileName(), logger.critical)
            else:
                print("Log file for job %s is not present" % failedJob.getJobFileName())
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    unfinishedCount = len(updatedJobFiles) + len(childCounts)
    if unfinishedCount != 0 and options.failIfNotComplete:
        sys.exit(1)
Example #12
0
def main():
    """Reports the state of the job tree.

    Counts the jobs under the configured job file directory by their
    ``colour`` attribute and prints the totals.  With ``--verbose`` the log
    file of every "red" job is printed line by line.  With
    ``--failIfNotComplete`` the process exits with status 1 if any job file
    is still present.
    """

    ##########################################
    # Construct the arguments.
    ##########################################

    usage = (
        "usage: %prog [options] \nThe colours returned indicate the state of the job.\n"
        "\twhite: job has not been started yet\n"
        "\tgrey: job is issued to batch system\n"
        "\tred: job failed\n"
        "\tblue: job has children currently being processed\n"
        "\tblack: job has finished and will be processed (transient state)\n"
        "\tdead: job is totally finished and is awaiting deletion (transient state)"
    )
    parser = getBasicOptionParser(usage, "%prog 0.1")

    parser.add_option("--jobTree", dest="jobTree", help="Directory containing the job tree")
    parser.add_option(
        "--verbose", dest="verbose", action="store_true", default=False,
        help="Print loads of information, particularly all the log files of errors. default=%default",
    )
    parser.add_option(
        "--failIfNotComplete", dest="failIfNotComplete", action="store_true", default=False,
        help="Return exit value of 1 if job tree jobs not all completed. default=%default",
    )

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    jobTreeDir = options.jobTree
    assert jobTreeDir is not None
    assert os.path.isdir(jobTreeDir)  # The given job dir tree must exist.
    configFile = os.path.join(jobTreeDir, "config.xml")
    assert os.path.isfile(configFile)  # A valid job tree must contain the config file
    # A job tree must also contain, in this order of checking: the jobs
    # directory, the directory of temporary directories, the log file
    # directory and the slave log file directory.
    for requiredDir in ("jobs", "tempDirDir", "logFileDir", "slaveLogFileDir"):
        assert os.path.isdir(os.path.join(jobTreeDir, requiredDir))

    ##########################################
    # Read the config
    ##########################################

    config = ET.parse(configFile).getroot()

    ##########################################
    # Survey the status of the job and report.
    ##########################################

    colours = {}
    jobFiles = TempFileTree(config.attrib["job_file_dir"]).listFiles()
    if len(jobFiles) == 0:
        logger.info("There are no jobs to collate")
    else:
        logger.info("Collating the colours of the job tree")
        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            if job != None:
                jobColour = job.attrib["colour"]
                colours[jobColour] = colours.get(jobColour, 0) + 1

    print("There are %i jobs currently in job tree: %s" % (len(jobFiles), jobTreeDir))

    for colour in colours:
        print("\tColour: %s, number of jobs: %s" % (colour, colours[colour]))

    if options.verbose:  # Verbose currently means outputting the files that have failed.

        def printLine(string):
            print(string)

        # The job files are parsed a second time here, as in the first pass.
        for absFileName in jobFiles:
            job = parseJobFile(absFileName)
            if job != None and job.attrib["colour"] == "red":
                logFileName = job.attrib["log_file"]
                if os.path.isfile(logFileName):
                    logFile(logFileName, printLine)
                else:
                    logger.info("Log file for job %s is not present" % job.attrib["file"])

    if options.failIfNotComplete and len(jobFiles) != 0:
        sys.exit(1)
Example #13
0
def main():
    """Reports the state of the job tree.

    Summarises the job sets filled in by ``parseJobFiles`` on standard
    output.  ``--verbose`` additionally reports the log file of each job
    with ``remainingRetryCount == 0``; ``--failIfNotComplete`` makes the
    process exit with status 1 while active or parent jobs remain.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [--jobTree] JOB_TREE_DIR [options]", "%prog 0.1")

    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree")

    #The two boolean switches share everything except their name and help.
    switches = (
        ("verbose", "Print loads of information, particularly all the log files of errors. default=%default"),
        ("failIfNotComplete", "Return exit value of 1 if job tree jobs not all completed. default=%default"),
    )
    for switchName, switchHelp in switches:
        parser.add_option("--" + switchName, dest=switchName, action="store_true",
                          help=switchHelp, default=False)

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1 #Only jobtree may be specified as argument
    if len(args) == 1: #Allow jobTree directory as arg
        options.jobTree = args[0]

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")
    jobTree = options.jobTree
    assert jobTree is not None
    assert os.path.isdir(jobTree) #The given job dir tree must exist.
    assert os.path.isfile(getConfigFileName(jobTree)) #A valid job tree must contain the config file
    assert os.path.isdir(getJobFileDirName(jobTree)) #A job tree must have a directory of jobs.

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    childJobFileToParentJob, childCounts = {}, {}
    updatedJobFiles, shellJobs = set(), set()
    parseJobFiles(getJobFileDirName(jobTree), updatedJobFiles, childJobFileToParentJob, childCounts, shellJobs)

    jobsToCheck = updatedJobFiles | set(childCounts.keys())
    failedJobs = [ job for job in jobsToCheck if job.remainingRetryCount == 0 ]

    counts = (len(updatedJobFiles), len(childCounts), len(failedJobs), len(shellJobs), jobTree)
    print("There are %i active jobs, %i parent jobs with children, %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) currently in job tree: %s" % counts)

    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if not os.path.isfile(job.getLogFileName()):
                print("Log file for job %s is not present" % job.getJobFileName())
            else:
                print("Log file of failed job: %s" % job.getLogFileName())
                logFile(job.getLogFileName(), logger.critical)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if options.failIfNotComplete and (len(updatedJobFiles) + len(childCounts)) != 0:
        sys.exit(1)
def main():
    """Second-phase test command: record a tree node's leaf count and clean up.

    Reads the job XML (``--job``) and the tree pointer XML
    (``--treePointer``), then loads the tree XML that the pointer's ``file``
    attribute refers to.  The node's ``count`` attribute is set to the sum of
    its children's ``count`` attributes, or to 1 when it has no children, and
    the tree file is rewritten.  Any child pointer files listed in the tree
    pointer that still exist are then deleted.

    The process exits with status 1 when ``random.random() > 0.9`` and with
    status 0 otherwise.
    """
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")

    parser.add_option("--job", dest="jobFile",
                      help="Job file containing command to run",
                      default="None")

    parser.add_option("--treePointer", dest="treePointer",
                      help="File containing pointer to the tree data",
                      default="None")

    options, args = parseBasicOptions(parser)
    logger.info("Parsed the input arguments")

    #These lines should end up in the logs
    sys.stderr.write("Checking that we can report to std err\n")
    print("Checking that we can report to std out")

    job = ET.parse(options.jobFile).getroot()
    setLogLevel(job.attrib["log_level"])
    logger.info("Parsed the job XML")

    treePointer = ET.parse(options.treePointer).getroot()
    logger.info("Parsed the tree pointer XML")

    treeFile = treePointer.attrib["file"]
    tree = ET.parse(treeFile).getroot()
    logger.info("Parsed the tree XML")

    #A node without children is a leaf and counts as 1; otherwise its count
    #is the sum of the counts stored in its children's tree files.
    children = tree.find("children").findall("child")
    if children:
        leafCount = 0
        for child in children:
            #Parse the child XML tree
            childTree = ET.parse(child.attrib["file"]).getroot()
            leafCount += int(childTree.attrib["count"])
    else:
        leafCount = 1

    tree.attrib["count"] = str(leafCount)
    logger.info("Calculated the leaf count: %i" % leafCount)

    #Close the handle even if serialising the XML fails part-way.
    fileHandle = open(treeFile, 'w')
    try:
        ET.ElementTree(tree).write(fileHandle)
    finally:
        fileHandle.close()

    logger.info("Updated the tree file: %s" % treeFile)

    for childPointer in treePointer.find("children").findall("child"):
        childPointerFile = childPointer.attrib["file"]
        if os.path.isfile(childPointerFile):
            os.remove(childPointerFile)

    logger.info("Removed the child pointer files")

    logger.info("No need to update the job file, as we didn't make anything new!")

    if random.random() > 0.9:
        logger.info("Going to fail the job")
        sys.exit(1)
    logger.info("Going to pass the job done okay")
    sys.exit(0)
def main():
    """Merges XML formatted MAF comparison results into one results file.

    The input files are chosen in this order of precedence: every file ending
    in ".xml" in --inputDir, else the file names listed one per line in
    --inputFile, else the pair --results1 and --results2. The last file of
    that list is used as the base tree and every other file is merged into
    it: "time" attributes are added together, homology tests for the same
    (sequenceA, sequenceB) pair have their aggregate counts summed and their
    single tests pooled, and tests only present in the other file are copied
    across. The merged tree is pretty-printed to --outputFile.

    Exits with status -1 (after reporting to standard error) if the input
    directory or list file can not be opened or no input files were found.
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")

    parser.add_option("--results1", dest="results1",
                      help="File containing the first XML formatted MAF comparsion results.")

    parser.add_option("--results2", dest="results2",
                      help="File containing the second XML formatted MAF comparsion results.")

    parser.add_option("--inputFile", dest="inputFile",
                      help="File containing a list of XML formatted MAF comparison results.")

    parser.add_option("--inputDir", dest="inputDir",
                      help="Directory containing the XML formatted MAF comparison results.")

    parser.add_option("--outputFile", dest="outputFile", type="string",
                      help="The file to put the aggregated results in (can be the same as either of the two inputs)")

    options, args = parseBasicOptions(parser)

    assert len(args) == 0
    logger.info("Parsed arguments")

    haveResultsPair = options.results1 is not None and options.results2 is not None
    assert haveResultsPair or options.inputDir is not None or options.inputFile is not None
    assert options.outputFile is not None

    ##########################################
    #Helper functions for the merging.
    ##########################################

    def addCounts(target, other):
        #Adds the counts of other to target and refreshes target's average.
        for key in ("totalTests", "totalTrue", "totalFalse"):
            target.attrib[key] = str(int(target.attrib[key]) + int(other.attrib[key]))
        #The average is left untouched when there are no tests (avoids dividing by zero)
        if int(target.attrib["totalTests"]) != 0:
            target.attrib["average"] = str(float(target.attrib["totalTrue"]) / float(target.attrib["totalTests"]))

    def mergeAggregateResults(tag1, tag2):
        #Sums every category of the aggregate results tag2 into tag1.
        assert tag1.tag == "aggregate_results"
        assert tag2.tag == "aggregate_results"
        for category in ("all", "both", "A", "B", "neither"):
            addCounts(tag1.find(category), tag2.find(category))

    def sameSequencePair(test1, test2):
        #True if both homology tests are for the same ordered pair of sequences.
        return (test1.attrib["sequenceA"] == test2.attrib["sequenceA"] and
                test1.attrib["sequenceB"] == test2.attrib["sequenceB"])

    ##########################################
    #Work out the list of files to merge.
    ##########################################

    xmlList = []
    if options.inputDir is not None:
        try:
            fileList = os.listdir(options.inputDir)
        except OSError:
            sys.stderr.write("Error: Can't open dir '%s'\n" % options.inputDir)
            sys.exit(-1)
        #Relative directories are resolved against the current working directory
        if options.inputDir.startswith("/"):
            dirPrefix = options.inputDir + "/"
        else:
            dirPrefix = os.getcwd() + "/" + options.inputDir + "/"
        for fileName in fileList:
            if fileName.endswith(".xml"):
                xmlList.append(dirPrefix + fileName)
    elif options.inputFile is not None:
        try:
            listHandle = open(options.inputFile)
        except (IOError, OSError):
            sys.stderr.write("Error: Can't open file '%s'\n" % options.inputFile)
            sys.exit(-1)
        try:
            for line in listHandle:
                line = line.rstrip()
                if line: #A blank line is not a file name
                    xmlList.append(line)
        finally:
            listHandle.close()
    elif haveResultsPair:
        xmlList.append(options.results1)
        xmlList.append(options.results2)
    else:
        #Only reachable when the assertions above are disabled (python -O)
        sys.stderr.write("Error: Need to specify input\n")
        sys.exit(-1)

    if len(xmlList) == 0:
        #Without this the pop() below fails with an uninformative IndexError
        sys.stderr.write("Error: No input XML files were found\n")
        sys.exit(-1)

    ##########################################
    #Do the merging.
    ##########################################

    baseFile = xmlList.pop()
    resultsTree1 = ET.parse(baseFile).getroot()
    homologyTestsList1 = resultsTree1.findall("homology_tests")

    #Record which file the single tests of the base tree came from.
    #NOTE(review): this looks for "homology_test" directly under
    #"homology_tests", whereas the merge below reaches them through
    #"homology_pair_tests" and "single_homology_tests" - confirm the paths.
    for homologyTests1 in homologyTestsList1:
        for homologyTest1 in homologyTests1.findall("homology_test"):
            for singleHomologyTest1 in homologyTest1.findall("single_homology_test"):
                singleHomologyTest1.attrib["srcFile"] = str(baseFile)

    for xmlFile in xmlList:
        resultsTree2 = ET.parse(xmlFile).getroot()
        #Add the time to the previous time
        if "time" in resultsTree2.attrib and "time" in resultsTree1.attrib:
            resultsTree1.attrib["time"] = str(float(resultsTree1.attrib["time"]) + float(resultsTree2.attrib["time"]))

        homologyTestsList2 = resultsTree2.findall("homology_tests")

        assert len(homologyTestsList1) == len(homologyTestsList2)
        for homologyTests1, homologyTests2 in zip(homologyTestsList1, homologyTestsList2):
            assert homologyTests1.attrib["near"] == homologyTests2.attrib["near"]

            pairTests1 = homologyTests1.find("homology_pair_tests")
            #Neither list of tests changes until the unmatched tests are inserted below
            testList1 = pairTests1.findall("homology_test")
            testList2 = homologyTests2.find("homology_pair_tests").findall("homology_test")

            #Do merge for tests in both sets
            for homologyTest1 in testList1:
                for homologyTest2 in testList2:
                    if sameSequencePair(homologyTest1, homologyTest2):
                        mergeAggregateResults(homologyTest1.find("aggregate_results"), homologyTest2.find("aggregate_results"))
                        singleTests1 = homologyTest1.find("single_homology_tests")
                        for singleHomologyTest2 in homologyTest2.find("single_homology_tests").findall("single_homology_test"):
                            singleTests1.insert(0, singleHomologyTest2)

            #Now add in tests not in the intersection of the results
            unmatchedTests = []
            for homologyTest2 in testList2:
                for homologyTest1 in testList1:
                    if sameSequencePair(homologyTest1, homologyTest2):
                        break
                else:
                    unmatchedTests.append(homologyTest2)

            for homologyTest2 in unmatchedTests:
                pairTests1.insert(0, homologyTest2)

            #Now recalculate the totals
            mergeAggregateResults(homologyTests1.find("aggregate_results"), homologyTests2.find("aggregate_results"))

    #Write to the results file.
    prettyXml = prettify(resultsTree1)
    fileHandle = open(options.outputFile, 'w')
    try:
        fileHandle.write(prettyXml)
    finally:
        fileHandle.close()
Example #16
0
def main():
    """Reports stats on the job-tree, use in conjunction with --stats options to jobTree.

    Reads config.xml and stats.xml from the directory given by --jobTree and
    collates the "time" and "clock" attributes recorded for the slaves, jobs
    and targets in the stats file, plus the derived wait (time minus clock),
    into a single "collated_stats" XML element. That element is pretty-printed
    to standard out and, if --outputFile is given, also written to that file.

    Raises RuntimeError if the job-tree is not specified, is not a directory,
    or does not contain config.xml and stats.xml.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser("usage: %prog", "%prog 0.1")

    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree")

    parser.add_option("--outputFile", dest="outputFile", default=None,
                      help="File in which to write results")

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    assert len(args) == 0

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for job tree")

    if options.jobTree is None:
        raise RuntimeError("You did not specify the job-tree")

    if not os.path.isdir(options.jobTree):
        raise RuntimeError("The given job dir tree does not exist: %s" % options.jobTree)

    configFile = os.path.join(options.jobTree, "config.xml")
    if not os.path.isfile(configFile):
        raise RuntimeError("A valid job tree must contain the config file")

    statsFile = os.path.join(options.jobTree, "stats.xml")
    if not os.path.isfile(statsFile):
        raise RuntimeError("The job-tree was run without the --stats flag, so no stats were created")

    ##########################################
    #Read the stats and config
    ##########################################

    config = ET.parse(configFile).getroot()
    stats = ET.parse(statsFile).getroot()

    ##########################################
    #Collate the stats and report
    ##########################################

    def nonNegative(value):
        #Replaces a negative value by zero (named so as not to hide the builtin round).
        if value < 0:
            logger.debug("I got a less than 0 value: %s" % value)
            return 0.0
        return value

    def addSummary(attribs, values, suffix):
        #Stores the total/median/average/min/max of values in attribs, keyed by suffix.
        values = sorted(values) #A sorted copy, the caller's list keeps its order
        if len(values) == 0:
            values = [0] #Report zeros for an empty list rather than failing
        attribs["total_%s" % suffix] = str(sum(values))
        attribs["median_%s" % suffix] = str(values[len(values) // 2])
        attribs["average_%s" % suffix] = str(sum(values) / len(values))
        attribs["min_%s" % suffix] = str(min(values))
        attribs["max_%s" % suffix] = str(max(values))

    def addItemStats(element, items, itemName):
        #Adds a child called itemName to element summarising the time, clock and
        #wait of the items, and returns the new child.
        times = [ nonNegative(float(item.attrib["time"])) for item in items ]
        clocks = [ nonNegative(float(item.attrib["clock"])) for item in items ]
        #The wait of an item is the part of its time not accounted for by its clock
        waits = [ nonNegative(time - clock) for time, clock in zip(times, clocks) ]
        attribs = { "total_number":str(len(items)) }
        addSummary(attribs, times, "time")
        addSummary(attribs, clocks, "clock")
        addSummary(attribs, waits, "wait")
        return ET.SubElement(element, itemName, attribs)

    def addCountsPerContainer(element, containingItems, containingItemName, getFn):
        #Records on element how many items (as returned by getFn) each containing item holds.
        itemCounts = sorted([ len(getFn(containingItem)) for containingItem in containingItems ])
        if len(itemCounts) == 0:
            itemCounts = [0]
        element.attrib["median_number_per_%s" % containingItemName] = str(itemCounts[len(itemCounts) // 2])
        element.attrib["average_number_per_%s" % containingItemName] = str(float(sum(itemCounts)) / len(itemCounts))
        element.attrib["min_number_per_%s" % containingItemName] = str(min(itemCounts))
        element.attrib["max_number_per_%s" % containingItemName] = str(max(itemCounts))

    def getJobs(slave):
        #The jobs recorded for a slave.
        return slave.findall("job")

    def getTargets(job):
        #The targets recorded on all the stacks of a job.
        jobTargets = []
        for stack in job.findall("stack"):
            jobTargets.extend(stack.findall("target"))
        return jobTargets

    totalTime = stats.find("total_time")
    if totalTime is None: #Hack to allow it to work on unfinished jobtrees.
        totalTime = ET.SubElement(stats, "total_time", { "time":"0.0", "clock":"0.0"})

    collatedAttribs = { "total_run_time":totalTime.attrib["time"],
                        "total_clock":totalTime.attrib["clock"] }
    #These settings are copied over unchanged from the config file
    for key in ("batch_system", "job_time", "default_memory",
                "default_cpu", "max_jobs", "max_threads"):
        collatedAttribs[key] = config.attrib[key]
    collatedStatsTag = ET.Element("collated_stats", collatedAttribs)

    #Add slave info
    slaves = stats.findall("slave")
    addItemStats(collatedStatsTag, slaves, "slave")

    #Add job info
    jobs = []
    for slave in slaves:
        jobs.extend(getJobs(slave))
    addCountsPerContainer(addItemStats(collatedStatsTag, jobs, "job"), slaves, "slave", getJobs)

    #Add aggregated target info
    targets = []
    for job in jobs:
        targets.extend(getTargets(job))
    addCountsPerContainer(addItemStats(collatedStatsTag, targets, "target"), jobs, "job", getTargets)

    #Get info for each target
    targetNames = set()
    for target in targets:
        targetNames.add(target.attrib["class"])

    targetTypesTag = ET.SubElement(collatedStatsTag, "target_types")
    for targetName in targetNames:
        targetTypes = [ target for target in targets if target.attrib["class"] == targetName ]
        targetTypeTag = addItemStats(targetTypesTag, targetTypes, targetName)
        #targetTypes can not be empty here, as each name was taken from a target
        estimatedRunTimes = [ float(target.attrib["e_time"]) for target in targetTypes ]
        targetTypeTag.attrib["estimated_time"] = str(sum(estimatedRunTimes)/len(estimatedRunTimes))

    def prettify(elem):
        """Return a pretty-printed XML string for the Element.
        """
        rough_string = ET.tostring(elem, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent="  ")

    prettyXml = prettify(collatedStatsTag)

    #Now dump it all out to file
    if options.outputFile is not None:
        fileHandle = open(options.outputFile, 'w')
        try:
            fileHandle.write(prettyXml)
        finally:
            fileHandle.close()

    #Now dump onto the screen
    sys.stdout.write(prettyXml + "\n")