Example #1
def main():
    """Restarts a toil workflow.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  

    parser = getBasicOptionParser()

    parser.add_argument("--version", action='version', version=version)

    parser.add_argument("jobStore", type=str,
          help=("Store in which to place job management files \
          and the global accessed temporary files"
          "(If this is a file path this needs to be globally accessible "
          "by all machines running jobs).\n"
          "If the store already exists and restart is false an"
          " ExistingJobStoreException exception will be thrown."))

    options = parseBasicOptions(parser)
        
    ##########################################
    #Now run the toil construction/leader
    ##########################################  
        
    setLoggingFromOptions(options)
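    # restart=True tells setupToil to resume the existing job store rather than create a new one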
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        rootJob = Job._loadRootJob(jobStore)  # load the root job once and reuse it
        jobStore.clean(rootJob)
        mainLoop(config, batchSystem, jobStore, rootJob)
Example #2
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help=(
            "Store in which to place job management files "
            "and the globally accessed temporary files "
            "(if this is a file path it needs to be globally accessible "
            "by all machines running jobs).\n"
            "If the store already exists and restart is false, an"
            " ExistingJobStoreException will be thrown."
        ),
    )
    parser.add_argument("--version", action="version", version=version)
    options = parseBasicOptions(parser)

    jobStore = Toil.loadOrCreateJobStore(options.jobStore)

    logger.info("Starting routine to kill running jobs in the toil workflow: %s" % options.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config)  # this should automatically kill the existing jobs, so we're good
    for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
Example #3
def setUpClass(cls):
    super(ToilTest, cls).setUpClass()
    cls.orig_sys_argv = sys.argv[1:]
    sys.argv[1:] = shlex.split(os.environ.get('TOIL_TEST_ARGS', ""))
    parser = getBasicOptionParser()
    options, args = parseSuiteTestOptions(parser)
    sys.argv[1:] = args
Example #4
def main():
    """Removes the JobStore from a toil run.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                      help=("Store in which to place job management files \
                      and the global accessed temporary files"
                      "(If this is a file path this needs to be globally accessible "
                      "by all machines running jobs).\n"
                      "If the store already exists and restart is false an"
                      " ExistingJobStoreException exception will be thrown."))
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    ##########################################
    #Survey the status of the job and report.
    ##########################################
    logger.info("Checking if we have files for toil")
    try:
        jobStore = Toil.loadOrCreateJobStore(options.jobStore)
    except JobStoreCreationException:
        logger.info("The specified JobStore does not exist, it may have already been deleted")
        sys.exit(0)

    logger.info("Deleting the JobStore")
    jobStore.deleteJobStore()
Example #5
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    config = parseBasicOptions(parser)
    cluster = Cluster(provisioner=config.provisioner,
                      clusterName=config.clusterName, zone=config.zone)
    cluster.destroyCluster()
Example #6
def main():
    parser = getBasicOptionParser("usage: %prog [--jobStore] JOB_TREE_DIR [more options]", "%prog 0.1")
    
    parser.add_option("--jobStore", dest="jobStore",
                      help="Job store path. Can also be specified as the single argument to the script.")
    
    options, args = parseBasicOptions(parser)
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    assert len(args) <= 1 #Only toil may be specified as argument
    if len(args) == 1: #Allow toil directory as arg
        options.jobStore = args[0]
        
    logger.info("Parsed arguments")
    if options.jobStore is None:
        parser.error("Specify --jobStore")

    jobStore = loadJobStore(options.jobStore)
    
    logger.info("Starting routine to kill running jobs in the toil workflow: %s" % options.jobStore)
    ####This behaviour is now broken
    batchSystem = loadBatchSystem(jobStore.config) #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs(): #Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
Example #7
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    cluster = Cluster(provisioner=config.provisioner, clusterName=config.clusterName)
    cluster.destroyCluster()
Example #8
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--insecure", dest='insecure', action='store_true', required=False,
                        help="Temporarily disable strict host key checking.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    config = parseBasicOptions(parser)
    cluster = Cluster(provisioner=config.provisioner,
                      clusterName=config.clusterName, zone=config.zone)
    cluster.sshCluster(args=config.args, strict=not config.insecure)
Example #9
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of the job store to delete. " + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Attempting to delete the job store")
    jobStore = Toil.getJobStore(options.jobStore)
    jobStore.destroy()
    logger.info("Successfully deleted the job store")
Example #10
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of the job store to delete. " + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    config = Config()
    config.setOptions(parseBasicOptions(parser))
    logger.info("Attempting to delete the job store")
    jobStore = Toil.getJobStore(config.jobStore)
    jobStore.destroy()
    logger.info("Successfully deleted the job store")
Example #11
def main():
    """ Reports stats on the workflow, use with --stats option to toil.
    """
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    jobStore = Toil.loadOrCreateJobStore(options.jobStore)
    stats = getStats(options)
    collatedStatsTag = processData(jobStore.config, stats, options)
    reportData(collatedStatsTag, options)
Example #12
def main():
    """ Reports stats on the workflow, use with --stats option to toil.
    """
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    jobStore = loadJobStore(options.jobStore)
    stats = getStats(options)
    collatedStatsTag = processData(jobStore.config, stats, options)
    reportData(collatedStatsTag, options)
Example #13
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--insecure", dest='insecure', action='store_true', required=False,
                        help="Temporarily disable strict host key checking.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    config = parseBasicOptions(parser)
    cluster = clusterFactory(provisioner=config.provisioner,
                             clusterName=config.clusterName,
                             zone=config.zone)
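    # Fall back to an interactive bash shell on the leader when no command is given.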
    command = config.args if config.args else ['bash']
    cluster.getLeader().sshAppliance(*command, strict=not config.insecure, tty=sys.stdin.isatty())
Example #14
def main():
    parser = getBasicOptionParser()
    parser.add_argument("--version", action='version', version=version)
    parser.add_argument(
        "--nodeType",
        dest='nodeType',
        required=True,
        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
        "provisioner used. For the aws provisioner this is the name of an "
        "EC2 instance type followed by a colon and the price in dollar to "
        "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument(
        '-p',
        "--provisioner",
        dest='provisioner',
        choices=['aws'],
        required=True,
        help="The provisioner for cluster auto-scaling. Only aws is currently"
        "supported")
    parser.add_argument(
        "clusterName",
        help="The name that the cluster will be identifiable by")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        required=True,
        help="The name of the AWS key pair to include on the instance")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    spotBid = None
    provisioner = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError(
                'The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner
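        # nodeType may be of the form '<instanceType>:<spotBid>'; when a bid is
        # present, split it off and request a spot instance at that price.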
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              clusterName=config.clusterName,
                              keyName=config.keyPairName,
                              spotBid=spotBid)
Example #15
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--insecure", dest='insecure', action='store_true', required=False,
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("args", nargs=argparse.REMAINDER, help="Arguments to pass to"
                        "`rsync`. Takes any arguments that rsync accepts. Specify the"
                        " remote with a colon. For example, to upload `example.py`,"
                        " specify `toil rsync-cluster -p aws test-cluster example.py :`."
                        "\nOr, to download a file from the remote:, `toil rsync-cluster"
                        " -p aws test-cluster :example.py .`")
    config = parseBasicOptions(parser)
    cluster = Cluster(provisioner=config.provisioner, clusterName=config.clusterName, zone=config.zone)
    cluster.rsyncCluster(args=config.args, strict=not config.insecure)
Example #16
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." +
        jobStoreLocatorHelp)
    parser.add_argument("--localFilePath",
                        nargs=1,
                        help="Location to which to copy job store files.")
    parser.add_argument("--fetch",
                        nargs="+",
                        help="List of job-store files to be copied locally."
                        "Use either explicit names (i.e. 'data.txt'), or "
                        "specify glob patterns (i.e. '*.txt')")
    parser.add_argument(
        "--listFilesInJobStore",
        action="store_true",
        help="Prints a list of the current files in the jobStore.")
    parser.add_argument(
        "--fetchEntireJobStore",
        action="store_true",
        help="Copy all job store files into a local directory.")
    parser.add_argument(
        "--useSymlinks",
        action="store_true",
        help="Creates symlink 'shortcuts' of files in the localFilePath"
        " instead of hardlinking or copying, where possible.  If this is"
        " not possible, it will copy the files (shutil.copyfile()).")
    parser.add_argument("--version", action='version', version=version)

    # Load the jobStore
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    logger.debug("Connected to job store: %s", config.jobStore)

    if options.fetch:
        # Copy only the listed files locally
        logger.debug("Fetching local files: %s", options.fetch)
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    elif options.fetchEntireJobStore:
        # Copy all jobStore files locally
        logger.debug("Fetching all local files.")
        options.fetch = "*"
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    if options.listFilesInJobStore:
        # Log filenames and create a file containing these names in cwd
        printContentsOfJobStore(jobStorePath=options.jobStore)
Example #17
def main():
    """ Reports stats on the job-tree, use with --stats option to toil.
    """

    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    jobStore = loadJobStore(options.jobStore)
    #collatedStatsTag = cacheAvailable(options)
    #if collatedStatsTag is None:
    stats = getStats(options)
    collatedStatsTag = processData(jobStore.config, stats, options)
    reportData(collatedStatsTag, options)
Example #18
def main():
    """ Reports stats on the job-tree, use with --stats option to toil.
    """

    parser = getBasicOptionParser(
        "usage: %prog [--jobStore] JOB_TREE_DIR [options]", "%prog 0.1")
    initializeOptions(parser)
    options, args = parseBasicOptions(parser)
    checkOptions(options, args, parser)
    jobStore = loadJobStore(options.jobStore)
    #collatedStatsTag = cacheAvailable(options)
    #if collatedStatsTag is None:
    stats = getStats(options)
    collatedStatsTag = processData(jobStore.config, stats, options)
    reportData(collatedStatsTag, options)
Example #19
def main():
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of the job store used by the workflow whose jobs should "
                             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)

    jobStore = Toil.resumeJobStore(options.jobStore)

    logger.info("Starting routine to kill running jobs in the toil workflow: %s" % options.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config) #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs(): #Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
Example #20
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to"
        "`rsync`. Takes any arguments that rsync accepts. Specify the"
        " remote with a colon. For example, to upload `example.py`,"
        " specify `toil rsync-cluster -p aws test-cluster example.py :`."
        "\nOr, to download a file from the remote:, `toil rsync-cluster"
        " -p aws test-cluster :example.py .`")
    config = parseBasicOptions(parser)
    setLoggingFromOptions(config)
    cluster = Cluster(provisioner=config.provisioner,
                      clusterName=config.clusterName,
                      zone=config.zone)
    cluster.rsyncCluster(args=config.args)
Example #21
def main():
    """Restarts a toil workflow.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument("--version", action='version', version=version)

    parser.add_argument(
        "jobStore",
        type=str,
        help=("Store in which to place job management files \
          and the global accessed temporary files"
              "(If this is a file path this needs to be globally accessible "
              "by all machines running jobs).\n"
              "If the store already exists and restart is false an"
              " ExistingJobStoreException exception will be thrown."))

    options = parseBasicOptions(parser)

    ##########################################
    #Now run the toil construction/leader
    ##########################################

    setLoggingFromOptions(options)
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        # Load the whole jobstore into memory in a batch
        logger.info("Downloading entire JobStore")
        jobCache = {
            jobWrapper.jobStoreID: jobWrapper
            for jobWrapper in jobStore.jobs()
        }
        logger.info("{} jobs downloaded.".format(len(jobCache)))
        rootJob = Job._loadRootJob(jobStore)  # load the root job once and reuse it
        jobStore.clean(rootJob, jobCache=jobCache)
        mainLoop(config,
                 batchSystem,
                 jobStore,
                 rootJob,
                 jobCache=jobCache)
Example #22
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help=
        "The location of the job store used by the workflow whose jobs should "
        "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith(
        'file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info(
            "Starting routine to kill running jobs in the toil workflow: %s",
            config.jobStore)
        # TODO: This behaviour is now broken src: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(
            jobStore.config
        )  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            batchSystem.killBatchJobs(jobID)
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
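        # A file-based job store records the leader's PID in pid.log; read it
        # back so that process can be signalled directly.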
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated." %
                        str(pid2kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." %
                         str(pid2kill))
            raise
Example #23
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore",
                        type=str,
                        help="The location of the job store to delete. " +
                        jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    config = Config()
    config.setOptions(parseBasicOptions(parser))
    try:
        jobStore = Toil.getJobStore(config.jobStore)
        jobStore.resume()
        jobStore.destroy()
        logger.info("Successfully deleted the job store: %s" % config.jobStore)
    except NoSuchJobStoreException:
        logger.info("Failed to delete the job store: %s is non-existent" %
                    config.jobStore)
    except Exception:
        logger.info("Failed to delete the job store: %s", config.jobStore)
        raise
Example #24
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." +
        jobStoreLocatorHelp)
    parser.add_argument("jobID",
                        nargs=1,
                        help="The job store id of a job "
                        "within the provided jobstore to run by itself.")
    parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help="Return information about this job to the user"
        " including preceding jobs, inputs, outputs, and runtime"
        " from the last known run.")
    parser.add_argument("--version", action='version', version=version)

    # Parse options
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)

    # Load the job store
    jobStore = Toil.resumeJobStore(config.jobStore)

    if options.printJobInfo:
        printContentsOfJobStore(jobStorePath=options.jobStore,
                                nameOfJob=options.printJobInfo)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    # Run the job locally
    jobID = options.jobID[0]
    logger.debug("Going to run the following job locally: %s", jobID)
    workerScript(jobStore, config, jobID, jobID, redirectOutputToLogFile=False)
    logger.debug("Ran the following job locally: %s", jobID)
Example #25
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--insecure",
                        action='store_true',
                        help="Temporarily disable strict host key checking.")
    parser.add_argument("--sshOption",
                        dest='sshOptions',
                        default=[],
                        action='append',
                        help="Pass an additional option to the SSH command.")
    parser.add_argument('args', nargs=argparse.REMAINDER)
    config = parseBasicOptions(parser)
    cluster = clusterFactory(provisioner=config.provisioner,
                             clusterName=config.clusterName,
                             zone=config.zone)
    command = config.args if config.args else ['bash']
    cluster.getLeader().sshAppliance(*command,
                                     strict=not config.insecure,
                                     tty=sys.stdin.isatty(),
                                     sshOptions=config.sshOptions)
Example #26
def main():
    """Removes the JobStore from a toil run.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()
    parser.add_argument(
        "jobStore",
        type=str,
        help=("Store in which to place job management files \
                      and the global accessed temporary files"
              "(If this is a file path this needs to be globally accessible "
              "by all machines running jobs).\n"
              "If the store already exists and restart is false an"
              " ExistingJobStoreException exception will be thrown."))
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    ##########################################
    #Survey the status of the job and report.
    ##########################################
    logger.info("Checking if we have files for toil")
    try:
        jobStore = Toil.loadOrCreateJobStore(options.jobStore)
    except JobStoreCreationException:
        logger.info(
            "The specified JobStore does not exist; it may have already been deleted"
        )
        sys.exit(0)

    logger.info("Attempting to delete the job store")
    jobStore.deleteJobStore()
    logger.info("Successfully deleted the job store")
Example #27
def main():
    """Restarts a toil workflow.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  

    parser = getBasicOptionParser()

    parser.add_argument("--version", action='version', version=version)

    parser.add_argument("jobStore", type=str,
          help=("Store in which to place job management files \
          and the global accessed temporary files"
          "(If this is a file path this needs to be globally accessible "
          "by all machines running jobs).\n"
          "If the store already exists and restart is false an"
          " ExistingJobStoreException exception will be thrown."))

    options = parseBasicOptions(parser)
        
    ##########################################
    #Now run the toil construction/leader
    ##########################################  
        
    setLoggingFromOptions(options)
    options.restart = True
    with setupToil(options) as (config, batchSystem, jobStore):
        # Load the whole jobstore into memory in a batch
        logger.info("Downloading entire JobStore")
        jobCache = {jobWrapper.jobStoreID: jobWrapper
            for jobWrapper in jobStore.jobs()}
        logger.info("{} jobs downloaded.".format(len(jobCache)))
        rootJob = Job._loadRootJob(jobStore)  # load the root job once and reuse it
        jobStore.clean(rootJob, jobCache=jobCache)
        mainLoop(config, batchSystem, jobStore, rootJob, jobCache=jobCache)
Example #28
def main():
    """Reports the state of a Toil workflow."""
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument("--failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)

    parser.add_argument("--noAggStats", dest="stats", action="store_false",
                        help="Do not print overall, aggregate status of workflow.",
                        default=True)

    parser.add_argument("--printDot", action="store_true",
                        help="Print dot formatted description of the graph. If using --jobs will "
                             "restrict to subgraph including only those jobs. default=%(default)s",
                        default=False)

    parser.add_argument("--jobs", nargs='+',
                        help="Restrict reporting to the following jobs (allows subsetting of the report).",
                        default=None)

    parser.add_argument("--printPerJobStats", action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument("--printLogs", action="store_true",
                        help="Print the log files of jobs (if they exist). default=%(default)s",
                        default=False)

    parser.add_argument("--printChildren", action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    parser.add_argument("--version", action='version', version=version)

    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)
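    # resumeJobStore raises NoSuchJobStoreException if the job store does not exist.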
    jobStore = Toil.resumeJobStore(config.jobStore)

    ##########################################
    # Gather the jobs to report
    ##########################################

    # Gather all jobs in the workflow in jobsToReport
    if options.jobs is None:
        rootJob = fetchRootJob(jobStore)
        logger.info('Traversing the job graph gathering jobs. This may take a couple of minutes.')
        jobsToReport = traverseJobGraph(rootJob, jobStore)

    # Only gather jobs specified in options.jobs
    else:
        jobsToReport = fetchUserJobs(jobStore, jobs=options.jobs)

    ##########################################
    # Report on the jobs
    ##########################################

    jobStats = report_on_jobs(jobsToReport, jobStore, options)

    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        printAggregateJobStats(jobsToReport, properties, childNumber)

    if options.printLogs:
        printJobLog(jobsToReport, jobStore)

    if options.printChildren:
        printJobChildren(jobsToReport)

    if options.printDot:
        print_dot_chart(jobsToReport, jobStore_name=config.jobStore)

    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
               len(hasServices), len(services), len(hasLogFile), config.jobStore))

    if len(jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)
Example #29
def main():
    """Reports the state of a Toil workflow."""
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of a job store that holds the information about the "
        "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument(
        "--failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--noAggStats",
        dest="stats",
        action="store_false",
        help="Do not print overall, aggregate status of workflow.",
        default=True)

    parser.add_argument(
        "--printDot",
        action="store_true",
        help=
        "Print dot formatted description of the graph. If using --jobs will "
        "restrict to subgraph including only those jobs. default=%(default)s",
        default=False)

    parser.add_argument(
        "--jobs",
        nargs='+',
        help=
        "Restrict reporting to the following jobs (allows subsetting of the report).",
        default=None)

    parser.add_argument("--printPerJobStats",
                        action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument(
        "--printLogs",
        action="store_true",
        help="Print the log files of jobs (if they exist). default=%(default)s",
        default=False)

    parser.add_argument("--printChildren",
                        action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    parser.add_argument("--version", action='version', version=version)

    options = parseBasicOptions(parser)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)

    try:
        status = ToilStatus(config.jobStore, options.jobs)
    except NoSuchJobStoreException:
        print('No job store found.')
        return
    except JobException:  # Workflow likely complete, user informed in ToilStatus()
        return

    jobStats = status.report_on_jobs()

    # Info to be reported.
    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        status.printAggregateJobStats(properties, childNumber)
    if options.printLogs:
        status.printJobLog()
    if options.printChildren:
        status.printJobChildren()
    if options.printDot:
        status.print_dot_chart()
    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(status.jobsToReport), len(hasChildren), len(readyToRun),
               len(zombies), len(hasServices), len(services), len(hasLogFile),
               status.jobStore))

    if len(status.jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)
Example #30
def main():
    """Reports the state of the toil.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser(
        "usage: %prog [--jobStore] JOB_TREE_DIR [options]", "%prog 0.1")

    parser.add_option(
        "--jobStore",
        dest="jobStore",
        help="Job store path. Can also be specified as the single argument to "
        "the script. default=%default",
        default=os.path.abspath("./toil"))

    parser.add_option(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of "
        "jobs that failed. default=%default",
        default=False)

    parser.add_option(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%default",
        default=False)

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  #Only toil may be specified as argument
    if len(args) == 1:  #Allow toil directory as arg
        options.jobStore = args[0]

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for toil")
    assert options.jobStore is not None

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        print "The root job of the jobStore is not present, the toil workflow has probably completed okay"
        sys.exit(0)

    toilState = ToilState(jobStore, rootJob)

    failedJobs = [job for job in toilState.updatedJobs |
                  set(toilState.successorCounts.keys())
                  if job.remainingRetryCount == 0]

    print "There are %i active jobs, %i parent jobs with children, and \
    %i totally failed jobs currently in toil workflow: %s"                                                           % \
    (len(toilState.updatedJobs), len(toilState.successorCounts),
     len(failedJobs), options.jobStore)

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print "Log file for job %s is not present" % job.jobStoreID
        if len(failedJobs) == 0:
            print "There are no failed jobs to report"

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
Example #31
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument(
        "--nodeType",
        dest='nodeType',
        required=True,
        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
        "provisioner used. For the aws provisioner this is the name of an "
        "EC2 instance type followed by a colon and the price in dollar to "
        "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        required=True,
        help="The name of the AWS key pair to include on the instance")
    parser.add_argument(
        "-t",
        "--tag",
        metavar='NAME=VALUE',
        dest='tags',
        default=[],
        action='append',
        help="Tags are added to the AWS cluster for this node and all of its"
        "children. Tags are of the form: "
        " -t key1=value1 --tag key2=value2 "
        "Multiple tags are allowed and each tag needs its own flag. By "
        "default the cluster is tagged with "
        " {"
        "      \"Name\": clusterName,"
        "      \"Owner\": IAM username"
        " }. ")
    parser.add_argument(
        "--vpcSubnet",
        help=
        "VPC subnet ID to launch cluster in. Uses default subnet if not specified."
        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument(
        "-w",
        "--workers",
        dest='workers',
        default=0,
        type=int,
        help=
        "Specify a number of workers to launch alongside the leader when the "
        "cluster is created. This can be useful if running toil without "
        "auto-scaling but with need of more hardware support")
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError(
                'The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner()
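        # nodeType may be of the form '<instanceType>:<spotBid>'; when a bid is
        # present, split it off and request a spot instance at that price.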
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              keyName=config.keyPairName,
                              clusterName=config.clusterName,
                              workers=config.workers,
                              spotBid=spotBid,
                              userTags=tagsDict,
                              zone=config.zone,
                              vpcSubnet=config.vpcSubnet)
Example #32
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--nodeType", dest='nodeType', required=True,
                        help="Node type for {non-|}preemptable nodes. The syntax depends on the "
                             "provisioner used. For the aws provisioner this is the name of an "
                             "EC2 instance type followed by a colon and the price in dollar to "
                             "bid for a spot instance, for example 'c3.8xlarge:0.42'.")
    parser.add_argument("--keyPairName", dest='keyPairName', required=True,
                        help="The name of the AWS key pair to include on the instance")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags', default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its "
                             "children. Tags are of the form:\n"
                             " --t key1=value1 --tag key2=value2\n"
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {\n"
                             "      \"Name\": clusterName,\n"
                             "      \"Owner\": IAM username\n"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not specified. "
                        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("-w", "--workers", dest='workers', default=0, type=int,
                        help="Specify a number of workers to launch alongside the leader when the "
                             "cluster is created. This can be useful if running toil without "
                             "auto-scaling but with need of more hardware support")
    parser.add_argument("--leaderStorage", dest='leaderStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for the leader instance. "
                             "This is an EBS volume.")
    parser.add_argument("--nodeStorage", dest='nodeStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for any worker instances "
                             "created when using the -w flag. This is an EBS volume.")
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            raise RuntimeError('The aws extra must be installed to use this provisioner')
        provisioner = AWSProvisioner()
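        # nodeType may be of the form '<instanceType>:<spotBid>'; when a bid is
        # present, split it off and request a spot instance at that price.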
        parsedBid = config.nodeType.split(':', 1)
        if len(config.nodeType) != len(parsedBid[0]):
            # there is a bid
            spotBid = float(parsedBid[1])
            config.nodeType = parsedBid[0]
    else:
        assert False

    provisioner.launchCluster(instanceType=config.nodeType,
                              keyName=config.keyPairName,
                              clusterName=config.clusterName,
                              workers=config.workers,
                              spotBid=spotBid,
                              userTags=tagsDict,
                              zone=config.zone,
                              leaderStorage=config.leaderStorage,
                              nodeStorage=config.nodeStorage,
                              vpcSubnet=config.vpcSubnet)
Example #33
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
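        """Depth-first walk of the job graph, collecting each reachable job exactly once."""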
        foundJobStoreIDs = set()
        totalJobs = []
        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successorJobStoreID in [x.jobStoreID for x in jobs]:
                    if (successorJobStoreID not in foundJobStoreIDs and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for serviceJobStoreID in [x.jobStoreID for x in jobs]:
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))
        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple of minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID is None and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info('There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
                'and %i totally failed jobs currently in %s.',
                len(totalJobs), len(hasChildren), len(hasServices), len(services), len(failedJobs), config.jobStore)

    if currentlyRunning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunning), '\n'.join(map(str, currentlyRunning)))

    if options.verbose: #Verbose currently means outputting the files that have failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join((str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        exit(1) # when the workflow is complete, all jobs will have been removed from job store
Example #34
def main():
    """Reports the state of the toil.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of a job store that holds the information about the "
        "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(options.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print(
            'The root job of the job store is absent, the workflow completed successfully.',
            file=sys.stderr)
        sys.exit(0)

    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print(
        'There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
        'currently in %s.' %
        (len(toilState.updatedJobs), len(
            toilState.successorCounts), len(failedJobs), options.jobStore),
        file=sys.stderr)

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID,
                      file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
Example #35
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
              help=("Store in which to place job management files \
              and the global accessed temporary files"
              "(If this is a file path this needs to be globally accessible "
              "by all machines running jobs).\n"
              "If the store already exists and restart is false an"
              " ExistingJobStoreException exception will be thrown."))
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for toil")
    assert options.jobStore is not None
    
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        print "The root job of the jobStore is not present, the toil workflow has probably completed okay"
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]
    
    print "There are %i active jobs, %i parent jobs with children, and \
    %i totally failed jobs currently in toil workflow: %s" % \
    (len(toilState.updatedJobs), len(toilState.successorCounts),
     len(failedJobs), options.jobStore)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print "Log file for job %s is not present" % job.jobStoreID
        if len(failedJobs) == 0:
            print "There are no failed jobs to report"   
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
Example #36
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument(
        "--leaderNodeType",
        dest="leaderNodeType",
        required=True,
        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        help="On AWS, the name of the AWS key pair to include on the instance."
        " On Google/GCE, this is the ssh key pair.")
    parser.add_argument(
        "--owner",
        dest='owner',
        help="The owner tag for all instances. If not given, the value in"
        " --keyPairName will be used if given.")
    parser.add_argument(
        "--boto",
        dest='botoPath',
        help="The path to the boto credentials directory. This is transferred "
        "to all nodes in order to access the AWS jobStore from non-AWS instances."
    )
    parser.add_argument(
        "-t",
        "--tag",
        metavar='NAME=VALUE',
        dest='tags',
        default=[],
        action='append',
        help="Tags are added to the AWS cluster for this node and all of its "
        "children. Tags are of the form:\n"
        " -t key1=value1 --tag key2=value2\n"
        "Multiple tags are allowed and each tag needs its own flag. By "
        "default the cluster is tagged with "
        " {\n"
        "      \"Name\": clusterName,\n"
        "      \"Owner\": IAM username\n"
        " }. ")
    parser.add_argument(
        "--vpcSubnet",
        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument(
        "--nodeTypes",
        dest='nodeTypes',
        default=None,
        type=str,
        help="Comma-separated list of node types to create while launching the "
        "leader. The syntax for each node type depends on the provisioner "
        "used. For the aws provisioner this is the name of an EC2 instance "
        "type followed by a colon and the price in dollar to bid for a spot "
        "instance, for example 'c3.8xlarge:0.42'. Must also provide the "
        "--workers argument to specify how many workers of each node type "
        "to create.")
    parser.add_argument(
        "-w",
        "--workers",
        dest='workers',
        default=None,
        type=str,
        help=
        "Comma-separated list of the number of workers of each node type to "
        "launch alongside the leader when the cluster is created. This can be "
        "useful if running toil without auto-scaling but with need of more "
        "hardware support")
    parser.add_argument(
        "--leaderStorage",
        dest='leaderStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for the leader "
        "instance.  This is an EBS volume.")
    parser.add_argument(
        "--nodeStorage",
        dest='nodeStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for any worker "
        "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument(
        '--forceDockerAppliance',
        dest='forceDockerAppliance',
        action='store_true',
        default=False,
        help=
        "Disables sanity checking the existence of the docker image specified "
        "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
        "autoscaling.")
    parser.add_argument(
        '--awsEc2ProfileArn',
        dest='awsEc2ProfileArn',
        default=None,
        type=str,
        help=
        "If provided, the specified ARN is used as the instance profile for EC2 instances."
        "Useful for setting custom IAM profiles. If not specified, a new IAM role is created "
        "by default with sufficient access to perform basic cluster operations."
    )
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)
    checkValidNodeTypes(config.provisioner, config.nodeTypes)
    checkValidNodeTypes(config.provisioner, config.leaderNodeType)

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    applianceSelf(forceDockerAppliance=config.forceDockerAppliance)

    spotBids = []
    nodeTypes = []
    preemptableNodeTypes = []
    numNodes = []
    numPreemptableNodes = []
    if (config.nodeTypes
            or config.workers) and not (config.nodeTypes and config.workers):
        raise RuntimeError(
            "The --nodeTypes and --workers options must be specified together,"
        )
    if config.nodeTypes:
        nodeTypesList = config.nodeTypes.split(",")
        numWorkersList = config.workers.split(",")
        if not len(nodeTypesList) == len(numWorkersList):
            raise RuntimeError(
                "List of node types must be the same length as the list of workers."
            )
        for nodeTypeStr, num in zip(nodeTypesList, numWorkersList):
            parsedBid = nodeTypeStr.split(':', 1)
            if len(nodeTypeStr) != len(parsedBid[0]):
                #A colon was present, so this is a preemptable (spot) node type
                preemptableNodeTypes.append(parsedBid[0])
                spotBids.append(float(parsedBid[1]))
                numPreemptableNodes.append(int(num))
            else:
                nodeTypes.append(nodeTypeStr)
                numNodes.append(int(num))

    # set owner (default to keyPairName if not given)
    owner = 'toil'
    if config.owner:
        owner = config.owner
    elif config.keyPairName:
        owner = config.keyPairName

    # Check to see if the user specified a zone. If not, see if one is stored in an environment variable.
    config.zone = config.zone or getZoneFromEnv(config.provisioner)

    if not config.zone:
        raise RuntimeError(
            'Please provide a value for --zone or set a default in the TOIL_' +
            config.provisioner.upper() + '_ZONE environment variable.')

    cluster = clusterFactory(provisioner=config.provisioner,
                             clusterName=config.clusterName,
                             zone=config.zone,
                             nodeStorage=config.nodeStorage)

    cluster.launchCluster(leaderNodeType=config.leaderNodeType,
                          leaderStorage=config.leaderStorage,
                          owner=owner,
                          keyName=config.keyPairName,
                          botoPath=config.botoPath,
                          userTags=tagsDict,
                          vpcSubnet=config.vpcSubnet,
                          awsEc2ProfileArn=config.awsEc2ProfileArn)

    for nodeType, workers in zip(nodeTypes, numNodes):
        cluster.addNodes(nodeType=nodeType,
                         numNodes=workers,
                         preemptable=False)
    for nodeType, workers, spotBid in zip(preemptableNodeTypes,
                                          numPreemptableNodes, spotBids):
        cluster.addNodes(nodeType=nodeType,
                         numNodes=workers,
                         preemptable=True,
                         spotBid=spotBid)
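
The length comparison against parsedBid[0] above is how the launcher detects a spot bid: a colon in the node type string means 'instanceType:spotBid'. The same split, factored into a standalone sketch (parseNodeType is a hypothetical helper, not a toil API):

def parseNodeType(nodeTypeStr):
    """Split 'c3.8xlarge:0.42' into ('c3.8xlarge', 0.42); a bare
    'c3.8xlarge' yields ('c3.8xlarge', None), meaning on-demand."""
    name, _, bid = nodeTypeStr.partition(':')
    return (name, float(bid) if bid else None)

assert parseNodeType('c3.8xlarge:0.42') == ('c3.8xlarge', 0.42)
assert parseNodeType('c3.8xlarge') == ('c3.8xlarge', None)
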
Example #37
0
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument(
        "--leaderNodeType",
        dest="leaderNodeType",
        required=True,
        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument(
        "--keyPairName",
        dest='keyPairName',
        required=True,
        help="On AWS, the name of the AWS key pair to include on the instance."
        " On Google/GCE, this is the ssh key pair."
        " On Azure, this will be used as the owner tag.")
    parser.add_argument(
        "--publicKeyFile",
        dest='publicKeyFile',
        default="~/.ssh/id_rsa.pub",
        help="On Azure, the file"
        " containing the key pairs (the first key pair will be used).")
    parser.add_argument(
        "--boto",
        dest='botoPath',
        help="The path to the boto credentials directory. This is transferred "
        "to all nodes in order to access the AWS jobStore from non-AWS instances."
    )
    parser.add_argument(
        "-t",
        "--tag",
        metavar='NAME=VALUE',
        dest='tags',
        default=[],
        action='append',
        help="Tags are added to the AWS cluster for this node and all of its "
        "children. Tags are of the form:\n"
        " -t key1=value1 --tag key2=value2\n"
        "Multiple tags are allowed and each tag needs its own flag. By "
        "default the cluster is tagged with "
        " {\n"
        "      \"Name\": clusterName,\n"
        "      \"Owner\": IAM username\n"
        " }. ")
    parser.add_argument(
        "--vpcSubnet",
        help="VPC subnet ID to launch cluster in. Uses default subnet if not "
        "specified. This subnet needs to have auto assign IPs turned on.")
    parser.add_argument(
        "--nodeTypes",
        dest='nodeTypes',
        default=None,
        type=str,
        help="Comma-separated list of node types to create while launching the "
        "leader. The syntax for each node type depends on the provisioner "
        "used. For the aws provisioner this is the name of an EC2 instance "
        "type followed by a colon and the price in dollar to bid for a spot "
        "instance, for example 'c3.8xlarge:0.42'. Must also provide the "
        "--workers argument to specify how many workers of each node type "
        "to create.")
    parser.add_argument(
        "-w",
        "--workers",
        dest='workers',
        default=None,
        type=str,
        help=
        "Comma-separated list of the number of workers of each node type to "
        "launch alongside the leader when the cluster is created. This can be "
        "useful if running toil without auto-scaling but with need of more "
        "hardware support")
    parser.add_argument(
        "--leaderStorage",
        dest='leaderStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for the leader "
        "instance.  This is an EBS volume.")
    parser.add_argument(
        "--nodeStorage",
        dest='nodeStorage',
        type=int,
        default=50,
        help="Specify the size (in gigabytes) of the root volume for any worker "
        "instances created when using the -w flag. This is an EBS volume.")
    parser.add_argument(
        '--forceDockerAppliance',
        dest='forceDockerAppliance',
        action='store_true',
        default=False,
        help=
        "Disables sanity checking the existence of the docker image specified "
        "by TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for "
        "autoscaling.")
    parser.add_argument(
        "--azureStorageCredentials",
        dest='azureStorageCredentials',
        type=str,
        default=credential_file_path,
        help=
        "The location of the file containing the Azure storage credentials. If not specified,"
        " the default file is used with Azure provisioning. Use 'None' to disable"
        " the transfer of credentials.")
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    # checks the validity of TOIL_APPLIANCE_SELF before proceeding
    applianceSelf(forceDockerAppliance=config.forceDockerAppliance)

    spotBids = []
    nodeTypes = []
    preemptableNodeTypes = []
    numNodes = []
    numPreemptableNodes = []
    leaderSpotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            logger.error(
                'The aws extra must be installed to use this provisioner')
            raise
        provisioner = AWSProvisioner()
    elif config.provisioner == 'azure':
        try:
            from toil.provisioners.azure.azureProvisioner import AzureProvisioner
        except ImportError:
            raise RuntimeError(
                'The azure extra must be installed to use this provisioner')
        provisioner = AzureProvisioner()
    elif config.provisioner == 'gce':
        logger.info('Using a gce provisioner.')
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error(
                'The google extra must be installed to use this provisioner')
            raise
        provisioner = GCEProvisioner()
    else:
        assert False

    #Parse leader node type and spot bid
    parsedBid = config.leaderNodeType.split(':', 1)
    if len(config.leaderNodeType) != len(parsedBid[0]):
        leaderSpotBid = float(parsedBid[1])
        config.leaderNodeType = parsedBid[0]

    if (config.nodeTypes
            or config.workers) and not (config.nodeTypes and config.workers):
        raise RuntimeError(
            "The --nodeTypes and --workers options must be specified together,"
        )
    if config.nodeTypes:
        nodeTypesList = config.nodeTypes.split(",")
        numWorkersList = config.workers.split(",")
        if not len(nodeTypesList) == len(numWorkersList):
            raise RuntimeError(
                "List of node types must be the same length as the list of workers."
            )
        for nodeTypeStr, num in zip(nodeTypesList, numWorkersList):
            parsedBid = nodeTypeStr.split(':', 1)
            if len(nodeTypeStr) != len(parsedBid[0]):
                #A colon was present, so this is a preemptable (spot) node type
                preemptableNodeTypes.append(parsedBid[0])
                spotBids.append(float(parsedBid[1]))
                numPreemptableNodes.append(int(num))
            else:
                nodeTypes.append(nodeTypeStr)
                numNodes.append(int(num))

    provisioner.launchCluster(
        leaderNodeType=config.leaderNodeType,
        leaderSpotBid=leaderSpotBid,
        nodeTypes=nodeTypes,
        preemptableNodeTypes=preemptableNodeTypes,
        numWorkers=numNodes,
        numPreemptableWorkers=numPreemptableNodes,
        keyName=config.keyPairName,
        botoPath=config.botoPath,
        clusterName=config.clusterName,
        spotBids=spotBids,
        userTags=tagsDict,
        zone=config.zone,
        leaderStorage=config.leaderStorage,
        nodeStorage=config.nodeStorage,
        vpcSubnet=config.vpcSubnet,
        publicKeyFile=config.publicKeyFile,
        azureStorageCredentials=config.azureStorageCredentials)
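
The if/elif chain over config.provisioner is a dispatch-on-name pattern with lazy imports, so a missing optional extra only fails when its provisioner is actually requested. A sketch of the same idea; the module paths and extra names mirror the imports and messages above, but loadProvisioner itself is a hypothetical helper:

import importlib

def loadProvisioner(name):
    """Instantiate a provisioner, importing its module only on demand so a
    missing optional extra fails only when that provisioner is requested."""
    #(extra name, module path, class name); paths mirror the imports above
    registry = {
        'aws': ('aws', 'toil.provisioners.aws.awsProvisioner', 'AWSProvisioner'),
        'azure': ('azure', 'toil.provisioners.azure.azureProvisioner', 'AzureProvisioner'),
        'gce': ('google', 'toil.provisioners.gceProvisioner', 'GCEProvisioner'),
    }
    try:
        extra, moduleName, className = registry[name]
    except KeyError:
        raise RuntimeError('Unknown provisioner: %s' % name)
    try:
        module = importlib.import_module(moduleName)
    except ImportError:
        raise RuntimeError('The %s extra must be installed to use this provisioner' % extra)
    return getattr(module, className)()
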
Example #38
0
def main():
    parser = getBasicOptionParser()
    parser = addBasicProvisionerOptions(parser)
    parser.add_argument("--leaderNodeType", dest="leaderNodeType", required=True,
                        help="Non-preemptable node type to use for the cluster leader.")
    parser.add_argument("--keyPairName", dest='keyPairName', required=True,
                        help="The name of the AWS or ssh key pair to include on the instance")
    parser.add_argument("--boto", dest='botoPath',
                        help="The path to the boto credentials directory. This is transferred to all "
                             " nodes in order to access the AWS jobStore from non-AWS instances.")
    parser.add_argument("-t", "--tag", metavar='NAME=VALUE', dest='tags', default=[], action='append',
                        help="Tags are added to the AWS cluster for this node and all of its "
                             "children. Tags are of the form:\n"
                             " -t key1=value1 --tag key2=value2\n"
                             "Multiple tags are allowed and each tag needs its own flag. By "
                             "default the cluster is tagged with "
                             " {\n"
                             "      \"Name\": clusterName,\n"
                             "      \"Owner\": IAM username\n"
                             " }. ")
    parser.add_argument("--vpcSubnet",
                        help="VPC subnet ID to launch cluster in. Uses default subnet if not specified. "
                        "This subnet needs to have auto assign IPs turned on.")
    parser.add_argument("--nodeTypes", dest='nodeTypes', default=None, type=str,
                        help="Comma-separated list of node types to create while launching the leader. The "
                             "syntax for each node type depends on the "
                             "provisioner used. For the aws provisioner this is the name of an "
                             "EC2 instance type followed by a colon and the price in dollar to "
                             "bid for a spot instance, for example 'c3.8xlarge:0.42'. Must also provide "
                             "the --workers argument to specify how many workers of each node type to create")
    parser.add_argument("-w", "--workers", dest='workers', default=None, type=str,
                        help="Comma-separated list of the number of workers of each node type to launch "
                             "alongside the leader when the "
                             "cluster is created. This can be useful if running toil without "
                             "auto-scaling but with need of more hardware support")
    parser.add_argument("--leaderStorage", dest='leaderStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for the leader instance. "
                             "This is an EBS volume.")
    parser.add_argument("--nodeStorage", dest='nodeStorage', type=int, default=50,
                        help="Specify the size (in gigabytes) of the root volume for any worker instances "
                             "created when using the -w flag. This is an EBS volume.")
    config = parseBasicOptions(parser)
    tagsDict = None if config.tags is None else createTagsDict(config.tags)

    spotBids = []
    nodeTypes = []
    preemptableNodeTypes = []
    numNodes = []
    numPreemptableNodes = []
    leaderSpotBid = None
    if config.provisioner == 'aws':
        logger.info('Using aws provisioner.')
        try:
            from toil.provisioners.aws.awsProvisioner import AWSProvisioner
        except ImportError:
            logger.error('The aws extra must be installed to use this provisioner')
            raise
        provisioner = AWSProvisioner()
    elif config.provisioner == 'gce':
        logger.info('Using a gce provisioner.')
        try:
            from toil.provisioners.gceProvisioner import GCEProvisioner
        except ImportError:
            logger.error('The google extra must be installed to use this provisioner')
            raise
        provisioner = GCEProvisioner()
    else:
        assert False


    #Parse leader node type and spot bid
    parsedBid = config.leaderNodeType.split(':', 1)
    if len(config.leaderNodeType) != len(parsedBid[0]):
        leaderSpotBid = float(parsedBid[1])
        config.leaderNodeType = parsedBid[0]

    if (config.nodeTypes or config.workers) and not (config.nodeTypes and config.workers):
        raise RuntimeError("The --nodeTypes and --workers options must be specified together,")
    if config.nodeTypes:
        nodeTypesList = config.nodeTypes.split(",")
        numWorkersList = config.workers.split(",")
        if not len(nodeTypesList) == len(numWorkersList):
            raise RuntimeError("List of node types must be same length as list of numbers of workers.")
        for nodeTypeStr, num in zip(nodeTypesList, numWorkersList):
            parsedBid = nodeTypeStr.split(':', 1)
            if len(nodeTypeStr) != len(parsedBid[0]):
                #A colon was present, so this is a preemptable (spot) node type
                preemptableNodeTypes.append(parsedBid[0])
                spotBids.append(float(parsedBid[1]))
                numPreemptableNodes.append(int(num))
            else:
                nodeTypes.append(nodeTypeStr)
                numNodes.append(int(num))


    provisioner.launchCluster(leaderNodeType=config.leaderNodeType,
                              leaderSpotBid=leaderSpotBid,
                              nodeTypes=nodeTypes,
                              preemptableNodeTypes=preemptableNodeTypes,
                              numWorkers=numNodes,
                              numPreemptableWorkers=numPreemptableNodes,
                              keyName=config.keyPairName,
                              botoPath=config.botoPath,
                              clusterName=config.clusterName,
                              spotBids=spotBids,
                              userTags=tagsDict,
                              zone=config.zone,
                              leaderStorage=config.leaderStorage,
                              nodeStorage=config.nodeStorage,
                              vpcSubnet=config.vpcSubnet)
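
createTagsDict is called in these launchers but never shown. A plausible sketch of a NAME=VALUE parser with that shape, assuming one key=value pair per --tag flag (an illustration, not toil's actual implementation):

def createTagsDictSketch(tagList):
    """Turn ['key1=value1', 'key2=value2'], as collected by the repeated
    -t/--tag flag, into {'key1': 'value1', 'key2': 'value2'}."""
    tags = {}
    for tag in tagList:
        key, sep, value = tag.partition('=')
        if not sep:
            raise ValueError('Tag %r is not of the form NAME=VALUE' % tag)
        tags[key] = value
    return tags

assert createTagsDictSketch(['key1=value1', 'key2=value2']) == \
       {'key1': 'value1', 'key2': 'value2'}
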
Example #39
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser("usage: %prog [--jobStore] JOB_TREE_DIR [options]", "%prog 0.1")
    
    parser.add_option("--jobStore", dest="jobStore",
                      help="Job store path. Can also be specified as the single argument to the script.\
                       default=%default", default=os.path.abspath("./toil"))
    
    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%default",
                      default=False)
    
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%default",
                      default=False)
    
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    assert len(args) <= 1 #Only toil may be specified as argument
    if len(args) == 1: #Allow toil directory as arg
        options.jobStore = args[0]
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for toil")
    assert options.jobStore is not None
    
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        print "The root job of the jobStore is not present, the toil workflow has probably completed okay"
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob)
    
    failedJobs = [ job for job in toilState.updatedJobs | \
                  set(toilState.successorCounts.keys()) \
                  if job.remainingRetryCount == 0 ]
    
    print "There are %i active jobs, %i parent jobs with children, and \
    %i totally failed jobs currently in toil workflow: %s" % \
    (len(toilState.updatedJobs), len(toilState.successorCounts),
     len(failedJobs), options.jobStore)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print "Log file for job %s is not present" % job.jobStoreID
        if len(failedJobs) == 0:
            print "There are no failed jobs to report"   
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
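
logStream is used throughout these status scripts with the call shape logStream(fileHandle, jobStoreID, logFunction). A minimal re-implementation sketch inferred from those call sites (not toil's source):

def logStreamSketch(fileHandle, jobStoreID, logFunction):
    """Feed each line of a failed job's log file to a logging function,
    prefixed with the job's ID."""
    for line in fileHandle:
        if isinstance(line, bytes):
            line = line.decode('utf-8', errors='replace')
        logFunction('%s: %s' % (jobStoreID, line.rstrip('\n')))

At a call site this would look like logStreamSketch(logFileHandle, job.jobStoreID, logger.warn), matching the shape used above.
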
Example #40
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobGraph we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [ job for job in totalJobs if job.remainingRetryCount == 0 ]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
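
The comment above notes that each toilState.updatedJobs entry is a tuple whose first element is the jobGraph. Reduced to plain data, the set expression works like this (the two-field (jobGraph, resultStatus) shape is an assumption drawn from that comment):

updatedJobs = {('jobGraphA', 0), ('jobGraphB', 1)}  #(jobGraph, resultStatus) pairs
successorCounts = {'jobGraphC': 2}

jobGraphs = {jobTuple[0] for jobTuple in updatedJobs}
totalJobs = set(successorCounts.keys()) | jobGraphs
assert totalJobs == {'jobGraphA', 'jobGraphB', 'jobGraphC'}
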
Example #41
0
def main():
    """Reports the state of the toil.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of a job store that holds the information about the "
        "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print(
            'The root job of the job store is absent, the workflow completed successfully.',
            file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
        foundJobStoreIDs = set()
        totalJobs = []

        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successorJobStoreID in map(lambda x: x.jobStoreID, jobs):
                    if (successorJobStoreID not in foundJobStoreIDs
                            and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for serviceJobStoreID in map(lambda x: x.jobStoreID, jobs):
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))

        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple of minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID is None and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info(
        'There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
        'and %i totally failed jobs currently in %s.' %
        (len(totalJobs), len(hasChildren), len(hasServices), len(services),
         len(failedJobs), config.jobStore))

    if currentlyRunning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunning),
                    '\n'.join(map(str, currentlyRunning)))

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join(
                (str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        # When the workflow is complete, all jobs will have been removed from the job store.
        sys.exit(1)
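
traverseGraph above recurses once per job, which can hit Python's recursion limit on very deep workflows. An equivalent iterative sketch with an explicit stack and the same visited-set guard; the successorsOf callback stands in for the job store lookups:

def traverse(rootID, successorsOf):
    """Collect every node reachable from rootID, visiting each exactly once.
    `successorsOf` maps a node ID to an iterable of successor IDs."""
    foundIDs = set()
    order = []
    stack = [rootID]
    while stack:
        nodeID = stack.pop()
        if nodeID in foundIDs:
            continue
        foundIDs.add(nodeID)
        order.append(nodeID)
        stack.extend(successorsOf(nodeID))
    return order

graph = {'root': ['a', 'b'], 'a': ['b'], 'b': []}
assert set(traverse('root', graph.__getitem__)) == {'root', 'a', 'b'}
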
Example #42
0
def main():
    """Reports the state of the toil.
    """

    ##########################################
    # Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help=(
            "Store in which to place job management files \
              and the global accessed temporary files"
            "(If this is a file path this needs to be globally accessible "
            "by all machines running jobs).\n"
            "If the store already exists and restart is false an"
            " ExistingJobStoreException exception will be thrown."
        ),
    )

    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
        default=False,
    )

    parser.add_argument(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False,
    )
    parser.add_argument("--version", action="version", version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################

    logger.info("Checking if we have files for toil")
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################

    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        print "The root job of the jobStore is not present, the toil workflow has probably completed okay"
        sys.exit(0)

    toilState = ToilState(jobStore, rootJob)

    failedJobs = [
        job for job in toilState.updatedJobs | set(toilState.successorCounts.keys()) if job.remainingRetryCount == 0
    ]

    print "There are %i active jobs, %i parent jobs with children, and \
    %i totally failed jobs currently in toil workflow: %s" % (
        len(toilState.updatedJobs),
        len(toilState.successorCounts),
        len(failedJobs),
        options.jobStore,
    )

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print "Log file for job %s is not present" % job.jobStoreID
        if len(failedJobs) == 0:
            print "There are no failed jobs to report"

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and options.failIfNotComplete:
        sys.exit(1)
Example #43
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
              help=("Store in which to place job management files \
              and the global accessed temporary files"
              "(If this is a file path this needs to be globally accessible "
              "by all machines running jobs).\n"
              "If the store already exists and restart is false an"
              " JobStoreCreationException exception will be thrown."))
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None

    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.loadOrCreateJobStore(options.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [ job for job in totalJobs if job.remainingRetryCount == 0 ]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), options.jobStore), file=sys.stderr)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
Example #44
0
def main():
    """Reports the state of the toil.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser(
        "usage: %prog [--toil] JOB_TREE_DIR [options]", "%prog 0.1")

    parser.add_option(
        "--toil",
        dest="toil",
        help=
        "Batchjob store path. Can also be specified as the single argument to the script.\
                       default=%default",
        default='./toil')

    parser.add_option(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%default",
        default=False)

    parser.add_option(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%default",
        default=False)

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  #Only toil may be specified as argument
    if len(args) == 1:  #Allow toil directory as arg
        options.toil = args[0]

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for toil")
    assert options.toil is not None

    ##########################################
    #Survey the status of the batchjob and report.
    ##########################################

    jobStore = loadJobStore(options.toil)
    config = jobStore.config
    toilState = jobStore.loadToilState()  #This initialises the object toil.toilState used to track the active toil

    failedJobs = [ batchjob for batchjob in toilState.updatedJobs | \
                  set(toilState.childCounts.keys()) \
                  if batchjob.remainingRetryCount == 0 ]

    print "There are %i active jobs, %i parent jobs with children, \
    %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) \
    currently in toil: %s"                           % \
    (len(toilState.updatedJobs), len(toilState.childCounts),
     len(failedJobs), len(toilState.shellJobs), options.toil)

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for batchjob in failedJobs:
            if batchjob.logJobStoreFileID is not None:
                with batchjob.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, batchjob.jobStoreID, logger.warn)
            else:
                print "Log file for batchjob %s is not present" % batchjob.jobStoreID
        if len(failedJobs) == 0:
            print "There are no failed jobs to report"

    if (len(toilState.updatedJobs) + len(toilState.childCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
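
Example #44 above (like #45 that follows) dates from toil's optparse era, when the job store could arrive either via --toil or as a bare positional argument. That fallback isolated as a sketch, using optparse directly rather than toil's wrappers:

from optparse import OptionParser

def parseWithPositionalFallback(argv):
    """Let a single bare positional argument override the --toil option."""
    parser = OptionParser()
    parser.add_option('--toil', dest='toil', default='./toil')
    options, args = parser.parse_args(argv)
    assert len(args) <= 1, 'At most one positional argument is allowed'
    if args:
        options.toil = args[0]
    return options

assert parseWithPositionalFallback(['/tmp/myJobStore']).toil == '/tmp/myJobStore'
assert parseWithPositionalFallback([]).toil == './toil'
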
Example #45
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser("usage: %prog [--toil] JOB_TREE_DIR [options]", "%prog 0.1")
    
    parser.add_option("--toil", dest="toil",
                      help="Batchjob store path. Can also be specified as the single argument to the script.\
                       default=%default", default='./toil')
    
    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%default",
                      default=False)
    
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%default",
                      default=False)
    
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    assert len(args) <= 1 #Only toil may be specified as argument
    if len(args) == 1: #Allow toil directory as arg
        options.toil = args[0]
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for toil")
    assert options.toil is not None
    
    ##########################################
    #Survey the status of the batchjob and report.
    ##########################################  
    
    jobStore = loadJobStore(options.toil)
    config = jobStore.config
    toilState = jobStore.loadToilState() #This initialises the object toil.toilState used to track the active toil
    
    failedJobs = [ batchjob for batchjob in toilState.updatedJobs | \
                  set(toilState.childCounts.keys()) \
                  if batchjob.remainingRetryCount == 0 ]
    
    print "There are %i active jobs, %i parent jobs with children, \
    %i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) \
    currently in toil: %s" % \
    (len(toilState.updatedJobs), len(toilState.childCounts),
     len(failedJobs), len(toilState.shellJobs), options.toil)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for batchjob in failedJobs:
            if batchjob.logJobStoreFileID is not None:
                with batchjob.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, batchjob.jobStoreID, logger.warn)
            else:
                print "Log file for batchjob %s is not present" % batchjob.jobStoreID
        if len(failedJobs) == 0:
            print "There are no failed jobs to report"   
    
    if (len(toilState.updatedJobs) + len(toilState.childCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)