Exemple #1
0
 def run(self, file_store):
     """Fill in input defaults and delegate the real work to a child CWLJob.

     The stored job order is resolved with ``resolve_indirect``, defaults
     declared by the tool's ``inputs`` are applied to it in place, and a
     ``CWLJob`` built from the completed job order is added as a child of
     this job.

     :param file_store: file store handed in by the caller; not used here.
     :return: the value of ``rv()`` on the child job (presumably a promise
         for the child's result -- confirm against the job API).
     """
     job_order = resolve_indirect(self.cwljob)

     # Defaults are resolved relative to the runtime context's base
     # directory, falling back to an empty path when none is configured.
     tool_inputs = self.cwltool.tool['inputs']
     base_dir = self.runtime_context.basedir or ""
     fs_access = self.runtime_context.make_fs_access(base_dir)
     fill_in_defaults(tool_inputs, job_order, fs_access)

     child = CWLJob(self.cwltool, job_order, self.runtime_context)
     self.addChild(child)
     return child.rv()
Exemple #2
0
    def run(self, file_store):
        """Run the wrapped CWL tool once and return its output object.

        Steps, in order:

        1. Resolve the stored job order and apply defaults from
           ``self.step_inputs``.
        2. Remove every job-order entry whose key is not a field name in the
           tool's ``inputs_record_schema``.
        3. Prepare ``TMPDIR``, an ``out`` directory inside the file store's
           local temp dir, and a nested temporary output prefix.
        4. Execute the tool with ``SingleJobExecutor`` using a copy of the
           runtime context that points at those directories.
        5. Expand directory listings in the output and pass every File
           object through ``uploadFile``.

        :param file_store: file store supplying the local temp directory and
            used (through the partials below) for reading and writing files.
        :return: the output object produced by the executor, after the
            directory and file adjustments above.
        :raises cwltool.errors.WorkflowException: if the executor reports a
            status other than ``"success"``.
        """
        cwljob = resolve_indirect(self.cwljob)
        fill_in_defaults(
            self.step_inputs, cwljob,
            self.runtime_context.make_fs_access(""))

        # Drop inputs the tool does not declare.  The keys are snapshotted
        # with list() because removing entries while iterating the live
        # dictionary raises RuntimeError on Python 3.
        declared_inputs = {
            field['name']
            for field in self.cwltool.inputs_record_schema['fields']}
        for inp_id in list(cwljob.keys()):
            if inp_id not in declared_inputs:
                cwljob.pop(inp_id)

        # Exports temporary directory for batch systems that reset TMPDIR
        os.environ["TMPDIR"] = os.path.realpath(
            self.runtime_context.tmpdir or file_store.getLocalTempDir())
        outdir = os.path.join(file_store.getLocalTempDir(), "out")
        os.mkdir(outdir)
        top_tmp_outdir = self.workdir or os.environ["TMPDIR"]
        tmp_outdir_prefix = os.path.join(
            _makeNestedTempDir(top=top_tmp_outdir, seed=outdir, levels=2),
            "out_tmpdir")
        # Remember the top-level temp dir on the job (presumably so it can be
        # cleaned up later -- confirm against the code reading openTempDirs).
        self.openTempDirs.append(top_tmp_outdir)

        # Shared between the download hook and the upload step below.
        index = {}
        existing = {}

        # Work on a copy so the job's own runtime context is left untouched.
        runtime_context = self.runtime_context.copy()
        runtime_context.basedir = os.getcwd()
        runtime_context.outdir = outdir
        runtime_context.tmp_outdir_prefix = tmp_outdir_prefix
        runtime_context.tmpdir_prefix = file_store.getLocalTempDir()
        runtime_context.make_fs_access = functools.partial(
            ToilFsAccess, file_store=file_store)
        runtime_context.toil_get_file = functools.partial(
            toil_get_file, file_store, index, existing)
        # Run the tool
        (output, status) = cwltool.executors.SingleJobExecutor().execute(
            self.cwltool, cwljob, runtime_context, cwllogger)
        if status != "success":
            raise cwltool.errors.WorkflowException(status)

        # Populate recursive listings for Directory objects found in outdir.
        adjustDirObjs(output, functools.partial(
            get_listing, cwltool.stdfsaccess.StdFsAccess(outdir),
            recursive=True))

        # Hand every output File to uploadFile, writing through the file
        # store's global-file wrapper.
        adjustFileObjs(output, functools.partial(
            uploadFile, functools.partial(writeGlobalFileWrapper, file_store),
            index, existing))

        return output
Exemple #3
0
def upload_job_order(arvrunner, name, tool, job_order):
    """Upload local files referenced in the input object and return updated input
    object with 'location' updated to the proper keep references.

    ``job_order`` is modified in place: secondary files are discovered and
    added, dependencies are passed to ``upload_dependencies``, and the
    bookkeeping keys ``"id"`` and ``"job_order"`` are removed if present.

    :param arvrunner: runner object; its ``fs_access`` attribute is used for
        default filling and secondary-file discovery, and the runner itself
        is passed to ``upload_dependencies``.
    :param name: name forwarded to ``upload_dependencies``.
    :param tool: loaded tool; ``tool.tool["inputs"]``, ``hints``,
        ``requirements``, ``metadata`` and ``doc_loader`` are read.
    :param job_order: mapping of input values; mutated and returned.
    :return: the same ``job_order`` object after the updates above.
    """

    # Make a copy of the job order and set defaults.
    builder_job_order = copy.copy(job_order)

    # fill_in_defaults throws an error if there are any
    # missing required parameters, we don't want it to do that
    # so make them all optional.
    inputs_copy = copy.deepcopy(tool.tool["inputs"])
    for inp in inputs_copy:
        # Normalise to a list before testing: a plain ``in`` on a string
        # type would be a substring match and could mistake a type name
        # that merely contains "null" for an already-optional input.
        type_options = aslist(inp["type"])
        if "null" not in type_options:
            inp["type"] = ["null"] + type_options

    fill_in_defaults(inputs_copy,
                     builder_job_order,
                     arvrunner.fs_access)
    # Need to create a builder object to evaluate expressions.
    builder = make_builder(builder_job_order,
                           tool.hints,
                           tool.requirements,
                           ArvRuntimeContext(),
                           tool.metadata)
    # Now update job_order with secondaryFiles
    discover_secondary_files(arvrunner.fs_access,
                             builder,
                             tool.tool["inputs"],
                             job_order)

    # Called for its effect on job_order; the returned mapper is not needed.
    upload_dependencies(arvrunner,
                        name,
                        tool.doc_loader,
                        job_order,
                        job_order.get("id", "#"),
                        False)

    if "id" in job_order:
        del job_order["id"]

    # Need to filter this out, gets added by cwltool when providing
    # parameters on the command line.
    if "job_order" in job_order:
        del job_order["job_order"]

    return job_order
Exemple #4
0
def main(args=None, stdout=sys.stdout):
    """Main method for toil-cwl-runner.

    Parses the command line, loads and validates the CWL document and its
    job order, imports the referenced files into the Toil job store, runs
    (or restarts) the workflow, stages the outputs and writes the final
    output object as indented JSON.

    :param args: command-line arguments without the program name; when
        ``None``, ``sys.argv[1:]`` is used.
    :param stdout: stream the resulting JSON output object is written to.
    :return: ``0`` on success, ``33`` when the tool uses a requirement that
        raises ``cwltool.process.UnsupportedRequirement``.
    :raises NoSuchJobStoreException: if a provisioner is selected without an
        explicit ``--jobStore``.
    """
    cwllogger.removeHandler(defaultStreamHandler)
    config = Config()
    config.cwl = True
    parser = argparse.ArgumentParser()
    addOptions(parser, config)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    dockergroup = parser.add_mutually_exclusive_group()
    dockergroup.add_argument(
        "--user-space-docker-cmd",
        help="(Linux/OS X only) Specify a user space docker command (like "
        "udocker or dx-docker) that will be used to call 'pull' and 'run'")
    dockergroup.add_argument(
        "--singularity", action="store_true", default=False,
        help="[experimental] Use Singularity runtime for running containers. "
        "Requires Singularity v2.3.2+ and Linux with kernel version v3.18+ or "
        "with overlayfs support backported.")
    dockergroup.add_argument(
        "--no-container", action="store_true", help="Do not execute jobs in a "
        "Docker container, even when `DockerRequirement` "
        "is specified under `hints`.")
    parser.add_argument(
        "--preserve-environment", type=str, nargs='+',
        help="Preserve specified environment variables when running"
        " CommandLineTools", metavar=("VAR1 VAR2"), default=("PATH",),
        dest="preserve_environment")
    parser.add_argument(
        "--destBucket", type=str,
        help="Specify a cloud bucket endpoint for output files.")
    parser.add_argument(
        "--beta-dependency-resolvers-configuration", default=None)
    parser.add_argument("--beta-dependencies-directory", default=None)
    parser.add_argument(
        "--beta-use-biocontainers", default=None, action="store_true")
    parser.add_argument(
        "--beta-conda-dependencies", default=None, action="store_true")
    parser.add_argument("--tmpdir-prefix", type=Text,
                        help="Path prefix for temporary directories",
                        default="tmp")
    parser.add_argument("--tmp-outdir-prefix", type=Text,
                        help="Path prefix for intermediate output directories",
                        default="tmp")
    parser.add_argument(
        "--force-docker-pull", action="store_true", default=False,
        dest="force_docker_pull",
        help="Pull latest docker image even if it is locally present")
    parser.add_argument(
        "--no-match-user", action="store_true", default=False,
        help="Disable passing the current uid to `docker run --user`")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    # we use workdir as jobStore:
    options = parser.parse_args([workdir] + args)

    # if tmpdir_prefix is not the default value, set workDir too
    if options.tmpdir_prefix != 'tmp':
        options.workDir = options.tmpdir_prefix

    if options.provisioner and not options.jobStore:
        raise NoSuchJobStoreException(
            'Please specify a jobstore with the --jobStore option when specifying a provisioner.')

    use_container = not options.no_container

    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    tmp_outdir_prefix = os.path.abspath(options.tmp_outdir_prefix)
    tmpdir_prefix = os.path.abspath(options.tmpdir_prefix)

    # Shared by the import step and the final staging step.
    fileindex = {}
    existing = {}
    conf_file = getattr(options,
                        "beta_dependency_resolvers_configuration", None)
    use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
    job_script_provider = None
    if conf_file or use_conda_dependencies:
        dependencies_configuration = DependenciesConfiguration(options)
        job_script_provider = dependencies_configuration

    options.default_container = None
    runtime_context = cwltool.context.RuntimeContext(vars(options))
    runtime_context.find_default_container = functools.partial(
        find_default_container, options)
    runtime_context.workdir = workdir
    runtime_context.move_outputs = "leave"
    runtime_context.rm_tmpdir = False
    loading_context = cwltool.context.LoadingContext(vars(options))

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            # Default resource hints derived from the Toil configuration;
            # memory and disk are divided by 2**20 before being handed over.
            loading_context.hints = [{
                "class": "ResourceRequirement",
                "coresMin": toil.config.defaultCores,
                "ramMin": toil.config.defaultMemory / (2**20),
                "outdirMin": toil.config.defaultDisk / (2**20),
                "tmpdirMin": 0
            }]
            loading_context.construct_tool_object = toil_make_tool
            loading_context.resolver = cwltool.resolver.tool_resolver
            loading_context.strict = not options.not_strict
            # cwltool's helpers read these attribute names from the options.
            options.workflow = options.cwltool
            options.job_order = options.cwljob
            uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
                options.cwltool, loading_context.resolver,
                loading_context.fetcher_constructor)
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job_order_object, options.basedir, jobloader = \
                cwltool.main.load_job_order(
                    options, sys.stdin, loading_context.fetcher_constructor,
                    loading_context.overrides_list, tool_file_uri)
            document_loader, workflowobj, uri = \
                cwltool.load_tool.fetch_document(
                    uri, loading_context.resolver,
                    loading_context.fetcher_constructor)
            document_loader, avsc_names, processobj, metadata, uri = \
                cwltool.load_tool.validate_document(
                    document_loader, workflowobj, uri,
                    loading_context.enable_dev, loading_context.strict, False,
                    loading_context.fetcher_constructor, False,
                    loading_context.overrides_list,
                    do_validate=loading_context.do_validate)
            loading_context.overrides_list.extend(
                metadata.get("cwltool:overrides", []))
            try:
                tool = cwltool.load_tool.make_tool(
                    document_loader, avsc_names, metadata, uri,
                    loading_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33
            runtime_context.secret_store = SecretStore()
            initialized_job_order = cwltool.main.init_job_order(
                job_order_object, options, tool, jobloader, sys.stdout,
                secret_store=runtime_context.secret_store)
            fs_access = cwltool.stdfsaccess.StdFsAccess(options.basedir)
            fill_in_defaults(
                tool.tool["inputs"], initialized_job_order, fs_access)

            def path_to_loc(obj):
                # Convert a "path" entry into "location" when only the
                # former is present.
                if "location" not in obj and "path" in obj:
                    obj["location"] = obj["path"]
                    del obj["path"]

            def import_files(tool):
                # Normalise File/Directory objects in ``tool`` and import
                # the referenced files through toil.importFile.
                visit_class(tool, ("File", "Directory"), path_to_loc)
                visit_class(tool, ("File", ), functools.partial(
                    add_sizes, fs_access))
                normalizeFilesDirs(tool)
                adjustDirObjs(tool, functools.partial(
                    get_listing, fs_access, recursive=True))
                adjustFileObjs(tool, functools.partial(
                    uploadFile, toil.importFile, fileindex, existing,
                    skip_broken=True))

            tool.visit(import_files)

            for inp in tool.tool["inputs"]:
                # set_secondary closes over ``inp`` and is only called
                # within the same loop iteration that defines it.
                def set_secondary(fileobj):
                    if isinstance(fileobj, Mapping) \
                            and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [
                                {"location": cwltool.builder.substitute(
                                    fileobj["location"], sf), "class": "File"}
                                for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, MutableSequence):
                        for entry in fileobj:
                            set_secondary(entry)

                if shortname(inp["id"]) in initialized_job_order \
                        and inp.get("secondaryFiles"):
                    set_secondary(initialized_job_order[shortname(inp["id"])])

            import_files(initialized_job_order)
            visitSteps(tool, import_files)

            try:
                runtime_context.use_container = use_container
                runtime_context.tmpdir = os.path.realpath(tmpdir_prefix)
                runtime_context.tmp_outdir_prefix = os.path.realpath(
                    tmp_outdir_prefix)
                runtime_context.job_script_provider = job_script_provider
                runtime_context.force_docker_pull = options.force_docker_pull
                runtime_context.no_match_user = options.no_match_user
                (wf1, _) = makeJob(tool, {}, None, runtime_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33

            wf1.cwljob = initialized_job_order
            # Clean up temporary directories only created with CWLJobs.
            # ``wf1`` is an instance, so this must be an isinstance() check;
            # an identity comparison against the class is never true.
            if isinstance(wf1, CWLJob):
                wf1.addFollowOnFn(cleanTempDirs, wf1)
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        # Stage files. Specify destination bucket if specified in CLI
        # options. If destination bucket not passed in,
        # options.destBucket's value will be None.
        toilStageFiles(
            toil,
            outobj,
            outdir,
            fileindex,
            existing,
            export=True,
            destBucket=options.destBucket)

        # Checksums are only computed when no destination bucket is given.
        if not options.destBucket:
            visit_class(outobj, ("File",), functools.partial(
                compute_checksums, cwltool.stdfsaccess.StdFsAccess("")))

        visit_class(outobj, ("File", ), MutationManager().unset_generation)
        stdout.write(json.dumps(outobj, indent=4))

    return 0