Example #1
def execute_workflow_step(workflow,
                          task_id,
                          job_data,
                          cwl_args=None,
                          executor=None):
    """
    Constructs and executes single step workflow based on the "workflow"
    and "task_id". "cwl_args" can be used to update default parameters
    used for loading and runtime contexts. Exports json file with the
    execution results.
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id, job_data=job_data)

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({  # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(
        ),  # job should already have abs path for inputs, so this is useless
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder,
                                      task_id + "_step_workflow.cwl")

    fast_cwl_step_load(  # saves the new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path)

    _stderr = sys.stderr  # swap in the real stderr so the executor's logging behaves
    sys.stderr = sys.__stderr__
    step_outputs, step_status = executor(
        slow_cwl_load(workflow=workflow_step_path, cwl_args=default_cwl_args),
        job_data, RuntimeContext(default_cwl_args))
    sys.stderr = _stderr

    if step_status != "success":
        raise ValueError

    # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
    visit_class(step_outputs, ("File", ), MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report
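
A hedged usage sketch for the function above; the import path, the job_data layout, and the file names are assumptions for illustration, not part of the original example.

import os

# Assumed import path; adjust to wherever execute_workflow_step is defined.
from cwl_airflow.utilities.cwl import execute_workflow_step

job_data = {
    "tmp_folder": "/tmp/run_001",  # hypothetical folder consumed by get_temp_folders
    "fastq_file": {"class": "File", "location": os.path.abspath("reads.fastq")},
}

step_outputs, step_report = execute_workflow_step(
    workflow="/tmp/run_001/workflow.cwl",  # parent workflow containing the step
    task_id="align_reads",                 # hypothetical id of the step to run
    job_data=job_data,
)
print("step outputs exported to", step_report)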
Example #2
    def execute(self, context):

        post_status(context)

        self.cwlwf, it_is_workflow = load_cwl(
            self.dag.default_args["cwl_workflow"], self.dag.default_args)
        self.cwl_step = [
            step for step in self.cwlwf.steps
            if self.task_id == step.id.split("#")[-1]
        ][0] if it_is_workflow else self.cwlwf

        _logger.info('{0}: Running!'.format(self.task_id))

        upstream_task_ids = [t.task_id for t in self.upstream_list] + \
                            ([self.reader_task_id] if self.reader_task_id else [])
        _logger.debug('{0}: Collecting outputs from: \n{1}'.format(
            self.task_id, json.dumps(upstream_task_ids, indent=4)))

        upstream_data = self.xcom_pull(context=context,
                                       task_ids=upstream_task_ids)
        _logger.info('{0}: Upstream data: \n {1}'.format(
            self.task_id, json.dumps(upstream_data, indent=4)))

        promises = {}
        for data in upstream_data:  # each entry holds "promises" and, optionally, "outdir"
            promises = merge(promises, data["promises"])
            if "outdir" in data:
                self.outdir = data["outdir"]

        _d_args = self.dag.default_args

        if not self.outdir:
            self.outdir = _d_args['tmp_folder']

        _logger.debug('{0}: Step inputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["inputs"], indent=4)))

        _logger.debug('{0}: Step outputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["outputs"], indent=4)))

        jobobj = {}

        for inp in self.cwl_step.tool["inputs"]:
            jobobj_id = shortname(inp["id"]).split("/")[-1]
            source_ids = []
            promises_outputs = []
            try:
                source_field = inp["source"] if it_is_workflow \
                    else inp.get("id")
                source_ids = [shortname(s) for s in source_field] \
                    if isinstance(source_field, list) \
                    else [shortname(source_field)]
                promises_outputs = [
                    promises[source_id] for source_id in source_ids
                    if source_id in promises
                ]
            except Exception:  # the step input may lack a "source" field
                _logger.warning(
                    "{0}: Couldn't find source field in step input: {1}".
                    format(self.task_id, json.dumps(inp, indent=4)))

            _logger.info(
                '{0}: For input {1} with source_ids: {2} found upstream outputs: \n{3}'
                .format(self.task_id, jobobj_id, source_ids, promises_outputs))

            if len(promises_outputs) > 1:
                if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                    jobobj[jobobj_id] = flatten(promises_outputs)
                else:
                    jobobj[jobobj_id] = promises_outputs
            # Should also check for [None]; in that case the default value must be used
            elif len(promises_outputs) == 1 and (promises_outputs[0]
                                                 is not None):
                jobobj[jobobj_id] = promises_outputs[0]
            elif "valueFrom" in inp:
                jobobj[jobobj_id] = None
            elif "default" in inp:
                d = copy.copy(inp["default"])
                jobobj[jobobj_id] = d
            else:
                continue

        _logger.debug('{0}: Collected job object: \n {1}'.format(
            self.task_id, json.dumps(jobobj, indent=4)))

        def _post_scatter_eval(shortio, cwl_step):
            _value_from = {
                shortname(i["id"]).split("/")[-1]: i["valueFrom"]
                for i in cwl_step.tool["inputs"] if "valueFrom" in i
            }
            _logger.debug('{0}: Step inputs with valueFrom: \n{1}'.format(
                self.task_id, json.dumps(_value_from, indent=4)))

            def value_from_func(k, v):
                if k in _value_from:
                    return expression.do_eval(_value_from[k],
                                              shortio,
                                              self.cwlwf.tool.get(
                                                  "requirements", []),
                                              None,
                                              None, {},
                                              context=v)
                else:
                    return v

            return {k: value_from_func(k, v) for k, v in shortio.items()}

        job = _post_scatter_eval(jobobj, self.cwl_step)
        _logger.info('{0}: Final job data: \n {1}'.format(
            self.task_id, json.dumps(job, indent=4)))

        _d_args['outdir'] = tempfile.mkdtemp(
            prefix=os.path.join(self.outdir, "step_tmp"))
        _d_args['tmpdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_tmp_')
        _d_args['tmp_outdir_prefix'] = os.path.join(_d_args['outdir'],
                                                    'cwl_outdir_')

        _d_args["record_container_id"] = True
        _d_args["cidfile_dir"] = _d_args['outdir']
        _d_args["cidfile_prefix"] = self.task_id

        _logger.debug('{0}: Runtime context: \n {1}'.format(self.task_id, _d_args))

        executor = SingleJobExecutor()
        runtimeContext = RuntimeContext(_d_args)
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess)

        for inp in self.cwl_step.tool["inputs"]:
            if inp.get("not_connected"):
                del job[shortname(inp["id"].split("/")[-1])]

        _stderr = sys.stderr
        sys.stderr = sys.__stderr__
        (output, status) = executor(
            self.cwl_step.embedded_tool if it_is_workflow else self.cwl_step,
            job,
            runtimeContext,
            logger=_logger)
        sys.stderr = _stderr

        if not output and status == "permanentFail":
            raise ValueError(
                "Workflow step '{}' failed with status 'permanentFail'".format(
                    self.task_id))

        _logger.debug('{0}: Embedded tool outputs: \n {1}'.format(
            self.task_id, json.dumps(output, indent=4)))

        promises = {}

        for out in self.cwl_step.tool["outputs"]:

            out_id = shortname(out["id"])
            jobout_id = out_id.split("/")[-1]
            try:
                promises[out_id] = output[jobout_id]
            except KeyError:  # the tool produced no value for this output
                continue

        # Unsetting the Generation from final output object
        visit_class(promises, ("File", ), MutationManager().unset_generation)

        data = {"promises": promises, "outdir": self.outdir}

        _logger.info('{0}: Output: \n {1}'.format(self.task_id,
                                                  json.dumps(data, indent=4)))

        return data
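
To make the linkMerge handling above concrete, here is a self-contained sketch of the two merge behaviors; plain lists stand in for real upstream promise values, and the flatten step mirrors what the imported flatten() helper is assumed to do.

# Hedged illustration of the linkMerge branch above.
promises_outputs = [["a.txt"], ["b.txt"]]  # values from two upstream sources

# "merge_flattened": concatenate the per-source values into one flat list
merge_flattened = [item for sub in promises_outputs for item in sub]
assert merge_flattened == ["a.txt", "b.txt"]

# "merge_nested" (the default): keep one entry per upstream source
merge_nested = promises_outputs
assert merge_nested == [["a.txt"], ["b.txt"]]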
Example #3
    def executor(self, tool, job_order, **kwargs):
        final_output = []
        final_status = []

        def output_callback(out, processStatus):
            final_status.append(processStatus)
            final_output.append(out)

        if "basedir" not in kwargs:
            raise WorkflowException("Must provide 'basedir' in kwargs")

        output_dirs = set()

        if kwargs.get("outdir"):
            finaloutdir = os.path.abspath(kwargs.get("outdir"))
        else:
            finaloutdir = None

        if kwargs.get("tmp_outdir_prefix"):
            kwargs["outdir"] = tempfile.mkdtemp(
                prefix=kwargs["tmp_outdir_prefix"])
        else:
            kwargs["outdir"] = tempfile.mkdtemp()

        output_dirs.add(kwargs["outdir"])
        kwargs["mutation_manager"] = MutationManager()

        jobReqs = None
        if "cwl:requirements" in job_order:
            jobReqs = job_order["cwl:requirements"]
        elif ("cwl:defaults" in tool.metadata
              and "cwl:requirements" in tool.metadata["cwl:defaults"]):
            jobReqs = tool.metadata["cwl:defaults"]["cwl:requirements"]
        if jobReqs:
            for req in jobReqs:
                tool.requirements.append(req)

        if kwargs.get("default_container"):
            tool.requirements.insert(
                0, {
                    "class": "DockerRequirement",
                    "dockerPull": kwargs["default_container"]
                })

        jobs = tool.job(job_order, output_callback, **kwargs)
        try:
            for runnable in jobs:
                if runnable:
                    builder = kwargs.get("builder", None)
                    if builder is not None:
                        runnable.builder = builder
                    if runnable.outdir:
                        output_dirs.add(runnable.outdir)
                    runnable.run(**kwargs)
                else:
                    time.sleep(1)

        except WorkflowException:
            raise  # re-raise unchanged to preserve the original traceback
        except Exception as e:
            log.error("Got exception: %s", e)
            raise WorkflowException(str(e))

        # wait for all processes to finish
        self.wait()

        if final_output and final_output[0] and finaloutdir:
            final_output[0] = relocateOutputs(final_output[0], finaloutdir,
                                              output_dirs,
                                              kwargs.get("move_outputs"),
                                              kwargs["make_fs_access"](""))

        if kwargs.get("rm_tmpdir"):
            cleanIntermediate(output_dirs)

        if final_output and final_status:
            return (final_output[0], final_status[0])
        else:
            return (None, "permanentFail")
Example #4
def main(args=None, stdout=sys.stdout):
    """Main method for toil-cwl-runner."""
    cwllogger.removeHandler(defaultStreamHandler)
    config = Config()
    config.cwl = True
    parser = argparse.ArgumentParser()
    addOptions(parser, config)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    dockergroup = parser.add_mutually_exclusive_group()
    dockergroup.add_argument(
        "--user-space-docker-cmd",
        help="(Linux/OS X only) Specify a user space docker command (like "
        "udocker or dx-docker) that will be used to call 'pull' and 'run'")
    dockergroup.add_argument(
        "--singularity", action="store_true", default=False,
        help="[experimental] Use Singularity runtime for running containers. "
        "Requires Singularity v2.3.2+ and Linux with kernel version v3.18+ or "
        "with overlayfs support backported.")
    dockergroup.add_argument(
        "--no-container", action="store_true", help="Do not execute jobs in a "
        "Docker container, even when `DockerRequirement` "
        "is specified under `hints`.")
    parser.add_argument(
        "--preserve-environment", type=str, nargs='+',
        help="Preserve specified environment variables when running"
        " CommandLineTools", metavar=("VAR1 VAR2"), default=("PATH",),
        dest="preserve_environment")
    parser.add_argument(
        "--destBucket", type=str,
        help="Specify a cloud bucket endpoint for output files.")
    parser.add_argument(
        "--beta-dependency-resolvers-configuration", default=None)
    parser.add_argument("--beta-dependencies-directory", default=None)
    parser.add_argument(
        "--beta-use-biocontainers", default=None, action="store_true")
    parser.add_argument(
        "--beta-conda-dependencies", default=None, action="store_true")
    parser.add_argument("--tmpdir-prefix", type=Text,
                        help="Path prefix for temporary directories",
                        default="tmp")
    parser.add_argument("--tmp-outdir-prefix", type=Text,
                        help="Path prefix for intermediate output directories",
                        default="tmp")
    parser.add_argument(
        "--force-docker-pull", action="store_true", default=False,
        dest="force_docker_pull",
        help="Pull latest docker image even if it is locally present")
    parser.add_argument(
        "--no-match-user", action="store_true", default=False,
        help="Disable passing the current uid to `docker run --user`")

    # mkdtemp actually creates the directory, but
    # Toil requires that the directory not exist,
    # so create it, delete it, and let
    # Toil create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    # we use workdir as jobStore:
    options = parser.parse_args([workdir] + args)

    # if tmpdir_prefix is not the default value, set workDir too
    if options.tmpdir_prefix != 'tmp':
        options.workDir = options.tmpdir_prefix

    if options.provisioner and not options.jobStore:
        raise NoSuchJobStoreException(
            'Please specify a jobstore with the --jobStore option when specifying a provisioner.')

    use_container = not options.no_container

    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    tmp_outdir_prefix = os.path.abspath(options.tmp_outdir_prefix)
    tmpdir_prefix = os.path.abspath(options.tmpdir_prefix)

    fileindex = {}
    existing = {}
    conf_file = getattr(options,
                        "beta_dependency_resolvers_configuration", None)
    use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
    job_script_provider = None
    if conf_file or use_conda_dependencies:
        dependencies_configuration = DependenciesConfiguration(options)
        job_script_provider = dependencies_configuration

    options.default_container = None
    runtime_context = cwltool.context.RuntimeContext(vars(options))
    runtime_context.find_default_container = functools.partial(
        find_default_container, options)
    runtime_context.workdir = workdir
    runtime_context.move_outputs = "leave"
    runtime_context.rm_tmpdir = False
    loading_context = cwltool.context.LoadingContext(vars(options))

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            loading_context.hints = [{
                "class": "ResourceRequirement",
                "coresMin": toil.config.defaultCores,
                "ramMin": toil.config.defaultMemory / (2**20),
                "outdirMin": toil.config.defaultDisk / (2**20),
                "tmpdirMin": 0
            }]
            loading_context.construct_tool_object = toil_make_tool
            loading_context.resolver = cwltool.resolver.tool_resolver
            loading_context.strict = not options.not_strict
            options.workflow = options.cwltool
            options.job_order = options.cwljob
            uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
                options.cwltool, loading_context.resolver,
                loading_context.fetcher_constructor)
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job_order_object, options.basedir, jobloader = \
                cwltool.main.load_job_order(
                    options, sys.stdin, loading_context.fetcher_constructor,
                    loading_context.overrides_list, tool_file_uri)
            document_loader, workflowobj, uri = \
                cwltool.load_tool.fetch_document(
                    uri, loading_context.resolver,
                    loading_context.fetcher_constructor)
            document_loader, avsc_names, processobj, metadata, uri = \
                cwltool.load_tool.validate_document(
                    document_loader, workflowobj, uri,
                    loading_context.enable_dev, loading_context.strict, False,
                    loading_context.fetcher_constructor, False,
                    loading_context.overrides_list,
                    do_validate=loading_context.do_validate)
            loading_context.overrides_list.extend(
                metadata.get("cwltool:overrides", []))
            try:
                tool = cwltool.load_tool.make_tool(
                    document_loader, avsc_names, metadata, uri,
                    loading_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33
            runtime_context.secret_store = SecretStore()
            initialized_job_order = cwltool.main.init_job_order(
                job_order_object, options, tool, jobloader, sys.stdout,
                secret_store=runtime_context.secret_store)
            fs_access = cwltool.stdfsaccess.StdFsAccess(options.basedir)
            fill_in_defaults(
                tool.tool["inputs"], initialized_job_order, fs_access)

            def path_to_loc(obj):
                if "location" not in obj and "path" in obj:
                    obj["location"] = obj["path"]
                    del obj["path"]

            def import_files(tool):
                visit_class(tool, ("File", "Directory"), path_to_loc)
                visit_class(tool, ("File", ), functools.partial(
                    add_sizes, fs_access))
                normalizeFilesDirs(tool)
                adjustDirObjs(tool, functools.partial(
                    get_listing, fs_access, recursive=True))
                adjustFileObjs(tool, functools.partial(
                    uploadFile, toil.importFile, fileindex, existing,
                    skip_broken=True))

            tool.visit(import_files)

            for inp in tool.tool["inputs"]:
                def set_secondary(fileobj):
                    if isinstance(fileobj, Mapping) \
                            and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [
                                {"location": cwltool.builder.substitute(
                                    fileobj["location"], sf), "class": "File"}
                                for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, MutableSequence):
                        for entry in fileobj:
                            set_secondary(entry)

                if shortname(inp["id"]) in initialized_job_order \
                        and inp.get("secondaryFiles"):
                    set_secondary(initialized_job_order[shortname(inp["id"])])

            import_files(initialized_job_order)
            visitSteps(tool, import_files)

            try:
                runtime_context.use_container = use_container
                runtime_context.tmpdir = os.path.realpath(tmpdir_prefix)
                runtime_context.tmp_outdir_prefix = os.path.realpath(
                    tmp_outdir_prefix)
                runtime_context.job_script_provider = job_script_provider
                runtime_context.force_docker_pull = options.force_docker_pull
                runtime_context.no_match_user = options.no_match_user
                (wf1, _) = makeJob(tool, {}, None, runtime_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33

            wf1.cwljob = initialized_job_order
            if isinstance(wf1, CWLJob):  # Clean up temporary directories only created with CWLJobs.
                wf1.addFollowOnFn(cleanTempDirs, wf1)
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        # Stage output files: export to the destination bucket when one was
        # given on the command line; otherwise options.destBucket is None
        # and files are staged into outdir.
        toilStageFiles(
            toil,
            outobj,
            outdir,
            fileindex,
            existing,
            export=True,
            destBucket=options.destBucket)

        if not options.destBucket:
            visit_class(outobj, ("File",), functools.partial(
                compute_checksums, cwltool.stdfsaccess.StdFsAccess("")))

        visit_class(outobj, ("File", ), MutationManager().unset_generation)
        stdout.write(json.dumps(outobj, indent=4))

    return 0
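
A hedged invocation sketch; the workflow and job-order paths are hypothetical, and the flags mirror the parser defined above.

import sys

# Hypothetical call to the main() defined above; the positional "cwltool"
# document comes first, followed by REMAINDER "cwljob" arguments.
exit_code = main(
    args=[
        "--outdir", "./results",
        "--not-strict",
        "workflow.cwl",  # hypothetical CWL document
        "job.yml",       # hypothetical job-order file
    ],
    stdout=sys.stdout,
)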
Example #5
    def executor(self, tool, job_order, runtimeContext, **kwargs):
        """Executor method."""
        final_output = []
        final_status = []

        def output_callback(out, processStatus):
            final_status.append(processStatus)
            final_output.append(out)

        if not runtimeContext.basedir:
            raise WorkflowException('`runtimeContext` should contain a '
                                    '`basedir`')

        output_dirs = set()

        if runtimeContext.outdir:
            finaloutdir = os.path.abspath(runtimeContext.outdir)
        else:
            finaloutdir = None
        if runtimeContext.tmp_outdir_prefix:
            runtimeContext.outdir = tempfile.mkdtemp(
                prefix=runtimeContext.tmp_outdir_prefix)
        else:
            runtimeContext.outdir = tempfile.mkdtemp()

        output_dirs.add(runtimeContext.outdir)
        runtimeContext.mutation_manager = MutationManager()

        jobReqs = None
        if "cwl:requirements" in job_order:
            jobReqs = job_order["cwl:requirements"]
        elif ("cwl:defaults" in tool.metadata
              and "cwl:requirements" in tool.metadata["cwl:defaults"]):
            jobReqs = tool.metadata["cwl:defaults"]["cwl:requirements"]
        if jobReqs:
            for req in jobReqs:
                tool.requirements.append(req)

        if not runtimeContext.default_container:
            runtimeContext.default_container = 'frolvlad/alpine-bash'
        runtimeContext.docker_outdir = os.path.join(runtimeContext.working_dir,
                                                    "cwl/docker_outdir")
        runtimeContext.docker_tmpdir = os.path.join(runtimeContext.working_dir,
                                                    "cwl/docker_tmpdir")
        runtimeContext.docker_stagedir = os.path.join(
            runtimeContext.working_dir, "cwl/docker_stagedir")

        jobs = tool.job(job_order, output_callback, runtimeContext)
        try:
            for runnable in jobs:
                if runnable:
                    if runtimeContext.builder:
                        runnable.builder = runtimeContext.builder
                    if runnable.outdir:
                        output_dirs.add(runnable.outdir)
                    runnable.run(runtimeContext)
                else:
                    # the workflow cannot make more progress yet; wait
                    # for pending jobs instead of failing outright
                    time.sleep(1)

        except WorkflowException as e:
            traceback.print_exc()
            raise e
        except Exception as e:
            traceback.print_exc()
            raise WorkflowException(str(e))

        # wait for all processes to finish
        self.wait()

        if final_output and final_output[0] and finaloutdir:
            final_output[0] = relocateOutputs(
                final_output[0], finaloutdir, output_dirs,
                runtimeContext.move_outputs, runtimeContext.make_fs_access(""))

        if runtimeContext.rm_tmpdir:
            cleanIntermediate(output_dirs)

        if final_output and final_status:
            return str(final_output[0]), str(final_status[0])
        else:
            return None, "permanentFail"
Example #6
def execute_workflow_step(workflow,
                          task_id,
                          job_data,
                          cwl_args=None,
                          executor=None):
    """
    Constructs and executes single step workflow based on the "workflow"
    and "task_id". "cwl_args" can be used to update default parameters
    used for loading and runtime contexts. Exports json file with the
    execution results. If the step was evaluated as the one that need to
    be skipped, the output "skipped" will set to True and the step_report
    file will include "nulls". This function doesn't remove any temporary
    data in both success and failure scenarios.
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id, job_data=job_data)

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({  # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(
        ),  # job should already have abs path for inputs, so this is useless
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder,
                                      task_id + "_step_workflow.cwl")

    fast_cwl_step_load(  # saves the new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path)

    workflow_data = slow_cwl_load(workflow=workflow_step_path,
                                  cwl_args=default_cwl_args)

    skipped = True
    step_outputs = {
        output_id: None
        for output_id, _ in get_items(workflow_data.tool["outputs"])
    }
    if need_to_run(workflow_data, job_data, task_id):
        skipped = False
        _stderr = sys.stderr  # swap in the real stderr so the executor's logging behaves
        sys.stderr = sys.__stderr__
        step_outputs, step_status = executor(workflow_data, job_data,
                                             RuntimeContext(default_cwl_args))
        sys.stderr = _stderr

        if step_status != "success":
            raise ValueError("Failed to run workflow step")

        # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
        visit_class(step_outputs, ("File", ),
                    MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report, skipped
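
A hedged sketch of the skip contract: when need_to_run() reports False, the step report is still written, but every output is None and the returned flag is True. All names and paths below are hypothetical.

job_data = {"tmp_folder": "/tmp/run_001"}  # minimal hypothetical job data

step_outputs, step_report, skipped = execute_workflow_step(
    workflow="/tmp/run_001/workflow.cwl",  # hypothetical parent workflow
    task_id="optional_qc",                 # hypothetical step id
    job_data=job_data,
)
if skipped:
    # step_outputs maps every output id to None, so downstream steps see nulls
    print("step skipped; report with nulls written to", step_report)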
Example #7
    def cwlmain(
        self,
        argsl=None,  # type: List[str]
        args=None,  # type: argparse.Namespace
        job_order_object=None,  # type: MutableMapping[Text, Any]
        stdin=sys.stdin,  # type: IO[Any]
        stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
        stderr=sys.stderr,  # type: IO[Any]
        versionfunc=versionstring,  # type: Callable[[], Text]
        logger_handler=None,  # type: logging.Handler
        custom_schema_callback=None,  # type: Callable[[], None]
        executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
        loadingContext=None,  # type: LoadingContext
        runtimeContext=None  # type: RuntimeContext
    ):  # type: (...) -> Union[int, Tuple[Any, Any]]

        if not stdout:
            stdout = codecs.getwriter('utf-8')(sys.stdout)
        _logger.removeHandler(defaultStreamHandler)
        if logger_handler:
            stderr_handler = logger_handler
        else:
            stderr_handler = logging.StreamHandler(stderr)
        _logger.addHandler(stderr_handler)
        try:
            if args is None:
                args = arg_parser().parse_args(argsl)
                if args.workflow and (not argsl or "--outdir" not in argsl):
                    outputPath = args.workflow.split('/')[-1].split('.')[0]
                    setattr(
                        args, "outdir",
                        os.getcwd() + "/" + outputPath + "/" +
                        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
            if runtimeContext is None:
                runtimeContext = RuntimeContext(vars(args))
            else:
                runtimeContext = runtimeContext.copy()

            rdflib_logger = logging.getLogger("rdflib.term")
            rdflib_logger.addHandler(stderr_handler)
            rdflib_logger.setLevel(logging.ERROR)
            if args.quiet:
                _logger.setLevel(logging.WARN)
            if runtimeContext.debug:
                _logger.setLevel(logging.DEBUG)
                rdflib_logger.setLevel(logging.DEBUG)
            if args.timestamps:
                formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                              "%Y-%m-%d %H:%M:%S")
                stderr_handler.setFormatter(formatter)
            # version
            if args.version:
                return versionfunc(), 0
            else:
                _logger.info(versionfunc())

            if args.print_supported_versions:
                return "\n".join(supportedCWLversions(args.enable_dev)), 0

            if not args.workflow:
                if os.path.isfile("CWLFile"):
                    setattr(args, "workflow", "CWLFile")
                else:
                    _logger.error("")
                    _logger.error(
                        "CWL document required, no input file was provided")
                    arg_parser().print_help()
                    return "CWL document required, no input file was provided", 1
            if args.relax_path_checks:
                command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

            if args.ga4gh_tool_registries:
                ga4gh_tool_registries[:] = args.ga4gh_tool_registries
            if not args.enable_ga4gh_tool_registry:
                del ga4gh_tool_registries[:]

            if custom_schema_callback:
                custom_schema_callback()
            elif args.enable_ext:
                res = pkg_resources.resource_stream(__name__, 'extensions.yml')
                use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                                  res.read())
                res.close()
            else:
                use_standard_schema("v1.0")

            if loadingContext is None:
                loadingContext = LoadingContext(vars(args))
            else:
                loadingContext = loadingContext.copy()

            loadingContext.disable_js_validation = \
                args.disable_js_validation or (not args.do_validate)
            loadingContext.construct_tool_object = getdefault(
                loadingContext.construct_tool_object,
                workflow.default_make_tool)
            loadingContext.resolver = getdefault(loadingContext.resolver,
                                                 tool_resolver)
            try:
                uri, tool_file_uri = resolve_tool_uri(
                    args.workflow,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)
            except Exception:
                return "Can't find file " + args.workflow, 0

            try_again_msg = "" if args.debug else ", try again with --debug for more information"

            try:
                job_order_object, input_basedir, jobloader = load_job_order(
                    args, stdin, loadingContext.fetcher_constructor,
                    loadingContext.overrides_list, tool_file_uri)

                if args.overrides:
                    loadingContext.overrides_list.extend(
                        load_overrides(
                            file_uri(os.path.abspath(args.overrides)),
                            tool_file_uri))

                document_loader, workflowobj, uri = fetch_document(
                    uri,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)

                if args.print_deps:
                    # printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                    result = returndeps(workflowobj, document_loader, stdout,
                                        args.relative_deps, uri)
                    return result, 0

                document_loader, avsc_names, processobj, metadata, uri \
                    = validate_document(document_loader, workflowobj, uri,
                                        enable_dev=loadingContext.enable_dev,
                                        strict=loadingContext.strict,
                                        preprocess_only=(args.print_pre or args.pack),
                                        fetcher_constructor=loadingContext.fetcher_constructor,
                                        skip_schemas=args.skip_schemas,
                                        overrides=loadingContext.overrides_list,
                                        do_validate=loadingContext.do_validate)

                if args.print_pre:
                    # stdout.write(json_dumps(processobj, indent=4))
                    return json_dumps(processobj, indent=4), 0

                loadingContext.overrides_list.extend(
                    metadata.get("cwltool:overrides", []))

                tool = make_tool(document_loader, avsc_names, metadata, uri,
                                 loadingContext)
                if args.make_template:
                    yaml.safe_dump(generate_input_template(tool),
                                   sys.stdout,
                                   default_flow_style=False,
                                   indent=4,
                                   block_seq_indent=2)
                    return yaml.safe_dump(generate_input_template(tool),
                                          indent=4), 0

                if args.validate:
                    _logger.info("Tool definition is valid")
                    return "Tool definition is valid", 0

                if args.pack:
                    stdout.write(
                        print_pack(document_loader, processobj, uri, metadata))
                    return print_pack(document_loader, processobj, uri,
                                      metadata), 0

                if args.print_rdf:
                    stdout.write(
                        printrdf(tool, document_loader.ctx,
                                 args.rdf_serializer))
                    return printrdf(tool, document_loader.ctx,
                                    args.rdf_serializer), 0

                if args.print_dot:
                    printdot(tool, document_loader.ctx, stdout)
                    return "args.print_dot still not solved", 0

            except (validate.ValidationException) as exc:
                _logger.error(u"Tool definition failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Tool definition failed validation:\n%s" + exc + args.debug
                return infor, 1
            except (RuntimeError, WorkflowException) as exc:
                _logger.error(u"Tool definition failed initialization:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Tool definition failed initialization:\n%s" + exc + args.debug
                return infor, 1
            except Exception as exc:
                _logger.error(
                    u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                    try_again_msg,
                    exc if not args.debug else "",
                    exc_info=args.debug)
                return "I'm sorry, I couldn't load this CWL file", 1

            if isinstance(tool, int):
                return tool, 0

            # If on MacOS platform, TMPDIR must be set to be under one of the
            # shared volumes in Docker for Mac
            # More info: https://dockstore.org/docs/faq
            if sys.platform == "darwin":
                default_mac_path = "/private/tmp/docker_tmp"
                if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                    runtimeContext.tmp_outdir_prefix = default_mac_path

            for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix",
                              "cachedir"):
                if getattr(runtimeContext, dirprefix) and getattr(
                        runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                    sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                    setattr(
                        runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) +
                        sl)
                    if not os.path.exists(
                            os.path.dirname(getattr(runtimeContext,
                                                    dirprefix))):
                        try:
                            os.makedirs(
                                os.path.dirname(
                                    getattr(runtimeContext, dirprefix)))
                        except Exception as e:
                            _logger.error("Failed to create directory: %s", e)
                            infor = "Failed to create directory: %s" + e + ""
                            return infor, 1

            if args.cachedir:
                if args.move_outputs == "move":
                    runtimeContext.move_outputs = "copy"
                runtimeContext.tmp_outdir_prefix = args.cachedir

            runtimeContext.secret_store = getdefault(
                runtimeContext.secret_store, SecretStore())

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store)
            except SystemExit as err:
                return err.code
            if not executor:
                if args.parallel:
                    executor = MultithreadedJobExecutor()
                else:
                    executor = SingleJobExecutor()
            assert executor is not None

            if isinstance(initialized_job_order_object, int):
                return initialized_job_order_object

            try:
                runtimeContext.basedir = input_basedir
                del args.workflow
                del args.job_order

                conf_file = getattr(args,
                                    "beta_dependency_resolvers_configuration",
                                    None)  # Text
                use_conda_dependencies = getattr(args,
                                                 "beta_conda_dependencies",
                                                 None)  # Text

                job_script_provider = None  # type: Optional[DependenciesConfiguration]
                if conf_file or use_conda_dependencies:
                    runtimeContext.job_script_provider = DependenciesConfiguration(
                        args)

                runtimeContext.find_default_container = \
                    functools.partial(find_default_container, args)
                runtimeContext.make_fs_access = getdefault(
                    runtimeContext.make_fs_access, StdFsAccess)

                (out, status) = executor(tool,
                                         initialized_job_order_object,
                                         runtimeContext,
                                         logger=_logger)
                # This is the workflow output, it needs to be written
                if out is not None:

                    def loc_to_path(obj):
                        for field in ("path", "nameext", "nameroot",
                                      "dirname"):
                            if field in obj:
                                del obj[field]
                        if obj["location"].startswith("file://"):
                            obj["path"] = uri_file_path(obj["location"])

                    visit_class(out, ("File", "Directory"), loc_to_path)

                    # Unsetting the Generation from final output object
                    visit_class(out, ("File", ),
                                MutationManager().unset_generation)

                    if isinstance(out, string_types):
                        stdout.write(out)
                    else:
                        stdout.write(
                            json_dumps(
                                out,
                                indent=4,  # type: ignore
                                ensure_ascii=False))
                    stdout.write("\n")
                    if hasattr(stdout, "flush"):
                        stdout.flush()  # type: ignore

                if status != "success":
                    _logger.warning(u"Final process status is %s", status)
                    infor = "Final process status is %s" + status + ""
                    return infor, 1

                _logger.info(u"Final process status is %s", status)
                return out, status

            except (validate.ValidationException) as exc:
                _logger.error(u"Input object failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Input object failed validation:\n%s" + exc + args.debug
                return infor, 1
            except UnsupportedRequirement as exc:
                _logger.error(
                    u"Workflow or tool uses unsupported feature:\n%s",
                    exc,
                    exc_info=args.debug)
                infor = "Workflow or tool uses unsupported feature:\n%s" + exc + args.debug
                return infor, 3
            except WorkflowException as exc:
                _logger.error(u"Workflow error%s:\n%s",
                              try_again_msg,
                              strip_dup_lineno(six.text_type(exc)),
                              exc_info=args.debug)
                infor = "Workflow error%s:\n%s" + try_again_msg + strip_dup_lineno(
                    six.text_type(exc)) + args.debug
                return infor, 1
            except Exception as exc:
                _logger.error(u"Unhandled error%s:\n  %s",
                              try_again_msg,
                              exc,
                              exc_info=args.debug)
                infor = "Unhandled error%s:\n  %s" + try_again_msg + exc + args.debug
                return infor, 1

        finally:
            _logger.removeHandler(stderr_handler)
            _logger.addHandler(defaultStreamHandler)
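
A hedged usage sketch; "runner" stands in for an instance of whatever class defines cwlmain, and the paths are hypothetical. Note the mixed return convention: on success the second element is the status string, while most error paths return a numeric code.

# runner = SomeCwlRunner()  # hypothetical: construct the class defining cwlmain
payload, status = runner.cwlmain(argsl=["workflow.cwl", "job.yml"])
if status == "success":
    print("workflow outputs:", payload)  # the CWL output object
else:
    print("failed:", payload)  # an error message; status is a numeric code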