def load_cwl(fname):
    """Load and validate a CWL file using cwltool."""
    logger.debug('Loading CWL file "{}"'.format(fname))
    # Fetching, preprocessing and validating cwl

    # Older versions of cwltool
    if legacy_cwltool:
        try:
            (document_loader, workflowobj, uri) = fetch_document(fname)
            (document_loader, _, processobj, metadata, uri) = \
                validate_document(document_loader, workflowobj, uri)
        except TypeError:
            # Newer signatures raise TypeError here; fall back to the
            # LoadingContext-based API used by intermediate cwltool releases.
            from cwltool.context import LoadingContext, getdefault
            from cwltool import workflow
            from cwltool.resolver import tool_resolver
            from cwltool.load_tool import resolve_tool_uri

            loadingContext = LoadingContext()
            loadingContext.construct_tool_object = getdefault(
                loadingContext.construct_tool_object,
                workflow.default_make_tool)
            loadingContext.resolver = getdefault(loadingContext.resolver,
                                                 tool_resolver)

            uri, tool_file_uri = resolve_tool_uri(
                fname,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)

            document_loader, workflowobj, uri = fetch_document(
                uri,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)
            document_loader, avsc_names, processobj, metadata, uri = \
                validate_document(
                    document_loader, workflowobj, uri,
                    loadingContext.overrides_list, {},
                    enable_dev=loadingContext.enable_dev,
                    strict=loadingContext.strict,
                    preprocess_only=False,
                    fetcher_constructor=loadingContext.fetcher_constructor,
                    skip_schemas=False,
                    do_validate=loadingContext.do_validate)
    # Recent versions of cwltool
    else:
        (loading_context, workflowobj, uri) = fetch_document(fname)
        loading_context, uri = resolve_and_validate_document(
            loading_context, workflowobj, uri)
        document_loader = loading_context.loader
        processobj = workflowobj
        metadata = loading_context.metadata

    return document_loader, processobj, metadata, uri
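A minimal usage sketch, assuming the helper above is importable and a local "workflow.cwl" exists (the file name is a placeholder):

# Hypothetical caller: load a workflow and inspect the parsed result.
document_loader, processobj, metadata, uri = load_cwl("workflow.cwl")
print(metadata.get("cwlVersion"))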
Example #2
def _run_example(as_dict, out=None):
    if not out:
        out = _examples_path_for("test.cwl")
    abstract_as_dict = from_dict(as_dict)
    with open(out, "w") as f:
        ordered_dump(abstract_as_dict, f)

    check_abstract_def(abstract_as_dict)

    # validate format2 workflows
    enable_dev = "dev" in CWL_VERSION
    loadingContext = LoadingContext()
    loadingContext.enable_dev = enable_dev
    loadingContext.loader = default_loader(
        loadingContext.fetcher_constructor,
        enable_dev=enable_dev,
    )
    loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)
    loadingContext, workflowobj, uri = fetch_document(out, loadingContext)
    loadingContext, uri = resolve_and_validate_document(
        loadingContext,
        workflowobj,
        uri,
    )
    return abstract_as_dict
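How this helper might be invoked from a test, as a hedged sketch; "workflow_dict" is a placeholder for an abstract workflow mapping produced elsewhere in the suite:

# Hypothetical test call: "workflow_dict" stands for a valid abstract workflow
# mapping loaded by the surrounding fixtures; the output path is illustrative.
abstract = _run_example(workflow_dict, out=_examples_path_for("roundtrip.cwl"))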
Example #3
    def run(self, runtime_context):
        # type: (RuntimeContext) -> None
        # The parameter is a RuntimeContext (see the type comment); report the
        # cached outputs through the callback without re-running the job.
        self.output_callback(self.job.collect_output_ports(
            self.job.tool["outputs"],
            self.cache_builder,
            self.output_dir,
            getdefault(runtime_context.compute_checksum, True)), "success")
Example #4
    def execute(self, context):

        post_status(context)

        self.cwlwf, it_is_workflow = load_cwl(
            self.dag.default_args["cwl_workflow"], self.dag.default_args)
        self.cwl_step = [
            step for step in self.cwlwf.steps
            if self.task_id == step.id.split("#")[-1]
        ][0] if it_is_workflow else self.cwlwf

        _logger.info('{0}: Running!'.format(self.task_id))

        upstream_task_ids = [t.task_id for t in self.upstream_list] + \
                            ([self.reader_task_id] if self.reader_task_id else [])
        _logger.debug('{0}: Collecting outputs from: \n{1}'.format(
            self.task_id, json.dumps(upstream_task_ids, indent=4)))

        upstream_data = self.xcom_pull(context=context,
                                       task_ids=upstream_task_ids)
        _logger.info('{0}: Upstream data: \n {1}'.format(
            self.task_id, json.dumps(upstream_data, indent=4)))

        promises = {}
        for data in upstream_data:  # each item is a dict with "promises" and an optional "outdir"
            promises = merge(promises, data["promises"])
            if "outdir" in data:
                self.outdir = data["outdir"]

        _d_args = self.dag.default_args

        if not self.outdir:
            self.outdir = _d_args['tmp_folder']

        _logger.debug('{0}: Step inputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["inputs"], indent=4)))

        _logger.debug('{0}: Step outputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["outputs"], indent=4)))

        jobobj = {}

        for inp in self.cwl_step.tool["inputs"]:
            jobobj_id = shortname(inp["id"]).split("/")[-1]
            source_ids = []
            promises_outputs = []
            try:
                source_field = inp["source"] if it_is_workflow else inp.get(
                    "id")
                source_ids = [shortname(s)
                              for s in source_field] if isinstance(
                                  source_field,
                                  list) else [shortname(source_field)]
                promises_outputs = [
                    promises[source_id] for source_id in source_ids
                    if source_id in promises
                ]
            except Exception:
                _logger.warning(
                    "{0}: Couldn't find source field in step input: {1}".
                    format(self.task_id, json.dumps(inp, indent=4)))

            _logger.info(
                '{0}: For input {1} with source_ids: {2} found upstream outputs: \n{3}'
                .format(self.task_id, jobobj_id, source_ids, promises_outputs))

            if len(promises_outputs) > 1:
                if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                    jobobj[jobobj_id] = flatten(promises_outputs)
                else:
                    jobobj[jobobj_id] = promises_outputs
            # Should also check for [None]; in that case the default value should apply
            elif len(promises_outputs) == 1 and (promises_outputs[0]
                                                 is not None):
                jobobj[jobobj_id] = promises_outputs[0]
            elif "valueFrom" in inp:
                jobobj[jobobj_id] = None
            elif "default" in inp:
                d = copy.copy(inp["default"])
                jobobj[jobobj_id] = d
            else:
                continue

        _logger.debug('{0}: Collected job object: \n {1}'.format(
            self.task_id, json.dumps(jobobj, indent=4)))

        def _post_scatter_eval(shortio, cwl_step):
            _value_from = {
                shortname(i["id"]).split("/")[-1]: i["valueFrom"]
                for i in cwl_step.tool["inputs"] if "valueFrom" in i
            }
            _logger.debug('{0}: Step inputs with valueFrom: \n{1}'.format(
                self.task_id, json.dumps(_value_from, indent=4)))

            def value_from_func(k, v):
                if k in _value_from:
                    return expression.do_eval(_value_from[k],
                                              shortio,
                                              self.cwlwf.tool.get(
                                                  "requirements", []),
                                              None,
                                              None, {},
                                              context=v)
                else:
                    return v

            return {k: value_from_func(k, v) for k, v in shortio.items()}

        job = _post_scatter_eval(jobobj, self.cwl_step)
        _logger.info('{0}: Final job data: \n {1}'.format(
            self.task_id, json.dumps(job, indent=4)))

        _d_args['outdir'] = tempfile.mkdtemp(
            prefix=os.path.join(self.outdir, "step_tmp"))
        _d_args['tmpdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_tmp_')
        _d_args['tmp_outdir_prefix'] = os.path.join(_d_args['outdir'],
                                                    'cwl_outdir_')

        _d_args["record_container_id"] = True
        _d_args["cidfile_dir"] = _d_args['outdir']
        _d_args["cidfile_prefix"] = self.task_id

        _logger.debug('{0}: Runtime context: \n {1}'.format(self.task_id, _d_args))

        executor = SingleJobExecutor()
        runtimeContext = RuntimeContext(_d_args)
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess)

        for inp in self.cwl_step.tool["inputs"]:
            if inp.get("not_connected"):
                del job[shortname(inp["id"]).split("/")[-1]]

        _stderr = sys.stderr
        sys.stderr = sys.__stderr__
        (output, status) = executor(
            self.cwl_step.embedded_tool if it_is_workflow else self.cwl_step,
            job,
            runtimeContext,
            logger=_logger)
        sys.stderr = _stderr

        if not output and status == "permanentFail":
            raise ValueError("{0}: step failed with permanentFail status".format(
                self.task_id))

        _logger.debug('{0}: Embedded tool outputs: \n {1}'.format(
            self.task_id, json.dumps(output, indent=4)))

        promises = {}

        for out in self.cwl_step.tool["outputs"]:

            out_id = shortname(out["id"])
            jobout_id = out_id.split("/")[-1]
            try:
                promises[out_id] = output[jobout_id]
            except (KeyError, TypeError):
                continue

        # Unsetting the Generation from final output object
        visit_class(promises, ("File", ), MutationManager().unset_generation)

        data = {"promises": promises, "outdir": self.outdir}

        _logger.info('{0}: Output: \n {1}'.format(self.task_id,
                                                  json.dumps(data, indent=4)))

        return data
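The dictionary returned above is pushed to XCom; a downstream operator can merge several such payloads exactly as this method does with its own upstream data (a sketch; the task ids are placeholders):

# Hypothetical downstream consumer, mirroring the merge loop above.
promises, outdir = {}, None
for data in self.xcom_pull(context=context, task_ids=["step_a", "step_b"]):
    promises = merge(promises, data["promises"])
    outdir = data.get("outdir", outdir)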
Example #5
    def run(self, runtimeContext):
        # type: (RuntimeContext) -> None

        (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
        self.prov_obj = runtimeContext.prov_obj
        img_id = None
        env = cast(MutableMapping[Text, Text], os.environ)
        user_space_docker_cmd = runtimeContext.user_space_docker_cmd
        if docker_req and user_space_docker_cmd:
            # For user-space docker implementations, a local image name or ID
            # takes precedence over a network pull
            if 'dockerImageId' in docker_req:
                img_id = str(docker_req["dockerImageId"])
            elif 'dockerPull' in docker_req:
                img_id = str(docker_req["dockerPull"])
            # else:
            #     raise WorkflowException(SourceLine(docker_req).makeError(
            #         "Docker image must be specified as 'dockerImageId' or "
            #         "'dockerPull' when using user space implementations of "
            #         "Docker"))
        else:
            try:
                if docker_req and runtimeContext.use_container:
                    img_id = str(
                        self.get_from_requirements(
                            docker_req, True, runtimeContext.pull_image,
                            getdefault(runtimeContext.force_docker_pull, False),
                            getdefault(runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
                if img_id is None:
                    if self.builder.find_default_container:
                        default_container = self.builder.find_default_container()
                        if default_container:
                            img_id = str(default_container)

                if docker_req and img_id is None and runtimeContext.use_container:
                    raise Exception("Docker image not available")

                if self.prov_obj and img_id and runtimeContext.process_run_id:
                    # TODO: Integrate with record_container_id
                    container_agent = self.prov_obj.document.agent(
                        uuid.uuid4().urn,
                        {"prov:type": PROV["SoftwareAgent"],
                         "cwlprov:image": img_id,
                         "prov:label": "Container execution of image %s" % img_id})
                    # FIXME: img_id is not a sha256 id, it might just be "debian:8"
                    #img_entity = document.entity("nih:sha-256;%s" % img_id,
                    #                  {"prov:label": "Container image %s" % img_id} )
                    # The image is the plan for this activity-agent association
                    #document.wasAssociatedWith(process_run_ID, container_agent, img_entity)
                    self.prov_obj.document.wasAssociatedWith(
                        runtimeContext.process_run_id, container_agent)
            except Exception as err:
                container = "Shifter"
                _logger.debug("%s error", container, exc_info=True)
                if docker_is_req:
                    raise UnsupportedRequirement(
                        "%s is required to run this tool: %s" % (container, err))
                else:
                    raise WorkflowException(
                        "{0} is not available for this tool, try "
                        "--no-container to disable {0}, or install "
                        "a user space Docker replacement like uDocker with "
                        "--user-space-docker-cmd.: {1}".format(container, err))

        self._setup(runtimeContext)
        stageFiles(self.pathmapper, ignoreWritable=True, symLink=True,
                   secret_store=runtimeContext.secret_store)
        runtime = self.create_runtime(env, runtimeContext, img_id)
        self._execute(runtime, env, runtimeContext)
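For reference, a minimal sketch of the RuntimeContext attributes this method branches on; the values are illustrative, not defaults:

# Hypothetical configuration: with a user-space Docker replacement configured,
# a local dockerImageId/dockerPull takes precedence over a network pull.
rc = RuntimeContext()
rc.use_container = True
rc.user_space_docker_cmd = "udocker"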
Example #6
    def cwlmain(
        self,
        argsl=None,  # type: List[str]
        args=None,  # type: argparse.Namespace
        job_order_object=None,  # type: MutableMapping[Text, Any]
        stdin=sys.stdin,  # type: IO[Any]
        stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
        stderr=sys.stderr,  # type: IO[Any]
        versionfunc=versionstring,  # type: Callable[[], Text]
        logger_handler=None,  # type: logging.Handler
        custom_schema_callback=None,  # type: Callable[[], None]
        executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
        loadingContext=None,  # type: LoadingContext
        runtimeContext=None  # type: RuntimeContext
    ):  # type: (...) -> Union[int, Tuple[Any, int]]

        if not stdout:
            stdout = codecs.getwriter('utf-8')(sys.stdout)
        _logger.removeHandler(defaultStreamHandler)
        if logger_handler:
            stderr_handler = logger_handler
        else:
            stderr_handler = logging.StreamHandler(stderr)
        _logger.addHandler(stderr_handler)
        try:
            if args is None:
                args = arg_parser().parse_args(argsl)
                if args.workflow and "--outdir" not in argsl:
                    outputPath = args.workflow.split('/')[-1].split('.')[0]
                    setattr(
                        args, "outdir",
                        os.getcwd() + "/" + outputPath + "/" +
                        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
            if runtimeContext is None:
                runtimeContext = RuntimeContext(vars(args))
            else:
                runtimeContext = runtimeContext.copy()

            rdflib_logger = logging.getLogger("rdflib.term")
            rdflib_logger.addHandler(stderr_handler)
            rdflib_logger.setLevel(logging.ERROR)
            if args.quiet:
                _logger.setLevel(logging.WARN)
            if runtimeContext.debug:
                _logger.setLevel(logging.DEBUG)
                rdflib_logger.setLevel(logging.DEBUG)
            if args.timestamps:
                formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                              "%Y-%m-%d %H:%M:%S")
                stderr_handler.setFormatter(formatter)
            # version
            if args.version:
                return versionfunc(), 0
            else:
                _logger.info(versionfunc())

            if args.print_supported_versions:
                return "\n".join(supportedCWLversions(args.enable_dev)), 0

            if not args.workflow:
                if os.path.isfile("CWLFile"):
                    setattr(args, "workflow", "CWLFile")
                else:
                    _logger.error("")
                    _logger.error(
                        "CWL document required, no input file was provided")
                    arg_parser().print_help()
                    return "CWL document required, no input file was provided", 1
            if args.relax_path_checks:
                command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

            if args.ga4gh_tool_registries:
                ga4gh_tool_registries[:] = args.ga4gh_tool_registries
            if not args.enable_ga4gh_tool_registry:
                del ga4gh_tool_registries[:]

            if custom_schema_callback:
                custom_schema_callback()
            elif args.enable_ext:
                res = pkg_resources.resource_stream(__name__, 'extensions.yml')
                use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                                  res.read())
                res.close()
            else:
                use_standard_schema("v1.0")

            if loadingContext is None:
                loadingContext = LoadingContext(vars(args))
            else:
                loadingContext = loadingContext.copy()

            loadingContext.disable_js_validation = \
                args.disable_js_validation or (not args.do_validate)
            loadingContext.construct_tool_object = getdefault(
                loadingContext.construct_tool_object,
                workflow.default_make_tool)
            loadingContext.resolver = getdefault(loadingContext.resolver,
                                                 tool_resolver)
            try:
                uri, tool_file_uri = resolve_tool_uri(
                    args.workflow,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)
            except Exception:
                return "Can't find file " + args.workflow, 0

            try_again_msg = "" if args.debug else ", try again with --debug for more information"

            try:
                job_order_object, input_basedir, jobloader = load_job_order(
                    args, stdin, loadingContext.fetcher_constructor,
                    loadingContext.overrides_list, tool_file_uri)

                if args.overrides:
                    loadingContext.overrides_list.extend(
                        load_overrides(
                            file_uri(os.path.abspath(args.overrides)),
                            tool_file_uri))

                document_loader, workflowobj, uri = fetch_document(
                    uri,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)

                if args.print_deps:
                    # printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                    result = returndeps(workflowobj, document_loader, stdout,
                                        args.relative_deps, uri)
                    return result, 0

                document_loader, avsc_names, processobj, metadata, uri \
                    = validate_document(document_loader, workflowobj, uri,
                                        enable_dev=loadingContext.enable_dev,
                                        strict=loadingContext.strict,
                                        preprocess_only=(args.print_pre or args.pack),
                                        fetcher_constructor=loadingContext.fetcher_constructor,
                                        skip_schemas=args.skip_schemas,
                                        overrides=loadingContext.overrides_list,
                                        do_validate=loadingContext.do_validate)

                if args.print_pre:
                    # stdout.write(json_dumps(processobj, indent=4))
                    return json_dumps(processobj, indent=4), 0

                loadingContext.overrides_list.extend(
                    metadata.get("cwltool:overrides", []))

                tool = make_tool(document_loader, avsc_names, metadata, uri,
                                 loadingContext)
                if args.make_template:
                    yaml.safe_dump(generate_input_template(tool),
                                   sys.stdout,
                                   default_flow_style=False,
                                   indent=4,
                                   block_seq_indent=2)
                    return yaml.safe_dump(generate_input_template(tool),
                                          indent=4), 0

                if args.validate:
                    _logger.info("Tool definition is valid")
                    return "Tool definition is valid", 0

                if args.pack:
                    stdout.write(
                        print_pack(document_loader, processobj, uri, metadata))
                    return print_pack(document_loader, processobj, uri,
                                      metadata), 0

                if args.print_rdf:
                    stdout.write(
                        printrdf(tool, document_loader.ctx,
                                 args.rdf_serializer))
                    return printrdf(tool, document_loader.ctx,
                                    args.rdf_serializer), 0

                if args.print_dot:
                    printdot(tool, document_loader.ctx, stdout)
                    return "args.print_dot still not solved", 0

            except (validate.ValidationException) as exc:
                _logger.error(u"Tool definition failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                info = "Tool definition failed validation:\n{}".format(exc)
                return info, 1
            except (RuntimeError, WorkflowException) as exc:
                _logger.error(u"Tool definition failed initialization:\n%s",
                              exc,
                              exc_info=args.debug)
                info = "Tool definition failed initialization:\n{}".format(exc)
                return info, 1
            except Exception as exc:
                _logger.error(
                    u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                    try_again_msg,
                    exc if not args.debug else "",
                    exc_info=args.debug)
                return "I'm sorry, I couldn't load this CWL file", 1

            if isinstance(tool, int):
                return tool, 0

            # If on MacOS platform, TMPDIR must be set to be under one of the
            # shared volumes in Docker for Mac
            # More info: https://dockstore.org/docs/faq
            if sys.platform == "darwin":
                default_mac_path = "/private/tmp/docker_tmp"
                if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                    runtimeContext.tmp_outdir_prefix = default_mac_path

            for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix",
                              "cachedir"):
                if getattr(runtimeContext, dirprefix) and getattr(
                        runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                    sl = "/" if (getattr(runtimeContext, dirprefix).endswith("/")
                                 or dirprefix == "cachedir") else ""
                    setattr(
                        runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) +
                        sl)
                    if not os.path.exists(
                            os.path.dirname(getattr(runtimeContext,
                                                    dirprefix))):
                        try:
                            os.makedirs(
                                os.path.dirname(
                                    getattr(runtimeContext, dirprefix)))
                        except Exception as e:
                            _logger.error("Failed to create directory: %s", e)
                            info = "Failed to create directory: {}".format(e)
                            return info, 1

            if args.cachedir:
                if args.move_outputs == "move":
                    runtimeContext.move_outputs = "copy"
                runtimeContext.tmp_outdir_prefix = args.cachedir

            runtimeContext.secret_store = getdefault(
                runtimeContext.secret_store, SecretStore())

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store)
            except SystemExit as err:
                return err.code
            if not executor:
                if args.parallel:
                    executor = MultithreadedJobExecutor()
                else:
                    executor = SingleJobExecutor()
            assert executor is not None

            if isinstance(initialized_job_order_object, int):
                return initialized_job_order_object

            try:
                runtimeContext.basedir = input_basedir
                del args.workflow
                del args.job_order

                conf_file = getattr(args,
                                    "beta_dependency_resolvers_configuration",
                                    None)  # Text
                use_conda_dependencies = getattr(args,
                                                 "beta_conda_dependencies",
                                                 None)  # Text

                job_script_provider = None  # type: Optional[DependenciesConfiguration]
                if conf_file or use_conda_dependencies:
                    runtimeContext.job_script_provider = DependenciesConfiguration(
                        args)

                runtimeContext.find_default_container = \
                    functools.partial(find_default_container, args)
                runtimeContext.make_fs_access = getdefault(
                    runtimeContext.make_fs_access, StdFsAccess)

                (out, status) = executor(tool,
                                         initialized_job_order_object,
                                         runtimeContext,
                                         logger=_logger)
                # This is the workflow output, it needs to be written
                if out is not None:

                    def loc_to_path(obj):
                        for field in ("path", "nameext", "nameroot",
                                      "dirname"):
                            if field in obj:
                                del obj[field]
                        if obj["location"].startswith("file://"):
                            obj["path"] = uri_file_path(obj["location"])

                    visit_class(out, ("File", "Directory"), loc_to_path)

                    # Unsetting the Generation from final output object
                    visit_class(out, ("File", ),
                                MutationManager().unset_generation)

                    if isinstance(out, string_types):
                        stdout.write(out)
                    else:
                        stdout.write(
                            json_dumps(
                                out,
                                indent=4,  # type: ignore
                                ensure_ascii=False))
                    stdout.write("\n")
                    if hasattr(stdout, "flush"):
                        stdout.flush()  # type: ignore

                if status != "success":
                    _logger.warning(u"Final process status is %s", status)
                    info = "Final process status is {}".format(status)
                    return info, 1

                _logger.info(u"Final process status is %s", status)
                return out, status

            except (validate.ValidationException) as exc:
                _logger.error(u"Input object failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                info = "Input object failed validation:\n{}".format(exc)
                return info, 1
            except UnsupportedRequirement as exc:
                _logger.error(
                    u"Workflow or tool uses unsupported feature:\n%s",
                    exc,
                    exc_info=args.debug)
                info = "Workflow or tool uses unsupported feature:\n{}".format(exc)
                return info, 3
            except WorkflowException as exc:
                _logger.error(u"Workflow error%s:\n%s",
                              try_again_msg,
                              strip_dup_lineno(six.text_type(exc)),
                              exc_info=args.debug)
                info = "Workflow error{}:\n{}".format(
                    try_again_msg, strip_dup_lineno(six.text_type(exc)))
                return info, 1
            except Exception as exc:
                _logger.error(u"Unhandled error%s:\n  %s",
                              try_again_msg,
                              exc,
                              exc_info=args.debug)
                info = "Unhandled error{}:\n  {}".format(try_again_msg, exc)
                return info, 1

        finally:
            _logger.removeHandler(stderr_handler)
            _logger.addHandler(defaultStreamHandler)
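A sketch of invoking this entry point; unlike stock cwltool's main(), this variant returns a (message_or_output, code) pair on most paths. The file names and the "runner" instance are placeholders:

# Hypothetical invocation on an instance of the enclosing class.
out, code = runner.cwlmain(argsl=["--outdir", "/tmp/results",
                                  "workflow.cwl", "job.yml"])
if code not in (0, "success"):
    print("Run failed:", out)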
Example #7
    def execute(self, context):
        logging.info('Running tool: \n{}'.format(
            json.dumps(self.cwl_step.tool, indent=4)))
        collected_outputs = {}
        for task_outputs in self.xcom_pull(
                context=context,
                task_ids=[task.task_id for task in self.upstream_list]):
            collected_outputs = merge(collected_outputs,
                                      task_outputs["outputs"])
        logging.debug('Collected outputs:\n{}'.format(
            json.dumps(collected_outputs, indent=4)))

        jobobj = {}

        for inp in self.cwl_step.tool["inputs"]:
            jobobj_id = shortname(inp["id"]).split("/")[-1]
            source_ids = []
            promises_outputs = []
            try:
                source_ids = [shortname(source)
                              for source in inp["source"]] if isinstance(
                                  inp["source"],
                                  list) else [shortname(inp["source"])]
                promises_outputs = [
                    collected_outputs[source_id] for source_id in source_ids
                    if source_id in collected_outputs
                ]
            except Exception:
                logging.warning(
                    "Couldn't find source field in the step input: \n{}".
                    format(json.dumps(inp, indent=4)))
            logging.info(
                'For input {} with sources: \n{} \nfound upstream outputs: \n{}'
                .format(jobobj_id, source_ids, promises_outputs))
            if len(promises_outputs) > 1:
                if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                    jobobj[jobobj_id] = flatten(promises_outputs)
                else:
                    jobobj[jobobj_id] = promises_outputs
            elif len(promises_outputs) == 1 and (
                    promises_outputs[0] is not None
            ):  # Should also check for [None]; in that case the default value should apply
                jobobj[jobobj_id] = promises_outputs[0]
            elif "valueFrom" in inp:
                jobobj[jobobj_id] = None
            elif "default" in inp:
                d = copy.copy(inp["default"])
                jobobj[jobobj_id] = d
            else:
                continue

        logging.info('Collected job object: \n{}'.format(
            json.dumps(jobobj, indent=4)))

        valueFrom = {
            shortname(i["id"]).split("/")[-1]: i["valueFrom"]
            for i in self.cwl_step.tool["inputs"] if "valueFrom" in i
        }

        logging.info('Inputs with valueFrom: \n{}'.format(
            json.dumps(valueFrom, indent=4)))

        def postScatterEval(shortio):
            def valueFromFunc(k, v):
                if k in valueFrom:
                    return cwltool.workflow.expression.do_eval(
                        valueFrom[k],
                        shortio,
                        self.dag.requirements,
                        None,
                        None, {},
                        context=v)
                else:
                    return v

            return {k: valueFromFunc(k, v) for k, v in shortio.items()}

        job = postScatterEval(jobobj)
        logging.info(
            'Collected job object after valueFrom evaluation: \n{}'.format(
                json.dumps(job, indent=4)))
        # maybe we need to add scatter functionality here too

        kwargs = self.dag.default_args
        tmp_folder = collected_outputs["tmp_folder"]
        output_folder = collected_outputs["output_folder"]
        kwargs['outdir'] = tempfile.mkdtemp(dir=tmp_folder, prefix="step_tmp_")
        kwargs['tmpdir_prefix'] = os.path.join(tmp_folder, "cwl_tmp_")
        kwargs['tmp_outdir_prefix'] = os.path.join(tmp_folder,
                                                   "cwl_outdir_tmp_")
        kwargs['rm_tmpdir'] = False
        kwargs["basedir"] = os.path.abspath(
            os.path.dirname(self.dag.default_args["job_data"]["path"]))

        logger = logging.getLogger("cwltool")
        sys.stdout = StreamLogWriterUpdated(logger, logging.INFO)
        sys.stderr = StreamLogWriterUpdated(logger, logging.WARN)

        executor = cwltool.executors.SingleJobExecutor()
        runtimeContext = RuntimeContext(kwargs)
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, cwltool.stdfsaccess.StdFsAccess)

        for inp in self.cwl_step.tool["inputs"]:
            if inp.get("not_connected"):
                del job[shortname(inp["id"]).split("/")[-1]]

        (output, status) = executor(self.cwl_step.embedded_tool,
                                    job,
                                    runtimeContext,
                                    logger=logger)

        # Restore the streams that were redirected to the cwltool logger above
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

        if not output and status == "permanentFail":
            raise ValueError("{0}: step failed with permanentFail status".format(
                self.task_id))

        logging.debug('Embedded tool outputs: \n{}'.format(
            json.dumps(output, indent=4)))

        promises = {}
        for out in self.cwl_step.tool["outputs"]:
            out_id = shortname(out["id"])
            jobout_id = out_id.split("/")[-1]
            try:
                promises[out_id] = output[jobout_id]
            except (KeyError, TypeError):
                continue

        promises["tmp_folder"] = tmp_folder
        promises["output_folder"] = output_folder
        data = {"outputs": promises}

        logging.info('Outputs: \n{}'.format(json.dumps(data, indent=4)))

        return data
Example #8
def _parsl_execute(
    self,
    runtime,  # type: List[Text]
    env,  # type: MutableMapping[Text, Text]
    runtimeContext  # type: RuntimeContext
):  # type: (...) -> None

    scr, _ = self.get_requirement("ShellCommandRequirement")

    shouldquote = needs_shell_quoting_re.search  # type: Callable[[Any], Any]
    if scr:
        shouldquote = lambda x: False

    _logger.info(
        u"[job %s] %s$ %s%s%s%s", self.name, self.outdir, " \\\n    ".join([
            shellescape.quote(Text(arg))
            if shouldquote(Text(arg)) else Text(arg)
            for arg in (runtime + self.command_line)
        ]), u' < %s' % self.stdin if self.stdin else '', u' > %s' %
        os.path.join(self.outdir, self.stdout) if self.stdout else '',
        u' 2> %s' %
        os.path.join(self.outdir, self.stderr) if self.stderr else '')
    if self.joborder and runtimeContext.research_obj:
        job_order = self.joborder
        assert runtimeContext.prov_obj
        runtimeContext.prov_obj.used_artefacts(
            job_order, runtimeContext.process_run_id,
            runtimeContext.reference_locations, str(self.name))
    outputs = {}  # type: Dict[Text,Text]
    try:
        stdin_path = None
        if self.stdin:
            rmap = self.pathmapper.reversemap(self.stdin)
            if not rmap:
                raise WorkflowException("{} missing from pathmapper".format(
                    self.stdin))
            else:
                stdin_path = rmap[1]

        stderr_path = None
        if self.stderr:
            abserr = os.path.join(self.outdir, self.stderr)
            dnerr = os.path.dirname(abserr)
            if dnerr and not os.path.exists(dnerr):
                os.makedirs(dnerr)
            stderr_path = abserr

        stdout_path = None
        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout_path = absout

        commands = [Text(x) for x in (runtime + self.command_line)]
        if runtimeContext.secret_store:
            commands = runtimeContext.secret_store.retrieve(commands)
            env = runtimeContext.secret_store.retrieve(env)

        job_script_contents = None  # type: Optional[Text]
        builder = getattr(self, "builder", None)  # type: Builder
        if builder is not None:
            job_script_contents = builder.build_job_script(commands)

        _logger.debug(u"Running my own execution layer")
        rcode = _job_popen(
            commands,
            stdin_path=stdin_path,
            stdout_path=stdout_path,
            stderr_path=stderr_path,
            env=env,
            cwd=self.outdir,
            job_dir=tempfile.mkdtemp(prefix=getdefault(
                runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)),
            job_script_contents=job_script_contents,
            timelimit=self.timelimit,
            name=self.name)

        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        if self.generatefiles["listing"]:
            assert self.generatemapper is not None
            relink_initialworkdir(self.generatemapper,
                                  self.outdir,
                                  self.builder.outdir,
                                  inplace_update=self.inplace_update)

        outputs = self.collect_outputs(self.outdir)
        outputs = bytes2str_in_dicts(outputs)  # type: ignore
    except OSError as e:
        if e.errno == 2:
            if runtime:
                _logger.error(u"'%s' not found", runtime[0])
            else:
                _logger.error(u"'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error(u"[job %s] Job error:\n%s" % (self.name, e))
        processStatus = "permanentFail"
    except Exception:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if runtimeContext.research_obj and self.prov_obj and \
            runtimeContext.process_run_id:
        # Creating entities for the outputs produced by each step (in the provenance document)
        self.prov_obj.generate_output_prov(outputs,
                                           runtimeContext.process_run_id,
                                           str(self.name))
        self.prov_obj.document.wasEndedBy(runtimeContext.process_run_id, None,
                                          self.prov_obj.workflow_run_uri,
                                          datetime.datetime.now())
    if processStatus != "success":
        _logger.warning(u"[job %s] completed %s", self.name, processStatus)
    else:
        _logger.info(u"[job %s] completed %s", self.name, processStatus)

    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug(u"[job %s] %s", self.name, json_dumps(outputs, indent=4))

    if self.generatemapper and runtimeContext.secret_store:
        # Delete any runtime-generated files containing secrets.
        for f, p in self.generatemapper.items():
            if p.type == "CreateFile":
                if runtimeContext.secret_store.has_secret(p.resolved):
                    host_outdir = self.outdir
                    container_outdir = self.builder.outdir
                    host_outdir_tgt = p.target
                    if p.target.startswith(container_outdir + "/"):
                        host_outdir_tgt = os.path.join(
                            host_outdir, p.target[len(container_outdir) + 1:])
                    os.remove(host_outdir_tgt)

    if runtimeContext.workflow_eval_lock is None:
        raise WorkflowException(
            "runtimeContext.workflow_eval_lock must not be None")

    with runtimeContext.workflow_eval_lock:
        self.output_callback(outputs, processStatus)

    if self.stagedir and os.path.exists(self.stagedir):
        _logger.debug(u"[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
        shutil.rmtree(self.stagedir, True)

    if runtimeContext.rm_tmpdir:
        _logger.debug(u"[job %s] Removing temporary directory %s", self.name,
                      self.tmpdir)
        shutil.rmtree(self.tmpdir, True)
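A hedged sketch of how an override like this is typically installed; the patch target below is an assumption, not shown in the source:

# Hypothetical wiring: route command-line job execution through _parsl_execute
# by replacing the method on cwltool's job class (an assumed integration point).
from cwltool.job import CommandLineJob

CommandLineJob._execute = _parsl_execute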
Example #9
    def job(
        self,
        joborder,  # type: Dict[Text, AnyValue]
        output_callbacks,  # type: Callable[[Any, Any], Any]
        runtime_context,  # type: RuntimeContext
    ):  # type: (...) -> Generator[Union[JobBase, CallbackJob], None, None]
        """
        Workflow job generator.

        :param joborder: inputs of the job submission
        :param output_callbacks: method to fetch step outputs and corresponding step details
        :param runtime_context: configs about execution environment
        :return: generator that yields the prepared WPS workflow job
        """
        require_prefix = ""
        if self.metadata["cwlVersion"] == "v1.0":
            require_prefix = "http://commonwl.org/cwltool#"

        jobname = uniquename(runtime_context.name
                             or shortname(self.tool.get("id", "job")))

        # outdir must be served by the EMS because downstream steps need access to upstream steps' outputs
        weaver_out_dir = get_wps_output_dir(get_settings(app))
        runtime_context.outdir = tempfile.mkdtemp(
            prefix=getdefault(runtime_context.tmp_outdir_prefix, DEFAULT_TMP_PREFIX),
            dir=weaver_out_dir)
        builder = self._init_job(joborder, runtime_context)

        # `jobname` is the step name and `joborder` is the actual step inputs
        wps_workflow_job = WpsWorkflowJob(
            builder, builder.job, self.requirements, self.hints, jobname,
            self.get_job_process_definition(jobname, joborder, self.tool),
            self.tool["outputs"])
        wps_workflow_job.prov_obj = self.prov_obj
        wps_workflow_job.successCodes = self.tool.get("successCodes")
        wps_workflow_job.temporaryFailCodes = self.tool.get(
            "temporaryFailCodes")
        wps_workflow_job.permanentFailCodes = self.tool.get(
            "permanentFailCodes")

        # TODO: taken from command_line_tool.py; this could let us use the revmap if it is ever required
        # reffiles = copy.deepcopy(builder.files)
        # builder.pathmapper = self.make_path_mapper(
        #     reffiles, builder.stagedir, runtimeContext, True)
        # builder.requirements = wps_workflow_job.requirements

        wps_workflow_job.outdir = builder.outdir
        wps_workflow_job.tmpdir = builder.tmpdir
        wps_workflow_job.stagedir = builder.stagedir

        readers = {}  # type: Dict[Text, Any]
        timelimit = self.get_requirement(require_prefix + "TimeLimit")[0]
        if timelimit:
            with SourceLine(timelimit, "timelimit",
                            validate.ValidationException):
                wps_workflow_job.timelimit = builder.do_eval(
                    timelimit["timelimit"])
                if not isinstance(wps_workflow_job.timelimit,
                                  int) or wps_workflow_job.timelimit < 0:
                    raise Exception(
                        "timelimit must be an integer >= 0, got: %s" %
                        wps_workflow_job.timelimit)

        wps_workflow_job.collect_outputs = partial(
            self.collect_output_ports,
            self.tool["outputs"],
            builder,
            compute_checksum=getdefault(runtime_context.compute_checksum,
                                        True),
            jobname=jobname,
            readers=readers)
        wps_workflow_job.output_callback = output_callbacks

        yield wps_workflow_job
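A sketch of consuming this generator, mirroring cwltool's single-job executor loop; "tool", "joborder", "output_callbacks", and "runtime_context" are assumed to be prepared by the caller:

# Hypothetical driver loop over the yielded WpsWorkflowJob instances.
for runnable in tool.job(joborder, output_callbacks, runtime_context):
    if runnable:
        runnable.run(runtime_context)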