Example #1
File: main.py Project: gungorbudak/cwltool
def print_pack(loadingContext: LoadingContext, uri: str,) -> str:
    """Return a CWL serialization of the CWL document in JSON."""
    packed = pack(loadingContext, uri)
    if len(cast(Sized, packed["$graph"])) > 1:
        return json_dumps(packed, indent=4)
    return json_dumps(
        cast(MutableSequence[CWLObjectType], packed["$graph"])[0], indent=4
    )
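All of these examples lean on a shared `json_dumps` helper imported from the project's utility module. The snippets only pass keywords that the standard `json.dumps` also accepts (`indent`, `sort_keys`, `separators`, `ensure_ascii`, `default`), so a minimal stand-in for reading the examples offline might look like the following sketch (an illustration, not the project's actual implementation):

import json
from typing import Any


def json_dumps(obj: Any, **kwargs: Any) -> str:
    # Forward everything to the standard library; the real helper may add
    # project-specific defaults, so treat this only as a reading aid.
    return json.dumps(obj, **kwargs)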
Example #2
def print_pack(
        loadingContext,  # type: LoadingContext
        uri,  # type: str
):  # type: (...) -> str
    """Return a CWL serialization of the CWL document in JSON."""
    packed = pack(loadingContext, uri)
    if len(packed["$graph"]) > 1:
        return json_dumps(packed, indent=4)
    return json_dumps(packed["$graph"][0], indent=4)
Example #3
    def __eq__(self, actual):
        expected_json = json_dumps(self.expected, sort_keys=True, indent=2)
        actual_json = json_dumps(actual, sort_keys=True, indent=2)
        if expected_json != actual_json:
            raise AssertionError("".join(
                difflib.context_diff(expected_json.splitlines(1),
                                     actual_json.splitlines(1),
                                     fromfile="Expected",
                                     tofile="Actual")))
        return True
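The class that owns this `__eq__` is not shown above. A self-contained sketch of the same diff-on-mismatch pattern, using a hypothetical `JsonEquals` wrapper name:

import difflib
import json


class JsonEquals:
    # Hypothetical wrapper; the original class name is not shown above.
    def __init__(self, expected):
        self.expected = expected

    def __eq__(self, actual):
        expected_json = json.dumps(self.expected, sort_keys=True, indent=2)
        actual_json = json.dumps(actual, sort_keys=True, indent=2)
        if expected_json != actual_json:
            # Fail loudly with a readable context diff instead of returning
            # False, so test output shows exactly which fields differ.
            raise AssertionError("".join(
                difflib.context_diff(expected_json.splitlines(True),
                                     actual_json.splitlines(True),
                                     fromfile="Expected",
                                     tofile="Actual")))
        return True


assert JsonEquals({"a": 1}) == {"a": 1}  # equal documents compare cleanly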
Example #4
File: main.py Project: illusional/cwltool
def print_pack(
        document_loader,  # type: Loader
        processobj,  # type: CommentedMap
        uri,  # type: str
        metadata,  # type: Dict[str, Any]
):  # type: (...) -> str
    """Return a CWL serialization of the CWL document in JSON."""
    packed = pack(document_loader, processobj, uri, metadata)
    if len(packed["$graph"]) > 1:
        return json_dumps(packed, indent=4)
    return json_dumps(packed["$graph"][0], indent=4)
Example #5
    def create_job(self,
                   builder_job: CWLObjectType,
                   is_output: bool = False) -> CWLObjectType:
        """Generate the new job object with RO specific relative paths."""
        # TODO customise the file
        copied = copy.deepcopy(builder_job)
        relativised_input_objecttemp = {}  # type: CWLObjectType
        self._relativise_files(copied)

        def jdefault(o: Any) -> Dict[Any, Any]:
            return dict(o)

        if is_output:
            rel_path = PurePosixPath(WORKFLOW) / "primary-output.json"
        else:
            rel_path = PurePosixPath(WORKFLOW) / "primary-job.json"
        j = json_dumps(copied, indent=4, ensure_ascii=False, default=jdefault)
        with self.write_bag_file(str(rel_path)) as file_path:
            file_path.write(j + "\n")
        _logger.debug("[provenance] Generated customised job file: %s",
                      rel_path)
        # Generate dictionary with keys as workflow level input IDs and values
        # as
        # 1) for files the relativised location containing hash
        # 2) for other attributes, the actual value.
        for key, value in copied.items():
            if isinstance(value, MutableMapping):
                if value.get("class") in ("File", "Directory"):
                    relativised_input_objecttemp[key] = value
            else:
                relativised_input_objecttemp[key] = value
        self.relativised_input_object.update(
            {k: v
             for k, v in relativised_input_objecttemp.items() if v})
        return self.relativised_input_object
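The `default=jdefault` hook above is how `json_dumps` copes with mapping-like objects (such as ruamel.yaml's CommentedMap) that the JSON encoder cannot serialize directly: the encoder calls the hook, which converts the object to a plain dict. A minimal demonstration with a hypothetical mapping class:

import json


class Box:
    # Hypothetical mapping-like object that json cannot serialize directly,
    # but that dict() can consume via keys()/__getitem__.
    def __init__(self, **kw):
        self._d = kw

    def keys(self):
        return self._d.keys()

    def __getitem__(self, key):
        return self._d[key]


print(json.dumps(Box(a=1), default=dict))  # -> {"a": 1}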
Example #6
    def _setup(self, runtimeContext: RuntimeContext) -> None:
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        for knownfile in self.pathmapper.files():
            p = self.pathmapper.mapper(knownfile)
            if p.type == "File" and not os.path.isfile(p[0]) and p.staged:
                raise WorkflowException(
                    "Input file %s (at %s) not found or is not a regular "
                    "file." % (knownfile, self.pathmapper.mapper(knownfile)[0])
                )

        if "listing" in self.generatefiles:
            runtimeContext = runtimeContext.copy()
            runtimeContext.outdir = self.outdir
            self.generatemapper = self.make_path_mapper(
                self.generatefiles["listing"],
                self.builder.outdir,
                runtimeContext,
                False,
            )
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(
                    "[job %s] initial work dir %s",
                    self.name,
                    json_dumps(
                        {
                            p: self.generatemapper.mapper(p)
                            for p in self.generatemapper.files()
                        },
                        indent=4,
                    ),
                )
Example #7
    def receive_output(self, step, outputparms, final_output_callback, jobout,
                       processStatus):
        # type: (WorkflowJobStep, List[Dict[str,str]], Callable[[Any, Any], Any], Dict[str,str], str) -> None

        for i in outputparms:
            if "id" in i:
                if i["id"] in jobout:
                    self.state[i["id"]] = WorkflowStateItem(
                        i, jobout[i["id"]], processStatus)
                else:
                    _logger.error("[%s] Output is missing expected field %s",
                                  step.name, i["id"])
                    processStatus = "permanentFail"
        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug("[%s] produced output %s", step.name,
                          json_dumps(jobout, indent=4))

        if processStatus != "success":
            if self.processStatus != "permanentFail":
                self.processStatus = processStatus

            _logger.warning("[%s] completed %s", step.name, processStatus)
        else:
            _logger.info("[%s] completed %s", step.name, processStatus)

        step.completed = True
        # Release the iterable related to this step to
        # reclaim memory.
        step.iterable = None
        self.made_progress = True

        completed = sum(1 for s in self.steps if s.completed)
        if completed == len(self.steps):
            self.do_output_callback(final_output_callback)
Example #8
    def _write_ro_manifest(self) -> None:

        # Does not have to be this order, but it's nice to be consistent
        filename = "manifest.json"
        createdOn, createdBy = self._self_made()
        manifest = OrderedDict({
            "@context": [
                {
                    "@base": "%s%s/" % (self.base_uri, posix_path(METADATA))
                },
                "https://w3id.org/bundle/context",
            ],
            "id":
            "/",
            "conformsTo":
            CWLPROV_VERSION,
            "manifest":
            filename,
            "createdOn":
            createdOn,
            "createdBy":
            createdBy,
            "authoredBy":
            self._authored_by(),
            "aggregates":
            self._ro_aggregates(),
            "annotations":
            self._ro_annotations(),
        })

        json_manifest = json_dumps(manifest, indent=4, ensure_ascii=False)
        rel_path = str(PurePosixPath(METADATA) / filename)
        json_manifest += "\n"
        with self.write_bag_file(rel_path) as manifest_file:
            manifest_file.write(json_manifest)
Example #9
def jshead(engine_config: List[str], rootvars: CWLObjectType) -> str:
    # make sure all the byte strings are converted
    # to str in `rootvars` dict.

    return "\n".join(engine_config + [
        "var {} = {};".format(k, json_dumps(v, indent=4))
        for k, v in rootvars.items()
    ])
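A quick illustration of the string `jshead` builds, using plain `json.dumps` in place of the project's `json_dumps` (the output shape follows directly from the code above):

import json

engine_config = ["'use strict';"]
rootvars = {"inputs": {"threads": 4}}
print("\n".join(engine_config + [
    "var {} = {};".format(k, json.dumps(v, indent=4))
    for k, v in rootvars.items()
]))
# 'use strict';
# var inputs = {
#     "threads": 4
# };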
Example #10
def _convert_stdstreams_to_files(
    workflowobj: Union[MutableMapping[str, Any],
                       MutableSequence[Union[Dict[str, Any], str, int]], str]
) -> None:
    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get("class") == "CommandLineTool":
            with SourceLine(
                    workflowobj,
                    "outputs",
                    ValidationException,
                    _logger.isEnabledFor(logging.DEBUG),
            ):
                outputs = workflowobj.get("outputs", [])
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not '
                                              "valid.")
                for out in workflowobj.get("outputs", []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid "
                            "OutputParameter.".format(out))
                    for streamtype in ["stdout", "stderr"]:
                        if out.get("type") == streamtype:
                            if "outputBinding" in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype)
                            if streamtype in workflowobj:
                                filename = workflowobj[streamtype]
                            else:
                                filename = str(
                                    hashlib.sha1(  # nosec
                                        json_dumps(workflowobj,
                                                   sort_keys=True).encode(
                                                       "utf-8")).hexdigest())
                                workflowobj[streamtype] = filename
                            out["type"] = "File"
                            out["outputBinding"] = cmap({"glob": filename})
            for inp in workflowobj.get("inputs", []):
                if inp.get("type") == "stdin":
                    if "inputBinding" in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if "stdin" in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        workflowobj["stdin"] = ("$(inputs.%s.path)" %
                                                inp["id"].rpartition("#")[2])
                        inp["type"] = "File"
        else:
            for entry in workflowobj.values():
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
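When no filename is given for a `stdout`/`stderr` shortcut, the code above derives a deterministic one: the SHA-1 of the document's canonical (`sort_keys=True`) JSON serialization, so repeated runs over the same tool document produce the same name. In isolation:

import hashlib
import json

workflowobj = {"class": "CommandLineTool", "outputs": []}
filename = hashlib.sha1(  # nosec - a stable name, not a security measure
    json.dumps(workflowobj, sort_keys=True).encode("utf-8")).hexdigest()
print(filename)  # identical for every run over the same document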
Example #11
def jshead(engine_config, rootvars):
    # type: (List[str], Dict[str, Any]) -> str

    # make sure all the byte strings are converted
    # to str in `rootvars` dict.

    return "\n".join(engine_config + [
        "var {} = {};".format(k, json_dumps(v, indent=4))
        for k, v in rootvars.items()
    ])
Example #12
def check_format(
    actual_file: Union[CWLObjectType, List[CWLObjectType]],
    input_formats: Union[List[str], str],
    ontology: Optional[Graph],
) -> None:
    """Confirm that the format present is valid for the allowed formats."""
    for afile in aslist(actual_file):
        if not afile:
            continue
        if "format" not in afile:
            raise ValidationException(
                "File has no 'format' defined: {}".format(
                    json_dumps(afile, indent=4)))
        for inpf in aslist(input_formats):
            if afile["format"] == inpf or formatSubclassOf(
                    afile["format"], inpf, ontology, set()):
                return
        raise ValidationException("File has an incompatible format: {}".format(
            json_dumps(afile, indent=4)))
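`check_format` relies on `aslist` to accept either a single file object or a list of them. A minimal version of that helper, matching how it is used here (schema-salad's real implementation may differ in detail):

from typing import Any, List


def aslist(thing: Any) -> List[Any]:
    # Wrap a lone value in a list so callers can always iterate.
    return thing if isinstance(thing, list) else [thing]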
Example #13
def check_format(
        actual_file,  # type: Union[Dict[str, Any], List[Dict[str, Any]], str]
        input_formats,  # type: Union[List[str], str]
        ontology,  # type: Optional[Graph]
):  # type: (...) -> None
    """Confirm that the format present is valid for the allowed formats."""
    for afile in aslist(actual_file):
        if not afile:
            continue
        if "format" not in afile:
            raise validate.ValidationException(
                "File has no 'format' defined: {}".format(
                    json_dumps(afile, indent=4)))
        for inpf in aslist(input_formats):
            if afile["format"] == inpf or formatSubclassOf(
                    afile["format"], inpf, ontology, set()):
                return
        raise validate.ValidationException(
            "File has an incompatible format: {}".format(
                json_dumps(afile, indent=4)))
Example #14
    def _setup(self, runtimeContext: RuntimeContext) -> None:

        cuda_req, _ = self.builder.get_requirement(
            "http://commonwl.org/cwltool#CUDARequirement")
        if cuda_req:
            count = cuda_check(
                cuda_req, math.ceil(self.builder.resources["cudaDeviceCount"]))
            if count == 0:
                raise WorkflowException("Could not satisfy CUDARequirement")

        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        def is_streamable(file: str) -> bool:
            if not runtimeContext.streaming_allowed:
                return False
            for inp in self.joborder.values():
                if isinstance(inp, dict) and inp.get("location", None) == file:
                    return inp.get("streamable", False)
            return False

        for knownfile in self.pathmapper.files():
            p = self.pathmapper.mapper(knownfile)
            if p.type == "File" and not os.path.isfile(p[0]) and p.staged:
                if not (is_streamable(knownfile)
                        and stat.S_ISFIFO(os.stat(p[0]).st_mode)):
                    raise WorkflowException(
                        "Input file %s (at %s) not found or is not a regular "
                        "file." %
                        (knownfile, self.pathmapper.mapper(knownfile)[0]))

        if "listing" in self.generatefiles:
            runtimeContext = runtimeContext.copy()
            runtimeContext.outdir = self.outdir
            self.generatemapper = self.make_path_mapper(
                self.generatefiles["listing"],
                self.builder.outdir,
                runtimeContext,
                False,
            )
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(
                    "[job %s] initial work dir %s",
                    self.name,
                    json_dumps(
                        {
                            p: self.generatemapper.mapper(p)
                            for p in self.generatemapper.files()
                        },
                        indent=4,
                    ),
                )
        self.base_path_logs = runtimeContext.set_log_dir(
            self.outdir, runtimeContext.log_dir, self.name)
Example #15
    def do_output_callback(self, final_output_callback: OutputCallbackType) -> None:

        supportsMultipleInput = bool(
            self.workflow.get_requirement("MultipleInputFeatureRequirement")[0]
        )

        wo = None  # type: Optional[CWLObjectType]
        try:
            wo = object_from_state(
                self.state,
                self.tool["outputs"],
                True,
                supportsMultipleInput,
                "outputSource",
                incomplete=True,
            )
        except WorkflowException as err:
            _logger.error(
                "[%s] Cannot collect workflow output: %s", self.name, str(err)
            )
            self.processStatus = "permanentFail"
        if (
            self.prov_obj
            and self.parent_wf
            and self.prov_obj.workflow_run_uri != self.parent_wf.workflow_run_uri
        ):
            process_run_id = None  # type: Optional[str]
            self.prov_obj.generate_output_prov(wo or {}, process_run_id, self.name)
            self.prov_obj.document.wasEndedBy(
                self.prov_obj.workflow_run_uri,
                None,
                self.prov_obj.engine_uuid,
                datetime.datetime.now(),
            )
            prov_ids = self.prov_obj.finalize_prov_profile(self.name)
            # Tell parent to associate our provenance files with our wf run
            self.parent_wf.activity_has_provenance(
                self.prov_obj.workflow_run_uri, prov_ids
            )

        _logger.info("[%s] completed %s", self.name, self.processStatus)
        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug("[%s] outputs %s", self.name, json_dumps(wo, indent=4))

        self.did_callback = True

        final_output_callback(wo, self.processStatus)
Example #16
File: main.py Project: gungorbudak/cwltool
def printdeps(
    obj: CWLObjectType,
    document_loader: Loader,
    stdout: Union[TextIO, StreamWriter],
    relative_deps: str,
    uri: str,
    basedir: Optional[str] = None,
    nestdirs: bool = True,
) -> None:
    """Print a JSON representation of the dependencies of the CWL document."""
    deps = find_deps(obj, document_loader, uri, basedir=basedir, nestdirs=nestdirs)
    if relative_deps == "primary":
        base = basedir if basedir else os.path.dirname(uri_file_path(str(uri)))
    elif relative_deps == "cwd":
        base = os.getcwd()
    visit_class(deps, ("File", "Directory"), functools.partial(make_relative, base))
    stdout.write(json_dumps(deps, indent=4))
Example #17
def interpolate(
        scan,  # type: str
        rootvars,  # type: Dict[str, Any]
        timeout=default_timeout,  # type: float
        fullJS=False,  # type: bool
        jslib="",  # type: str
        force_docker_pull=False,  # type: bool
        debug=False,  # type: bool
        js_console=False,  # type: bool
        strip_whitespace=True,  # type: bool
):  # type: (...) -> JSON
    if strip_whitespace:
        scan = scan.strip()
    parts = []
    w = scanner(scan)
    while w:
        parts.append(scan[0:w[0]])

        if scan[w[0]] == "$":
            e = evaluator(
                scan[w[0] + 1:w[1]],
                jslib,
                rootvars,
                timeout,
                fullJS=fullJS,
                force_docker_pull=force_docker_pull,
                debug=debug,
                js_console=js_console,
            )
            if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                return e
            leaf = json_dumps(e, sort_keys=True)
            if leaf[0] == '"':
                leaf = leaf[1:-1]
            parts.append(leaf)
        elif scan[w[0]] == "\\":
            e = scan[w[1] - 1]
            parts.append(e)

        scan = scan[w[1]:]
        w = scanner(scan)
    parts.append(scan)
    return "".join(parts)
Example #18
def interpolate(
    scan: str,
    rootvars: CWLObjectType,
    timeout: float = default_timeout,
    fullJS: bool = False,
    jslib: str = "",
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
) -> Optional[CWLOutputType]:
    if strip_whitespace:
        scan = scan.strip()
    parts = []
    w = scanner(scan)
    while w:
        parts.append(scan[0 : w[0]])

        if scan[w[0]] == "$":
            e = evaluator(
                scan[w[0] + 1 : w[1]],
                jslib,
                rootvars,
                timeout,
                fullJS=fullJS,
                force_docker_pull=force_docker_pull,
                debug=debug,
                js_console=js_console,
            )
            if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                return e
            leaf = json_dumps(e, sort_keys=True)
            if leaf[0] == '"':
                leaf = leaf[1:-1]
            parts.append(leaf)
        elif scan[w[0]] == "\\":
            e = scan[w[1] - 1]
            parts.append(e)

        scan = scan[w[1] :]
        w = scanner(scan)
    parts.append(scan)
    return "".join(parts)
Example #19
    def _setup(self, runtimeContext: RuntimeContext) -> None:
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        def is_streamable(file: str) -> bool:
            if not runtimeContext.streaming_allowed:
                return False
            for inp in self.joborder.values():
                if isinstance(inp, dict) and inp.get("location", None) == file:
                    return inp.get("streamable", False)
            return False

        for knownfile in self.pathmapper.files():
            p = self.pathmapper.mapper(knownfile)
            if p.type == "File" and not os.path.isfile(p[0]) and p.staged:
                if not (is_streamable(knownfile)
                        and stat.S_ISFIFO(os.stat(p[0]).st_mode)):
                    raise WorkflowException(
                        "Input file %s (at %s) not found or is not a regular "
                        "file." %
                        (knownfile, self.pathmapper.mapper(knownfile)[0]))

        if "listing" in self.generatefiles:
            runtimeContext = runtimeContext.copy()
            runtimeContext.outdir = self.outdir
            self.generatemapper = self.make_path_mapper(
                self.generatefiles["listing"],
                self.builder.outdir,
                runtimeContext,
                False,
            )
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(
                    "[job %s] initial work dir %s",
                    self.name,
                    json_dumps(
                        {
                            p: self.generatemapper.mapper(p)
                            for p in self.generatemapper.files()
                        },
                        indent=4,
                    ),
                )
Example #20
    def receive_output(
        self,
        step: WorkflowJobStep,
        outputparms: List[CWLObjectType],
        final_output_callback: OutputCallbackType,
        jobout: CWLObjectType,
        processStatus: str,
    ) -> None:

        for i in outputparms:
            if "id" in i:
                iid = cast(str, i["id"])
                if iid in jobout:
                    self.state[iid] = WorkflowStateItem(i, jobout[iid], processStatus)
                else:
                    _logger.error(
                        "[%s] Output is missing expected field %s", step.name, iid
                    )
                    processStatus = "permanentFail"
        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "[%s] produced output %s", step.name, json_dumps(jobout, indent=4)
            )

        if processStatus not in ("success", "skipped"):
            if self.processStatus != "permanentFail":
                self.processStatus = processStatus

            _logger.warning("[%s] completed %s", step.name, processStatus)
        else:
            _logger.info("[%s] completed %s", step.name, processStatus)

        step.completed = True
        # Release the iterable related to this step to
        # reclaim memory.
        step.iterable = None
        self.made_progress = True

        completed = sum(1 for s in self.steps if s.completed)
        if completed == len(self.steps):
            self.do_output_callback(final_output_callback)
Example #21
def printdeps(
        obj,  # type: Mapping[str, Any]
        document_loader,  # type: Loader
        stdout,  # type: Union[TextIO, StreamWriter]
        relative_deps,  # type: bool
        uri,  # type: str
        basedir=None,  # type: Optional[str]
        nestdirs=True,  # type: bool
):  # type: (...) -> None
    """Print a JSON representation of the dependencies of the CWL document."""
    deps = find_deps(obj,
                     document_loader,
                     uri,
                     basedir=basedir,
                     nestdirs=nestdirs)
    if relative_deps == "primary":
        base = basedir if basedir else os.path.dirname(uri_file_path(str(uri)))
    elif relative_deps == "cwd":
        base = os.getcwd()
    visit_class(deps, ("File", "Directory"),
                functools.partial(make_relative, base))
    stdout.write(json_dumps(deps, indent=4))
Example #22
def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]:
    """Instantiate the metaschema."""
    loader = ref_resolver.Loader(
        {
            "Any": saladp + "Any",
            "ArraySchema": saladp + "ArraySchema",
            "Array_symbol": saladp + "ArraySchema/type/Array_symbol",
            "DocType": saladp + "DocType",
            "Documentation": saladp + "Documentation",
            "Documentation_symbol": saladp + "Documentation/type/Documentation_symbol",
            "Documented": saladp + "Documented",
            "EnumSchema": saladp + "EnumSchema",
            "Enum_symbol": saladp + "EnumSchema/type/Enum_symbol",
            "JsonldPredicate": saladp + "JsonldPredicate",
            "NamedType": saladp + "NamedType",
            "PrimitiveType": saladp + "PrimitiveType",
            "RecordField": saladp + "RecordField",
            "RecordSchema": saladp + "RecordSchema",
            "Record_symbol": saladp + "RecordSchema/type/Record_symbol",
            "SaladEnumSchema": saladp + "SaladEnumSchema",
            "SaladRecordField": saladp + "SaladRecordField",
            "SaladRecordSchema": saladp + "SaladRecordSchema",
            "SchemaDefinedType": saladp + "SchemaDefinedType",
            "SpecializeDef": saladp + "SpecializeDef",
            "_container": saladp + "JsonldPredicate/_container",
            "_id": {"@id": saladp + "_id", "@type": "@id", "identity": True},
            "_type": saladp + "JsonldPredicate/_type",
            "abstract": saladp + "SaladRecordSchema/abstract",
            "array": saladp + "array",
            "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
            "dct": "http://purl.org/dc/terms/",
            "default": {"@id": saladp + "default", "noLinkCheck": True},
            "doc": "rdfs:comment",
            "docAfter": {"@id": saladp + "docAfter", "@type": "@id"},
            "docChild": {"@id": saladp + "docChild", "@type": "@id"},
            "docParent": {"@id": saladp + "docParent", "@type": "@id"},
            "documentRoot": saladp + "SchemaDefinedType/documentRoot",
            "documentation": saladp + "documentation",
            "double": "http://www.w3.org/2001/XMLSchema#double",
            "enum": saladp + "enum",
            "extends": {"@id": saladp + "extends", "@type": "@id", "refScope": 1},
            "fields": {
                "@id": saladp + "fields",
                "mapPredicate": "type",
                "mapSubject": "name",
            },
            "float": "http://www.w3.org/2001/XMLSchema#float",
            "identity": saladp + "JsonldPredicate/identity",
            "inVocab": saladp + "NamedType/inVocab",
            "int": "http://www.w3.org/2001/XMLSchema#int",
            "items": {"@id": saladp + "items", "@type": "@vocab", "refScope": 2},
            "jsonldPredicate": "sld:jsonldPredicate",
            "long": "http://www.w3.org/2001/XMLSchema#long",
            "mapPredicate": saladp + "JsonldPredicate/mapPredicate",
            "mapSubject": saladp + "JsonldPredicate/mapSubject",
            "name": "@id",
            "noLinkCheck": saladp + "JsonldPredicate/noLinkCheck",
            "null": saladp + "null",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "record": saladp + "record",
            "refScope": saladp + "JsonldPredicate/refScope",
            "sld": saladp,
            "specialize": {
                "@id": saladp + "specialize",
                "mapPredicate": "specializeTo",
                "mapSubject": "specializeFrom",
            },
            "specializeFrom": {
                "@id": saladp + "specializeFrom",
                "@type": "@id",
                "refScope": 1,
            },
            "specializeTo": {
                "@id": saladp + "specializeTo",
                "@type": "@id",
                "refScope": 1,
            },
            "string": "http://www.w3.org/2001/XMLSchema#string",
            "subscope": saladp + "JsonldPredicate/subscope",
            "symbols": {"@id": saladp + "symbols", "@type": "@id", "identity": True},
            "type": {
                "@id": saladp + "type",
                "@type": "@vocab",
                "refScope": 2,
                "typeDSL": True,
            },
            "typeDSL": saladp + "JsonldPredicate/typeDSL",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
        }
    )

    for salad in SALAD_FILES:
        with resource_stream("schema_salad", "metaschema/" + salad) as stream:
            loader.cache["https://w3id.org/cwl/" + salad] = stream.read().decode(
                "UTF-8"
            )

    with resource_stream("schema_salad", "metaschema/metaschema.yml") as stream:
        loader.cache["https://w3id.org/cwl/salad"] = stream.read().decode("UTF-8")

    j = yaml.main.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j2 = loader.resolve_all(j, saladp)[0]

    if not isinstance(j2, list):
        _logger.error("%s", j2)
        raise SchemaParseException("Not a list: {}".format(j2))
    else:
        sch_obj = make_avro(j2, loader)
    try:
        sch_names = make_avro_schema_from_avro(sch_obj)
    except SchemaParseException:
        _logger.error("Metaschema error, avro was:\n%s", json_dumps(sch_obj, indent=4))
        raise
    validate_doc(sch_names, j2, loader, strict=True)
    return (sch_names, j2, loader)
Example #23
def get_metaschema():
    # type: () -> Tuple[Names, List[Dict[Text, Any]], Loader]
    loader = ref_resolver.Loader({
        "Any": "https://w3id.org/cwl/salad#Any",
        "ArraySchema": "https://w3id.org/cwl/salad#ArraySchema",
        "Array_symbol":
        "https://w3id.org/cwl/salad#ArraySchema/type/Array_symbol",
        "DocType": "https://w3id.org/cwl/salad#DocType",
        "Documentation": "https://w3id.org/cwl/salad#Documentation",
        "Documentation_symbol":
        "https://w3id.org/cwl/salad#Documentation/type/Documentation_symbol",
        "Documented": "https://w3id.org/cwl/salad#Documented",
        "EnumSchema": "https://w3id.org/cwl/salad#EnumSchema",
        "Enum_symbol":
        "https://w3id.org/cwl/salad#EnumSchema/type/Enum_symbol",
        "JsonldPredicate": "https://w3id.org/cwl/salad#JsonldPredicate",
        "NamedType": "https://w3id.org/cwl/salad#NamedType",
        "PrimitiveType": "https://w3id.org/cwl/salad#PrimitiveType",
        "RecordField": "https://w3id.org/cwl/salad#RecordField",
        "RecordSchema": "https://w3id.org/cwl/salad#RecordSchema",
        "Record_symbol":
        "https://w3id.org/cwl/salad#RecordSchema/type/Record_symbol",
        "SaladEnumSchema": "https://w3id.org/cwl/salad#SaladEnumSchema",
        "SaladRecordField": "https://w3id.org/cwl/salad#SaladRecordField",
        "SaladRecordSchema": "https://w3id.org/cwl/salad#SaladRecordSchema",
        "SchemaDefinedType": "https://w3id.org/cwl/salad#SchemaDefinedType",
        "SpecializeDef": "https://w3id.org/cwl/salad#SpecializeDef",
        "_container": "https://w3id.org/cwl/salad#JsonldPredicate/_container",
        "_id": {
            "@id": "https://w3id.org/cwl/salad#_id",
            "@type": "@id",
            "identity": True
        },
        "_type": "https://w3id.org/cwl/salad#JsonldPredicate/_type",
        "abstract": "https://w3id.org/cwl/salad#SaladRecordSchema/abstract",
        "array": "https://w3id.org/cwl/salad#array",
        "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
        "dct": "http://purl.org/dc/terms/",
        "default": {
            "@id": "https://w3id.org/cwl/salad#default",
            "noLinkCheck": True
        },
        "doc": "rdfs:comment",
        "docAfter": {
            "@id": "https://w3id.org/cwl/salad#docAfter",
            "@type": "@id"
        },
        "docChild": {
            "@id": "https://w3id.org/cwl/salad#docChild",
            "@type": "@id"
        },
        "docParent": {
            "@id": "https://w3id.org/cwl/salad#docParent",
            "@type": "@id"
        },
        "documentRoot":
        "https://w3id.org/cwl/salad#SchemaDefinedType/documentRoot",
        "documentation": "https://w3id.org/cwl/salad#documentation",
        "double": "http://www.w3.org/2001/XMLSchema#double",
        "enum": "https://w3id.org/cwl/salad#enum",
        "extends": {
            "@id": "https://w3id.org/cwl/salad#extends",
            "@type": "@id",
            "refScope": 1
        },
        "fields": {
            "@id": "https://w3id.org/cwl/salad#fields",
            "mapPredicate": "type",
            "mapSubject": "name"
        },
        "float": "http://www.w3.org/2001/XMLSchema#float",
        "identity": "https://w3id.org/cwl/salad#JsonldPredicate/identity",
        "inVocab": "https://w3id.org/cwl/salad#NamedType/inVocab",
        "int": "http://www.w3.org/2001/XMLSchema#int",
        "items": {
            "@id": "https://w3id.org/cwl/salad#items",
            "@type": "@vocab",
            "refScope": 2
        },
        "jsonldPredicate": "sld:jsonldPredicate",
        "long": "http://www.w3.org/2001/XMLSchema#long",
        "mapPredicate":
        "https://w3id.org/cwl/salad#JsonldPredicate/mapPredicate",
        "mapSubject": "https://w3id.org/cwl/salad#JsonldPredicate/mapSubject",
        "name": "@id",
        "noLinkCheck":
        "https://w3id.org/cwl/salad#JsonldPredicate/noLinkCheck",
        "null": "https://w3id.org/cwl/salad#null",
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "record": "https://w3id.org/cwl/salad#record",
        "refScope": "https://w3id.org/cwl/salad#JsonldPredicate/refScope",
        "sld": "https://w3id.org/cwl/salad#",
        "specialize": {
            "@id": "https://w3id.org/cwl/salad#specialize",
            "mapPredicate": "specializeTo",
            "mapSubject": "specializeFrom"
        },
        "specializeFrom": {
            "@id": "https://w3id.org/cwl/salad#specializeFrom",
            "@type": "@id",
            "refScope": 1
        },
        "specializeTo": {
            "@id": "https://w3id.org/cwl/salad#specializeTo",
            "@type": "@id",
            "refScope": 1
        },
        "string": "http://www.w3.org/2001/XMLSchema#string",
        "subscope": "https://w3id.org/cwl/salad#JsonldPredicate/subscope",
        "symbols": {
            "@id": "https://w3id.org/cwl/salad#symbols",
            "@type": "@id",
            "identity": True
        },
        "type": {
            "@id": "https://w3id.org/cwl/salad#type",
            "@type": "@vocab",
            "refScope": 2,
            "typeDSL": True
        },
        "typeDSL": "https://w3id.org/cwl/salad#JsonldPredicate/typeDSL",
        "xsd": "http://www.w3.org/2001/XMLSchema#"
    })

    for f in salad_files:
        rs = resource_stream(__name__, 'metaschema/' + f)
        loader.cache["https://w3id.org/cwl/" + f] = rs.read()
        rs.close()

    rs = resource_stream(__name__, 'metaschema/metaschema.yml')
    loader.cache["https://w3id.org/cwl/salad"] = rs.read()
    rs.close()

    j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#")

    (sch_names, sch_obj) = make_avro_schema(j, loader)
    if isinstance(sch_names, Exception):
        _logger.error("Metaschema error, avro was:\n%s",
                      json_dumps(sch_obj, indent=4))
        raise sch_names
    validate_doc(sch_names, j, loader, strict=True)
    return (sch_names, j, loader)
Example #24
def jshint_js(
    js_text: str,
    globals: Optional[List[str]] = None,
    options: Optional[Dict[str, Union[List[str], str, int]]] = None,
) -> JSHintJSReturn:
    if globals is None:
        globals = []
    if options is None:
        options = {
            "includewarnings": [
                "W117",  # <VARIABLE> not defined
                "W104",
                "W119",  # using ES6 features
            ],
            "strict": "implied",
            "esversion": 5,
        }

    with resource_stream(__name__, "jshint/jshint.js") as res:
        # NOTE: we need a global variable for lodash (which jshint depends on)
        jshint_functions_text = "var global = this;" + res.read().decode("utf-8")

    with resource_stream(__name__, "jshint/jshint_wrapper.js") as res2:
        # NOTE: we need to assign to ob, as the bare expression
        # {validateJS: validateJS} would otherwise be interpreted as a block
        # with a label `validateJS`
        jshint_functions_text += (
            "\n"
            + res2.read().decode("utf-8")
            + "\nvar ob = {validateJS: validateJS}; ob"
        )

    returncode, stdout, stderr = exec_js_process(
        "validateJS(%s)"
        % json_dumps({"code": js_text, "options": options, "globals": globals}),
        timeout=30,
        context=jshint_functions_text,
    )

    def dump_jshint_error():
        # type: () -> None
        raise RuntimeError(
            'jshint failed to run successfully\nreturncode: %d\nstdout: "%s"\nstderr: "%s"'
            % (returncode, stdout, stderr)
        )

    if returncode == -1:
        _logger.warning("jshint process timed out")

    if returncode != 0:
        dump_jshint_error()

    try:
        jshint_json = json.loads(stdout)
    except ValueError:
        dump_jshint_error()

    jshint_errors = []  # type: List[str]

    js_text_lines = js_text.split("\n")

    for jshint_error_obj in jshint_json.get("errors", []):
        text = "JSHINT: " + js_text_lines[jshint_error_obj["line"] - 1] + "\n"
        text += "JSHINT: " + " " * (jshint_error_obj["character"] - 1) + "^\n"
        text += "JSHINT: %s: %s" % (
            jshint_error_obj["code"],
            jshint_error_obj["reason"],
        )
        jshint_errors.append(text)

    return JSHintJSReturn(jshint_errors, jshint_json.get("globals", []))
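The error formatting at the end of `jshint_js` points a caret at the offending column. With a sample error record shaped like the objects the loop consumes (field values here are illustrative):

js_text = "var a = b;"
js_text_lines = js_text.split("\n")
jshint_error_obj = {"line": 1, "character": 9,
                    "code": "W117", "reason": "'b' is not defined."}
text = "JSHINT: " + js_text_lines[jshint_error_obj["line"] - 1] + "\n"
text += "JSHINT: " + " " * (jshint_error_obj["character"] - 1) + "^\n"
text += "JSHINT: %s: %s" % (jshint_error_obj["code"],
                            jshint_error_obj["reason"])
print(text)
# JSHINT: var a = b;
# JSHINT:         ^
# JSHINT: W117: 'b' is not defined.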
Example #25
    def _execute(
        self,
        runtime: List[str],
        env: MutableMapping[str, str],
        runtimeContext: RuntimeContext,
        monitor_function=None,  # type: Optional[Callable[[subprocess.Popen[str]], None]]
    ) -> None:
        """Execute the tool, either directly or via script.

        Note: we are now at the point where self.environment is
        ignored. The caller is responsible for correctly splitting that
        into the runtime and env arguments.

        `runtime` is the list of arguments to put at the start of the
        command (e.g. docker run).

        `env` is the environment to be set for running the resulting
        command line.
        """
        scr = self.get_requirement("ShellCommandRequirement")[0]

        shouldquote = needs_shell_quoting_re.search
        if scr is not None:
            shouldquote = neverquote

        # If mpi_procs is set (not None and > 0), prepend the
        # appropriate MPI job launch command and flags to the
        # execution.
        if self.mpi_procs:
            menv = runtimeContext.mpi_config
            mpi_runtime = [
                menv.runner,
                menv.nproc_flag,
                str(self.mpi_procs),
            ] + menv.extra_flags
            runtime = mpi_runtime + runtime
            menv.pass_through_env_vars(env)
            menv.set_env_vars(env)

        _logger.info(
            "[job %s] %s$ %s%s%s%s",
            self.name,
            self.outdir,
            " \\\n    ".join(
                [
                    shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg)
                    for arg in (runtime + self.command_line)
                ]
            ),
            " < %s" % self.stdin if self.stdin else "",
            " > %s" % os.path.join(self.outdir, self.stdout) if self.stdout else "",
            " 2> %s" % os.path.join(self.outdir, self.stderr) if self.stderr else "",
        )
        if self.joborder is not None and runtimeContext.research_obj is not None:
            job_order = self.joborder
            if (
                runtimeContext.process_run_id is not None
                and runtimeContext.prov_obj is not None
                and isinstance(job_order, (list, dict))
            ):
                runtimeContext.prov_obj.used_artefacts(
                    job_order, runtimeContext.process_run_id, str(self.name)
                )
            else:
                _logger.warning(
                    "research_obj set but one of process_run_id "
                    "or prov_obj is missing from runtimeContext: "
                    "{}".format(runtimeContext)
                )
        outputs = {}  # type: CWLObjectType
        try:
            stdin_path = None
            if self.stdin is not None:
                rmap = self.pathmapper.reversemap(self.stdin)
                if rmap is None:
                    raise WorkflowException(f"{self.stdin} missing from pathmapper")
                else:
                    stdin_path = rmap[1]

            stderr_path = None
            if self.stderr is not None:
                abserr = os.path.join(self.outdir, self.stderr)
                dnerr = os.path.dirname(abserr)
                if dnerr and not os.path.exists(dnerr):
                    os.makedirs(dnerr)
                stderr_path = abserr

            stdout_path = None
            if self.stdout is not None:
                absout = os.path.join(self.outdir, self.stdout)
                dnout = os.path.dirname(absout)
                if dnout and not os.path.exists(dnout):
                    os.makedirs(dnout)
                stdout_path = absout

            commands = [str(x) for x in runtime + self.command_line]
            if runtimeContext.secret_store is not None:
                commands = cast(
                    List[str],
                    runtimeContext.secret_store.retrieve(cast(CWLOutputType, commands)),
                )
                env = cast(
                    MutableMapping[str, str],
                    runtimeContext.secret_store.retrieve(cast(CWLOutputType, env)),
                )

            job_script_contents = None  # type: Optional[str]
            builder = getattr(self, "builder", None)  # type: Optional[Builder]
            if builder is not None:
                job_script_contents = builder.build_job_script(commands)
            rcode = _job_popen(
                commands,
                stdin_path=stdin_path,
                stdout_path=stdout_path,
                stderr_path=stderr_path,
                env=env,
                cwd=self.outdir,
                make_job_dir=lambda: runtimeContext.create_outdir(),
                job_script_contents=job_script_contents,
                timelimit=self.timelimit,
                name=self.name,
                monitor_function=monitor_function,
                default_stdout=runtimeContext.default_stdout,
                default_stderr=runtimeContext.default_stderr,
            )

            if rcode in self.successCodes:
                processStatus = "success"
            elif rcode in self.temporaryFailCodes:
                processStatus = "temporaryFail"
            elif rcode in self.permanentFailCodes:
                processStatus = "permanentFail"
            elif rcode == 0:
                processStatus = "success"
            else:
                processStatus = "permanentFail"

            if "listing" in self.generatefiles:
                if self.generatemapper:
                    relink_initialworkdir(
                        self.generatemapper,
                        self.outdir,
                        self.builder.outdir,
                        inplace_update=self.inplace_update,
                    )
                else:
                    raise ValueError(
                        "'listing' in self.generatefiles but no "
                        "generatemapper was setup."
                    )

            outputs = self.collect_outputs(self.outdir, rcode)
            outputs = bytes2str_in_dicts(outputs)  # type: ignore
        except OSError as e:
            if e.errno == 2:
                if runtime:
                    _logger.error("'%s' not found: %s", runtime[0], str(e))
                else:
                    _logger.error("'%s' not found: %s", self.command_line[0], str(e))
            else:
                _logger.exception("Exception while running job")
            processStatus = "permanentFail"
        except WorkflowException as err:
            _logger.error("[job %s] Job error:\n%s", self.name, str(err))
            processStatus = "permanentFail"
        except Exception:
            _logger.exception("Exception while running job")
            processStatus = "permanentFail"
        if (
            runtimeContext.research_obj is not None
            and self.prov_obj is not None
            and runtimeContext.process_run_id is not None
        ):
            # creating entities for the outputs produced by each step (in the provenance document)
            self.prov_obj.record_process_end(
                str(self.name),
                runtimeContext.process_run_id,
                outputs,
                datetime.datetime.now(),
            )
        if processStatus != "success":
            _logger.warning("[job %s] completed %s", self.name, processStatus)
        else:
            _logger.info("[job %s] completed %s", self.name, processStatus)

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "[job %s] outputs %s", self.name, json_dumps(outputs, indent=4)
            )

        if self.generatemapper is not None and runtimeContext.secret_store is not None:
            # Delete any runtime-generated files containing secrets.
            for _, p in self.generatemapper.items():
                if p.type == "CreateFile":
                    if runtimeContext.secret_store.has_secret(p.resolved):
                        host_outdir = self.outdir
                        container_outdir = self.builder.outdir
                        host_outdir_tgt = p.target
                        if p.target.startswith(container_outdir + "/"):
                            host_outdir_tgt = os.path.join(
                                host_outdir, p.target[len(container_outdir) + 1 :]
                            )
                        os.remove(host_outdir_tgt)

        if runtimeContext.workflow_eval_lock is None:
            raise WorkflowException(
                "runtimeContext.workflow_eval_lock must not be None"
            )

        if self.output_callback:
            with runtimeContext.workflow_eval_lock:
                self.output_callback(outputs, processStatus)

        if self.stagedir is not None and os.path.exists(self.stagedir):
            _logger.debug(
                "[job %s] Removing input staging directory %s",
                self.name,
                self.stagedir,
            )
            shutil.rmtree(self.stagedir, True)

        if runtimeContext.rm_tmpdir:
            _logger.debug(
                "[job %s] Removing temporary directory %s", self.name, self.tmpdir
            )
            shutil.rmtree(self.tmpdir, True)
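The exit-code handling in `_execute` reduces to a small mapping; isolated below (the success/fail code lists are illustrative defaults):

def classify_rcode(rcode, successCodes=(), temporaryFailCodes=(),
                   permanentFailCodes=()):
    # Mirrors the branch ladder above: the explicit lists win, then 0 means
    # success, and anything else is a permanent failure.
    if rcode in successCodes:
        return "success"
    if rcode in temporaryFailCodes:
        return "temporaryFail"
    if rcode in permanentFailCodes:
        return "permanentFail"
    return "success" if rcode == 0 else "permanentFail"


print(classify_rcode(0))    # success
print(classify_rcode(1))    # permanentFail
print(classify_rcode(99, temporaryFailCodes=(99,)))  # temporaryFail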
Example #26
def main(
    argsl: Optional[List[str]] = None,
    args: Optional[argparse.Namespace] = None,
    job_order_object: Optional[CWLObjectType] = None,
    stdin: IO[Any] = sys.stdin,
    stdout: Optional[Union[TextIO, StreamWriter]] = None,
    stderr: IO[Any] = sys.stderr,
    versionfunc: Callable[[], str] = versionstring,
    logger_handler: Optional[logging.Handler] = None,
    custom_schema_callback: Optional[Callable[[], None]] = None,
    executor: Optional[JobExecutor] = None,
    loadingContext: Optional[LoadingContext] = None,
    runtimeContext: Optional[RuntimeContext] = None,
    input_required: bool = True,
) -> int:
    if not stdout:  # force UTF-8 even if the console is configured differently
        if hasattr(sys.stdout,
                   "encoding") and sys.stdout.encoding.upper() not in (
                       "UTF-8",
                       "UTF8",
                   ):
            if hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
            else:
                stdout = getwriter("utf-8")(sys.stdout)  # type: ignore
        else:
            stdout = sys.stdout

    _logger.removeHandler(defaultStreamHandler)
    stderr_handler = logger_handler
    if stderr_handler is not None:
        _logger.addHandler(stderr_handler)
    else:
        coloredlogs.install(logger=_logger, stream=stderr)
        stderr_handler = _logger.handlers[-1]
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            addl = []  # type: List[str]
            if "CWLTOOL_OPTIONS" in os.environ:
                addl = os.environ["CWLTOOL_OPTIONS"].split(" ")
            parser = arg_parser()
            argcomplete.autocomplete(parser)
            args = parser.parse_args(addl + argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker container is used if not
        # explicitly provided by the user.
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in get_default_args().items():
            if not hasattr(args, key):
                setattr(args, key, val)

        configure_logging(args, stderr_handler, runtimeContext)

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                args.workflow = "CWLFile"
            else:
                _logger.error(
                    "CWL document required, no input file was provided")
                parser.print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if args.mpi_config_file is not None:
            runtimeContext.mpi_config = MpiConfig.load(args.mpi_config_file)

        setup_schema(args, custom_schema_callback)

        if args.provenance:
            if argsl is None:
                raise Exception("argsl cannot be None")
            if setup_provenance(args, argsl, runtimeContext) is not None:
                return 1

        loadingContext = setup_loadingContext(loadingContext, runtimeContext,
                                              args)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow,
            resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor,
        )

        try_again_msg = ("" if args.debug else
                         ", try again with --debug for more information")

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args,
                stdin,
                loadingContext.fetcher_constructor,
                loadingContext.overrides_list,
                tool_file_uri,
            )

            if args.overrides:
                loadingContext.overrides_list.extend(
                    load_overrides(file_uri(os.path.abspath(args.overrides)),
                                   tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            if args.print_deps and loadingContext.loader:
                printdeps(workflowobj, loadingContext.loader, stdout,
                          args.relative_deps, uri)
                return 0

            loadingContext, uri = resolve_and_validate_document(
                loadingContext,
                workflowobj,
                uri,
                preprocess_only=(args.print_pre or args.pack),
                skip_schemas=args.skip_schemas,
            )

            if loadingContext.loader is None:
                raise Exception("Impossible code path.")
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext, uri))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext, uri))

            if args.print_pre:
                stdout.write(
                    json_dumps(processobj,
                               indent=4,
                               sort_keys=True,
                               separators=(",", ": ")))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                make_template(tool)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(
                    printrdf(tool, loadingContext.loader.ctx,
                             args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write("  " + "\n  ".join(
                            [shortname(t["id"]) for t in tool.tool[f]]) + "\n")
                return 0

            if args.target:
                ctool = choose_target(args, tool, loadingContext)
                if ctool is None:
                    return 1
                else:
                    tool = ctool

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(
                    json_dumps(tool.tool,
                               indent=4,
                               sort_keys=True,
                               separators=(",", ": ")))
                return 0

        except (ValidationException) as exc:
            _logger.error("Tool definition failed validation:\n%s",
                          str(exc),
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(
                "Tool definition failed initialization:\n%s",
                str(exc),
                exc_info=args.debug,
            )
            return 1
        except Exception as exc:
            _logger.error(
                "I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                str(exc) if not args.debug else "",
                exc_info=args.debug,
            )
            return 1

        if isinstance(tool, int):
            return tool

        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        if check_working_directories(runtimeContext) is not None:
            return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store,
                                                 SecretStore())
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess)

        if not executor:
            if args.parallel:
                temp_executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = temp_executor.select_resources
                real_executor = temp_executor  # type: JobExecutor
            else:
                real_executor = SingleJobExecutor()
        else:
            real_executor = executor

        try:
            runtimeContext.basedir = input_basedir

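            # A cwltool:ProcessGenerator first executes its embedded tool; the
            # result is used to generate the real tool and job order to run.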
            if isinstance(tool, ProcessGenerator):
                tfjob_order = {}  # type: CWLObjectType
                if loadingContext.jobdefaults:
                    tfjob_order.update(loadingContext.jobdefaults)
                if job_order_object:
                    tfjob_order.update(job_order_object)
                tfout, tfstatus = real_executor(tool.embedded_tool,
                                                tfjob_order, runtimeContext)
                if not tfout or tfstatus != "success":
                    raise WorkflowException(
                        "ProcessGenerator failed to generate workflow")
                tool, job_order_object = tool.result(tfjob_order, tfout,
                                                     runtimeContext)
                if not job_order_object:
                    job_order_object = None

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    make_fs_access=runtimeContext.make_fs_access,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store,
                    input_required=input_required,
                )
            except SystemExit as err:
                return err.code

            del args.workflow
            del args.job_order

            conf_file = getattr(args,
                                "beta_dependency_resolvers_configuration",
                                None)  # str
            use_conda_dependencies = getattr(args, "beta_conda_dependencies",
                                             None)  # str

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(
                    args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers,
                )

            (out, status) = real_executor(tool,
                                          initialized_job_order_object,
                                          runtimeContext,
                                          logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(out, True)

                    def remove_at_id(doc: CWLObjectType) -> None:
                        for key in list(doc.keys()):
                            if key == "@id":
                                del doc[key]
                            else:
                                value = doc[key]
                                if isinstance(value, MutableMapping):
                                    remove_at_id(value)
                                elif isinstance(value, MutableSequence):
                                    for entry in value:
                                        if isinstance(entry, MutableMapping):
                                            remove_at_id(entry)

                    remove_at_id(out)
                    visit_class(
                        out,
                        ("File", ),
                        functools.partial(add_sizes,
                                          runtimeContext.make_fs_access("")),
                    )

                def loc_to_path(obj: CWLObjectType) -> None:
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if cast(str, obj["location"]).startswith("file://"):
                        obj["path"] = uri_file_path(cast(str, obj["location"]))

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ),
                            MutationManager().unset_generation)

                if isinstance(out, str):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4, ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()

            if status != "success":
                _logger.warning("Final process status is %s", status)
                return 1
            _logger.info("Final process status is %s", status)
            return 0

        except (ValidationException) as exc:
            _logger.error("Input object failed validation:\n%s",
                          str(exc),
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                "Workflow or tool uses unsupported feature:\n%s",
                str(exc),
                exc_info=args.debug,
            )
            return 33
        except WorkflowException as exc:
            _logger.error(
                "Workflow error%s:\n%s",
                try_again_msg,
                strip_dup_lineno(str(exc)),
                exc_info=args.debug,
            )
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                "Unhandled error%s:\n  %s",
                try_again_msg,
                str(exc),
                exc_info=args.debug,
            )
            return 1

    finally:
        if (args and runtimeContext and runtimeContext.research_obj
                and workflowobj and loadingContext):
            research_obj = runtimeContext.research_obj
            if loadingContext.loader is not None:
                research_obj.generate_snapshot(
                    prov_deps(workflowobj, loadingContext.loader, uri))
            else:
                _logger.warning("Unable to generate provenance snapshot "
                                "due to missing loadingContext.loader.")
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug("[provenance] Closing provenance log file %s",
                              prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
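
The entry point above can also be driven programmatically. A minimal sketch,
assuming cwltool is installed ("wf.cwl" is a hypothetical workflow path, not
from this document):

import sys

from cwltool.main import main

# Equivalent to running `cwltool --validate wf.cwl` on the command line;
# main() returns a process exit code (0 on success).
exit_code = main(argsl=["--validate", "wf.cwl"])
sys.exit(exit_code)
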
Example #27
def init_job_order(
    job_order_object: Optional[CWLObjectType],
    args: argparse.Namespace,
    process: Process,
    loader: Loader,
    stdout: Union[TextIO, StreamWriter],
    print_input_deps: bool = False,
    relative_deps: str = "primary",
    make_fs_access: Callable[[str], StdFsAccess] = StdFsAccess,
    input_basedir: str = "",
    secret_store: Optional[SecretStore] = None,
    input_required: bool = True,
) -> CWLObjectType:
    secrets_req, _ = process.get_requirement(
        "http://commonwl.org/cwltool#Secrets")
    if job_order_object is None:
        namemap = {}  # type: Dict[str, str]
        records = []  # type: List[str]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow),
            process,
            namemap,
            records,
            input_required,
        )
        if args.tool_help:
            toolparser.print_help()
            exit(0)
        cmd_line = vars(toolparser.parse_args(args.job_order))
        for record_name in records:
            record = {}
            record_items = {
                k: v
                for k, v in cmd_line.items() if k.startswith(record_name)
            }
            for key, value in record_items.items():
                record[key[len(record_name) + 1:]] = value
                del cmd_line[key]
            cmd_line[str(record_name)] = record
        if "job_order" in cmd_line and cmd_line["job_order"]:
            try:
                job_order_object = cast(
                    CWLObjectType,
                    loader.resolve_ref(cmd_line["job_order"])[0],
                )
            except Exception:
                _logger.exception("Failed to resolve job_order: %s",
                                  cmd_line["job_order"])
                exit(1)
        else:
            job_order_object = {"id": args.workflow}

        del cmd_line["job_order"]

        job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

        if secret_store and secrets_req:
            secret_store.store(
                [
                    shortname(sc)
                    for sc in cast(List[str], secrets_req["secrets"])
                ],
                job_order_object,
            )

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "Parsed job order from command line: %s",
                json_dumps(job_order_object, indent=4),
            )

    for inp in process.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"])
                                 not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if job_order_object is None:
        if process.tool["inputs"]:
            if toolparser is not None:
                print("\nOptions for {} ".format(args.workflow))
                toolparser.print_help()
            _logger.error("")
            _logger.error("Input object required, use --help for details")
            exit(1)
        else:
            job_order_object = {}

    if print_input_deps:
        basedir = None  # type: Optional[str]
        uri = cast(str, job_order_object["id"])
        if uri == args.workflow:
            basedir = os.path.dirname(uri)
            uri = ""
        printdeps(
            job_order_object,
            loader,
            stdout,
            relative_deps,
            uri,
            basedir=basedir,
            nestdirs=False,
        )
        exit(0)

    def path_to_loc(p: CWLObjectType) -> None:
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

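    # Merge $namespaces from the job order and the tool metadata so that
    # prefixed "format" values can be expanded to full URIs below.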
    ns = {}  # type: ContextType
    ns.update(cast(ContextType, job_order_object.get("$namespaces", {})))
    ns.update(cast(ContextType, process.metadata.get("$namespaces", {})))
    ld = Loader(ns)

    def expand_formats(p: CWLObjectType) -> None:
        if "format" in p:
            p["format"] = ld.expand_url(cast(str, p["format"]), "")

    visit_class(job_order_object, ("File", "Directory"), path_to_loc)
    visit_class(
        job_order_object,
        ("File", ),
        functools.partial(add_sizes, make_fs_access(input_basedir)),
    )
    visit_class(job_order_object, ("File", ), expand_formats)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    if secret_store and secrets_req:
        secret_store.store(
            [shortname(sc) for sc in cast(List[str], secrets_req["secrets"])],
            job_order_object,
        )

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    return job_order_object
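
A minimal, self-contained sketch of the default-filling loop above; the tool
input and the split()-based stand-in for shortname() are illustrative, not the
cwltool API:

tool_inputs = [{"id": "#main/threads", "default": 4}]
job_order = {}  # as if the user supplied no inputs
for inp in tool_inputs:
    key = inp["id"].split("/")[-1]  # crude stand-in for shortname()
    if "default" in inp and key not in job_order:
        job_order[key] = inp["default"]
assert job_order == {"threads": 4}
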
Example #28
    def job(
        self,
        joborder: CWLObjectType,
        output_callback: Optional[OutputCallbackType],
        runtimeContext: RuntimeContext,
    ) -> JobsGeneratorType:
        self.state = {}
        self.processStatus = "success"

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug("[%s] inputs %s", self.name, json_dumps(joborder, indent=4))

        runtimeContext = runtimeContext.copy()
        runtimeContext.outdir = None
        debug = runtimeContext.debug

        for index, inp in enumerate(self.tool["inputs"]):
            with SourceLine(self.tool["inputs"], index, WorkflowException, debug):
                inp_id = shortname(inp["id"])
                if inp_id in joborder:
                    self.state[inp["id"]] = WorkflowStateItem(
                        inp, joborder[inp_id], "success"
                    )
                elif "default" in inp:
                    self.state[inp["id"]] = WorkflowStateItem(
                        inp, inp["default"], "success"
                    )
                else:
                    raise WorkflowException(
                        "Input '%s' not in input object and does not have a "
                        " default value." % (inp["id"])
                    )

        for step in self.steps:
            for out in step.tool["outputs"]:
                self.state[out["id"]] = None

        completed = 0
        while completed < len(self.steps):
            self.made_progress = False

            for step in self.steps:
                if (
                    getdefault(runtimeContext.on_error, "stop") == "stop"
                    and self.processStatus != "success"
                ):
                    break

                if not step.submitted:
                    try:
                        step.iterable = self.try_make_job(
                            step, output_callback, runtimeContext
                        )
                    except WorkflowException as exc:
                        _logger.error("[%s] Cannot make job: %s", step.name, str(exc))
                        _logger.debug("", exc_info=True)
                        self.processStatus = "permanentFail"

                if step.iterable is not None:
                    try:
                        for newjob in step.iterable:
                            if (
                                getdefault(runtimeContext.on_error, "stop") == "stop"
                                and self.processStatus != "success"
                            ):
                                break
                            if newjob is not None:
                                self.made_progress = True
                                yield newjob
                            else:
                                break
                    except WorkflowException as exc:
                        _logger.error("[%s] Cannot make job: %s", step.name, str(exc))
                        _logger.debug("", exc_info=True)
                        self.processStatus = "permanentFail"

            completed = sum(1 for s in self.steps if s.completed)

            if not self.made_progress and completed < len(self.steps):
                if self.processStatus != "success":
                    break
                else:
                    yield None

        if not self.did_callback and output_callback:
            # could have called earlier on line 336;
            self.do_output_callback(output_callback)
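
The method above is a generator: it yields runnable step jobs as their inputs
become ready and yields None while blocked. A rough sketch of how an executor
might drain it, loosely modeled on cwltool's SingleJobExecutor (wf_job,
joborder, callback, and runtime_context are assumed to exist):

for runnable in wf_job.job(joborder, callback, runtime_context):
    if runnable is not None:
        runnable.run(runtime_context)  # execute the step job that is ready
    # else: no step is ready yet; a multithreaded executor would wait here
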
Example #29
            def postScatterEval(io: CWLObjectType) -> Optional[CWLObjectType]:
                shortio = cast(CWLObjectType, {shortname(k): v for k, v in io.items()})

                fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)("")
                for k, v in io.items():
                    if k in loadContents:
                        val = cast(CWLObjectType, v)
                        if val.get("contents") is None:
                            with fs_access.open(cast(str, val["location"]), "rb") as f:
                                val["contents"] = content_limit_respected_read(f)

                def valueFromFunc(
                    k: str, v: Optional[CWLOutputType]
                ) -> Optional[CWLOutputType]:
                    if k in valueFrom:
                        adjustDirObjs(
                            v, functools.partial(get_listing, fs_access, recursive=True)
                        )

                        return expression.do_eval(
                            valueFrom[k],
                            shortio,
                            self.workflow.requirements,
                            None,
                            None,
                            {},
                            context=v,
                            debug=runtimeContext.debug,
                            js_console=runtimeContext.js_console,
                            timeout=runtimeContext.eval_timeout,
                            container_engine=container_engine,
                        )
                    return v

                psio = {k: valueFromFunc(k, v) for k, v in io.items()}
                if "when" in step.tool:
                    evalinputs = {shortname(k): v for k, v in psio.items()}
                    whenval = expression.do_eval(
                        step.tool["when"],
                        evalinputs,
                        self.workflow.requirements,
                        None,
                        None,
                        {},
                        context=cast(Optional[CWLObjectType], v),
                        debug=runtimeContext.debug,
                        js_console=runtimeContext.js_console,
                        timeout=runtimeContext.eval_timeout,
                        container_engine=container_engine,
                    )
                    if whenval is True:
                        pass
                    elif whenval is False:
                        _logger.debug(
                            "[%s] conditional %s evaluated to %s",
                            step.name,
                            step.tool["when"],
                            whenval,
                        )
                        _logger.debug(
                            "[%s] inputs was %s",
                            step.name,
                            json_dumps(evalinputs, indent=2),
                        )
                        return None
                    else:
                        raise WorkflowException(
                            "Conditional 'when' must evaluate to 'true' or 'false'"
                        )
                return psio
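
This fragment reappears in context in the next example. Its tail implements
the `when` gate, whose semantics can be condensed into a small illustrative
helper (not part of cwltool):

from cwltool.errors import WorkflowException

def apply_when_gate(whenval, psio):
    # True runs the step with the evaluated inputs; False skips it, so its
    # outputs become null; anything else is a hard error.
    if whenval is True:
        return psio
    if whenval is False:
        return None
    raise WorkflowException("Conditional 'when' must evaluate to 'true' or 'false'")
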
Example #30
    def try_make_job(
        self,
        step: WorkflowJobStep,
        final_output_callback: Optional[OutputCallbackType],
        runtimeContext: RuntimeContext,
    ) -> JobsGeneratorType:
        container_engine = "docker"
        if runtimeContext.podman:
            container_engine = "podman"
        elif runtimeContext.singularity:
            container_engine = "singularity"
        if step.submitted:
            return

        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        supportsMultipleInput = bool(
            self.workflow.get_requirement("MultipleInputFeatureRequirement")[0]
        )

        try:
            inputobj = object_from_state(
                self.state, inputparms, False, supportsMultipleInput, "source"
            )
            if inputobj is None:
                _logger.debug("[%s] job step %s not ready", self.name, step.id)
                return

            if step.submitted:
                return
            _logger.info("[%s] starting %s", self.name, step.name)

            callback = functools.partial(
                self.receive_output, step, outputparms, final_output_callback
            )

            valueFrom = {
                i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i
            }

            loadContents = {
                i["id"] for i in step.tool["inputs"] if i.get("loadContents")
            }

            if len(valueFrom) > 0 and not bool(
                self.workflow.get_requirement("StepInputExpressionRequirement")[0]
            ):
                raise WorkflowException(
                    "Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements"
                )

            vfinputs = {shortname(k): v for k, v in inputobj.items()}

            def postScatterEval(io: CWLObjectType) -> Optional[CWLObjectType]:
                shortio = cast(CWLObjectType, {shortname(k): v for k, v in io.items()})

                fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)("")
                for k, v in io.items():
                    if k in loadContents:
                        val = cast(CWLObjectType, v)
                        if val.get("contents") is None:
                            with fs_access.open(cast(str, val["location"]), "rb") as f:
                                val["contents"] = content_limit_respected_read(f)

                def valueFromFunc(
                    k: str, v: Optional[CWLOutputType]
                ) -> Optional[CWLOutputType]:
                    if k in valueFrom:
                        adjustDirObjs(
                            v, functools.partial(get_listing, fs_access, recursive=True)
                        )

                        return expression.do_eval(
                            valueFrom[k],
                            shortio,
                            self.workflow.requirements,
                            None,
                            None,
                            {},
                            context=v,
                            debug=runtimeContext.debug,
                            js_console=runtimeContext.js_console,
                            timeout=runtimeContext.eval_timeout,
                            container_engine=container_engine,
                        )
                    return v

                psio = {k: valueFromFunc(k, v) for k, v in io.items()}
                if "when" in step.tool:
                    evalinputs = {shortname(k): v for k, v in psio.items()}
                    whenval = expression.do_eval(
                        step.tool["when"],
                        evalinputs,
                        self.workflow.requirements,
                        None,
                        None,
                        {},
                        context=cast(Optional[CWLObjectType], v),
                        debug=runtimeContext.debug,
                        js_console=runtimeContext.js_console,
                        timeout=runtimeContext.eval_timeout,
                        container_engine=container_engine,
                    )
                    if whenval is True:
                        pass
                    elif whenval is False:
                        _logger.debug(
                            "[%s] conditional %s evaluated to %s",
                            step.name,
                            step.tool["when"],
                            whenval,
                        )
                        _logger.debug(
                            "[%s] inputs was %s",
                            step.name,
                            json_dumps(evalinputs, indent=2),
                        )
                        return None
                    else:
                        raise WorkflowException(
                            "Conditional 'when' must evaluate to 'true' or 'false'"
                        )
                return psio

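            # Dispatch on the CWL scatterMethod; when a single input is
            # scattered and no method is given, dotproduct is used.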
            if "scatter" in step.tool:
                scatter = cast(List[str], aslist(step.tool["scatter"]))
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )
                runtimeContext = runtimeContext.copy()
                runtimeContext.postScatterEval = postScatterEval

                emptyscatter = [
                    shortname(s) for s in scatter if len(cast(Sized, inputobj[s])) == 0
                ]
                if emptyscatter:
                    _logger.warning(
                        "[job %s] Notice: scattering over empty input in "
                        "'%s'.  All outputs will be empty.",
                        step.name,
                        "', '".join(emptyscatter),
                    )

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(
                        step, inputobj, scatter, callback, runtimeContext
                    )
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(
                        step, inputobj, scatter, callback, runtimeContext
                    )
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(
                        step, inputobj, scatter, callback, runtimeContext
                    )
            else:
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.debug(
                        "[%s] job input %s", step.name, json_dumps(inputobj, indent=4)
                    )

                inputobj = postScatterEval(inputobj)
                if inputobj is not None:
                    if _logger.isEnabledFor(logging.DEBUG):
                        _logger.debug(
                            "[%s] evaluated job input to %s",
                            step.name,
                            json_dumps(inputobj, indent=4),
                        )
                    jobs = step.job(inputobj, callback, runtimeContext)
                else:
                    _logger.info("[%s] will be skipped", step.name)
                    callback({k["id"]: None for k in outputparms}, "skipped")
                    step.completed = True
                    jobs = (_ for _ in ())

            step.submitted = True

            yield from jobs
        except WorkflowException:
            raise
        except Exception:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
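
For reference, the three scatterMethods dispatched above pair inputs
differently; a plain-Python illustration of the pairing semantics (not the
cwltool implementation):

import itertools

xs, ys = [1, 2], ["a", "b"]
dot = list(zip(xs, ys))                      # dotproduct: [(1, 'a'), (2, 'b')]
flat = list(itertools.product(xs, ys))       # flat_crossproduct: flat list of 4 pairs
nested = [[(x, y) for y in ys] for x in xs]  # nested_crossproduct: 2x2 nested lists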