Example #1
0
def test_get_subgraph():
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool})
    wf = norm(Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri())
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)

    sg = norm(Path(get_data("tests/subgraph")).as_uri())

    def clean(val):
        if isinstance(val, str):
            if val.startswith(sg):
                return val[len(sg) + 1 :]
        if isinstance(val, dict):
            return {k: clean(v) for k, v in val.items()}
        if isinstance(val, list):
            return [clean(v) for v in val]
        return val

    for a in (
        "file1",
        "file2",
        "file3",
        "count_output",
        "output3",
        "output4",
        "output5",
        "step1",
        "step2",
        "step3",
        "step4",
        "step5",
    ):
        extracted = get_subgraph([wf + "#" + a], tool)
        with open(get_data("tests/subgraph/extract_" + a + ".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted))
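A minimal sketch of what the clean() helper above accomplishes; the base URI below is a hypothetical placeholder, not a value from the test suite:
sg = "file:///home/user/cwltool/tests/subgraph"  # hypothetical checkout location
val = sg + "/count-lines1-wf.cwl#step1"
# Stripping the base URI plus the joining "/" leaves a repository-relative
# identifier that can be compared against the checked-in JSON fixtures.
assert val[len(sg) + 1:] == "count-lines1-wf.cwl#step1"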
Example #2
0
def test_get_step() -> None:
    loadingContext = LoadingContext(
        {"construct_tool_object": default_make_tool})
    wf = Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri()
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)
    assert isinstance(tool, Workflow)

    sg = Path(get_data("tests/subgraph")).as_uri()

    def clean(val: Any) -> Any:
        if isinstance(val, str):
            if val.startswith(sg):
                return val[len(sg) + 1:]
        if isinstance(val, dict):
            return {k: clean(v) for k, v in val.items()}
        if isinstance(val, list):
            return [clean(v) for v in val]
        return val

    for a in (
            "step1",
            "step2",
            "step3",
            "step4",
            "step5",
    ):
        extracted = get_step(tool, wf + "#" + a)
        with open(get_data("tests/subgraph/single_" + a + ".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted))
Example #3
0
def test_get_subgraph() -> None:
    """Compare known correct subgraphs to generated subgraphs."""
    loading_context = LoadingContext({"construct_tool_object": default_make_tool})
    wf = Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri()
    loading_context.do_update = False
    tool = load_tool(wf, loading_context)

    sg = Path(get_data("tests/subgraph")).as_uri()

    for a in (
        "file1",
        "file2",
        "file3",
        "count_output",
        "output3",
        "output4",
        "output5",
        "step1",
        "step2",
        "step3",
        "step4",
        "step5",
    ):
        assert isinstance(tool, Workflow)
        extracted = get_subgraph([wf + "#" + a], tool, loading_context)
        with open(get_data("tests/subgraph/extract_" + a + ".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted), sg)
Example #4
0
def make_avro_schema(i: List[Any], loader: Loader) -> Names:
    """
    All in one convenience function.

    Call make_avro() and make_avro_schema_from_avro() separately if you need
    the intermediate result for diagnostic output.
    """
    names = Names()
    avro = make_avro(i, loader)
    make_avsc_object(convert_to_dict(avro), names)
    return names
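A hedged sketch of the two-step form the docstring above refers to, assuming the usual schema_salad module layout (Loader in schema_salad.ref_resolver, convert_to_dict in schema_salad.utils); it is illustrative, not a definitive replacement for make_avro_schema():
from typing import Any, List

from schema_salad.avro.schema import Names, make_avsc_object
from schema_salad.ref_resolver import Loader
from schema_salad.schema import make_avro
from schema_salad.utils import convert_to_dict


def make_avro_schema_two_step(schemas: List[Any], loader: Loader) -> Names:
    """Same end result as make_avro_schema(), keeping the intermediate form."""
    avro_repr = make_avro(schemas, loader)  # inspect or pretty-print this for diagnostics
    names = Names()
    make_avsc_object(convert_to_dict(avro_repr), names)
    return names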
Example #5
0
def test_get_subgraph_long_out_form() -> None:
    """Compare subgraph generation when 'out' is in the long form."""
    loading_context = LoadingContext({"construct_tool_object": default_make_tool})
    wf = Path(get_data("tests/subgraph/1432.cwl")).as_uri()
    loading_context.do_update = False
    tool = load_tool(wf, loading_context)

    sg = Path(get_data("tests/")).as_uri()

    assert isinstance(tool, Workflow)
    extracted = get_subgraph([wf + "#step2"], tool, loading_context)
    with open(get_data("tests/subgraph/extract_step2_1432.json")) as f:
        assert json.load(f) == clean(convert_to_dict(extracted), sg)
Example #6
0
def test_get_step() -> None:
    loading_context = LoadingContext({"construct_tool_object": default_make_tool})
    wf = Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri()
    loading_context.do_update = False
    tool = load_tool(wf, loading_context)
    assert isinstance(tool, Workflow)

    sg = Path(get_data("tests/subgraph")).as_uri()

    for a in (
        "step1",
        "step2",
        "step3",
        "step4",
        "step5",
    ):
        extracted = get_step(tool, wf + "#" + a, loading_context)
        with open(get_data("tests/subgraph/single_" + a + ".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted), sg)
Example #7
0
def test_get_subgraph():
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool})
    wf = norm(Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri())
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)

    sg = norm(Path(get_data("tests/subgraph")).as_uri())

    def clean(val):
        if isinstance(val, string_types):
            if val.startswith(sg):
                return val[len(sg)+1:]
        if isinstance(val, dict):
            return {k: clean(v) for k,v in val.items()}
        if isinstance(val, list):
            return [clean(v) for v in val]
        return val

    for a in ("file1", "file2", "file3", "count_output",
              "output3", "output4", "output5",
              "step1", "step2", "step3", "step4", "step5"):
        extracted = get_subgraph([wf+"#"+a], tool)
        with open(get_data("tests/subgraph/extract_"+a+".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted))
Example #8
0
def make_avro_schema_from_avro(avro: List[Union[Avro, Dict[str, str], str]]) -> Names:
    names = Names()
    make_avsc_object(convert_to_dict(avro), names)
    return names
Example #9
0
def AvroSchemaFromJSONData(j, names):  # type: (Any, avro.schema.Names) -> Any
    return avro.schema.make_avsc_object(convert_to_dict(j), names)
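For context, a small illustration (an assumption about intent, not code from the corpus) of why convert_to_dict is applied before make_avsc_object: parsed CWL/salad documents arrive as ruamel.yaml CommentedMap/CommentedSeq containers, and convert_to_dict recursively turns them into plain dicts and lists:
from ruamel.yaml.comments import CommentedMap, CommentedSeq

from schema_salad.utils import convert_to_dict

node = CommentedMap()
node["type"] = "record"
node["name"] = "Example"
node["fields"] = CommentedSeq()

plain = convert_to_dict(node)
assert plain == {"type": "record", "name": "Example", "fields": []}
assert type(plain) is dict  # plain containers, safe to hand to the avro machinery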
Example #10
0
    def __init__(self, toolpath_object: CommentedMap,
                 loadingContext: LoadingContext) -> None:
        """Build a Process object from the provided dictionary."""
        super(Process, self).__init__()
        self.metadata = getdefault(loadingContext.metadata,
                                   {})  # type: CWLObjectType
        self.provenance_object = None  # type: Optional[ProvenanceProfile]
        self.parent_wf = None  # type: Optional[ProvenanceProfile]
        global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
        if SCHEMA_FILE is None or SCHEMA_ANY is None or SCHEMA_DIR is None:
            get_schema("v1.0")
            SCHEMA_ANY = cast(
                CWLObjectType,
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"],
            )
            SCHEMA_FILE = cast(
                CWLObjectType,
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"],
            )
            SCHEMA_DIR = cast(
                CWLObjectType,
                SCHEMA_CACHE["v1.0"]
                [3].idx["https://w3id.org/cwl/cwl#Directory"],
            )

        self.names = make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY],
                                      Loader({}))
        self.tool = toolpath_object
        self.requirements = copy.deepcopy(
            getdefault(loadingContext.requirements, []))
        self.requirements.extend(self.tool.get("requirements", []))
        if "id" not in self.tool:
            self.tool["id"] = "_:" + str(uuid.uuid4())
        self.requirements.extend(
            cast(
                List[CWLObjectType],
                get_overrides(getdefault(loadingContext.overrides_list, []),
                              self.tool["id"]).get("requirements", []),
            ))
        self.hints = copy.deepcopy(getdefault(loadingContext.hints, []))
        self.hints.extend(self.tool.get("hints", []))
        # Versions of requirements and hints which aren't mutated.
        self.original_requirements = copy.deepcopy(self.requirements)
        self.original_hints = copy.deepcopy(self.hints)
        self.doc_loader = loadingContext.loader
        self.doc_schema = loadingContext.avsc_names

        self.formatgraph = None  # type: Optional[Graph]
        if self.doc_loader is not None:
            self.formatgraph = self.doc_loader.graph

        checkRequirements(self.tool, supportedProcessRequirements)
        self.validate_hints(
            cast(Names, loadingContext.avsc_names),
            self.tool.get("hints", []),
            strict=getdefault(loadingContext.strict, False),
        )

        self.schemaDefs = {}  # type: MutableMapping[str, CWLObjectType]

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd is not None:
            sdtypes = cast(MutableSequence[CWLObjectType], sd["types"])
            avroize_type(cast(MutableSequence[CWLOutputType], sdtypes))
            av = make_valid_avro(
                sdtypes,
                {
                    cast(str, t["name"]): cast(Dict[str, Any], t)
                    for t in sdtypes
                },
                set(),
            )
            for i in av:
                self.schemaDefs[i["name"]] = i  # type: ignore
            make_avsc_object(convert_to_dict(av), self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": [],
        }  # type: CWLObjectType
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": [],
        }  # type: CWLObjectType

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.deepcopy(i)
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise ValidationException(
                        "Missing 'type' in parameter '{}'".format(c["name"]))

                if "default" in c and "null" not in aslist(c["type"]):
                    nullable = ["null"]
                    nullable.extend(aslist(c["type"]))
                    c["type"] = nullable
                else:
                    c["type"] = c["type"]
                avroize_type(c["type"], c["name"])
                if key == "inputs":
                    cast(List[CWLObjectType],
                         self.inputs_record_schema["fields"]).append(c)
                elif key == "outputs":
                    cast(List[CWLObjectType],
                         self.outputs_record_schema["fields"]).append(c)

        with SourceLine(toolpath_object, "inputs", ValidationException):
            self.inputs_record_schema = cast(
                CWLObjectType,
                make_valid_avro(self.inputs_record_schema, {}, set()),
            )
            make_avsc_object(convert_to_dict(self.inputs_record_schema),
                             self.names)
        with SourceLine(toolpath_object, "outputs", ValidationException):
            self.outputs_record_schema = cast(
                CWLObjectType,
                make_valid_avro(self.outputs_record_schema, {}, set()),
            )
            make_avsc_object(convert_to_dict(self.outputs_record_schema),
                             self.names)

        if toolpath_object.get("class") is not None and not getdefault(
                loadingContext.disable_js_validation, False):
            validate_js_options = (
                None)  # type: Optional[Dict[str, Union[List[str], str, int]]]
            if loadingContext.js_hint_options_file is not None:
                try:
                    with open(loadingContext.js_hint_options_file
                              ) as options_file:
                        validate_js_options = json.load(options_file)
                except (OSError, ValueError):
                    _logger.error(
                        "Failed to read options file %s",
                        loadingContext.js_hint_options_file,
                    )
                    raise
            if self.doc_schema is not None:
                validate_js_expressions(
                    toolpath_object,
                    self.doc_schema.names[toolpath_object["class"]],
                    validate_js_options,
                )

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if (dockerReq is not None and "dockerOutputDirectory" in dockerReq
                and is_req is not None and not is_req):
            _logger.warning(
                SourceLine(item=dockerReq, raise_type=str).makeError(
                    "When 'dockerOutputDirectory' is declared, DockerRequirement "
                    "should go in the 'requirements' section, not 'hints'."
                    ""))

        if (dockerReq is not None and is_req is not None and
                dockerReq.get("dockerOutputDirectory") == "/var/spool/cwl"):
            if is_req:
                # In this specific case, it is legal to have /var/spool/cwl, so skip the check.
                pass
            else:
                # Must be a requirement
                var_spool_cwl_detector(self.tool)
        else:
            var_spool_cwl_detector(self.tool)
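Purely for orientation, a hand-written sketch (hypothetical input name, not from the source) of the record schema the inputs/outputs loop above assembles before make_valid_avro() and make_avsc_object() normalize and register it:
# For a tool with one string input "message" that carries a default value,
# the loop produces roughly this; "null" is prepended because a default exists.
inputs_record_schema = {
    "name": "input_record_schema",
    "type": "record",
    "fields": [
        {"name": "message", "type": ["null", "string"]},
    ],
}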
Example #11
0
    def bind_input(
        self,
        schema: CWLObjectType,
        datum: Union[CWLObjectType, List[CWLObjectType]],
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[Union[str, List[int]]] = None,
    ) -> List[MutableMapping[str, Union[str, List[int]]]]:
        debug = _logger.isEnabledFor(logging.DEBUG)

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, Union[str, List[int]]]]
        binding = (
            {}
        )  # type: Union[MutableMapping[str, Union[str, List[int]]], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(
            schema["inputBinding"], MutableMapping
        ):
            binding = CommentedMap(schema["inputBinding"].items())

            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position, str):  # no need to test the CWL Version
                    # the schema for v1.0 only allow ints
                    result = self.do_eval(position, context=datum)
                    if not isinstance(result, int):
                        raise SourceLine(
                            schema["inputBinding"], "position", WorkflowException, debug
                        ).makeError(
                            "'position' expressions must evaluate to an int, "
                            f"not a {type(result)}. Expression {position} "
                            f"resulted in '{result}'."
                        )
                    binding["position"] = result
                    bp.append(result)
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, None):
                    avsc = self.names.get_name(t, None)
                elif (
                    isinstance(t, MutableMapping)
                    and "name" in t
                    and self.names.has_name(cast(str, t["name"]), None)
                ):
                    avsc = self.names.get_name(cast(str, t["name"]), None)
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate(avsc, datum, vocab=INPUT_OBJ_VOCAB):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise ValidationException(
                    "'{}' is not a valid union {}".format(datum, schema["type"])
                )
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (
                binding
                and "inputBinding" not in st
                and "type" in st
                and st["type"] == "array"
                and "itemSeparator" not in binding
            ):
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                )
        else:
            if schema["type"] == "org.w3id.cwl.salad.Any":
                if isinstance(datum, dict):
                    if datum.get("class") == "File":
                        schema["type"] = "org.w3id.cwl.cwl.File"
                    elif datum.get("class") == "Directory":
                        schema["type"] = "org.w3id.cwl.cwl.Directory"
                    else:
                        schema["type"] = "record"
                        schema["fields"] = [
                            {"name": field_name, "type": "Any"}
                            for field_name in datum.keys()
                        ]
                elif isinstance(datum, list):
                    schema["type"] = "array"
                    schema["items"] = "Any"

            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[cast(str, schema["type"])]

            if schema["type"] == "record":
                datum = cast(CWLObjectType, datum)
                for f in cast(List[CWLObjectType], schema["fields"]):
                    name = cast(str, f["name"])
                    if name in datum and datum[name] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                cast(CWLObjectType, datum[name]),
                                lead_pos=lead_pos,
                                tail_pos=name,
                                discover_secondaryFiles=discover_secondaryFiles,
                            )
                        )
                    else:
                        datum[name] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(cast(MutableSequence[CWLObjectType], datum)):
                    b2 = None
                    if binding:
                        b2 = cast(CWLObjectType, copy.deepcopy(binding))
                        b2["datum"] = item
                    itemschema = {
                        "type": schema["items"],
                        "inputBinding": b2,
                    }  # type: CWLObjectType
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    )
                binding = {}

            def _capture_files(f: CWLObjectType) -> CWLObjectType:
                self.files.append(f)
                return f

            if schema["type"] == "org.w3id.cwl.cwl.File":
                datum = cast(CWLObjectType, datum)
                self.files.append(datum)

                loadContents_sourceline = (
                    None
                )  # type: Union[None, MutableMapping[str, Union[str, List[int]]], CWLObjectType]
                if binding and binding.get("loadContents"):
                    loadContents_sourceline = binding
                elif schema.get("loadContents"):
                    loadContents_sourceline = schema

                if loadContents_sourceline and loadContents_sourceline["loadContents"]:
                    with SourceLine(
                        loadContents_sourceline,
                        "loadContents",
                        WorkflowException,
                        debug,
                    ):
                        try:
                            with self.fs_access.open(
                                cast(str, datum["location"]), "rb"
                            ) as f2:
                                datum["contents"] = content_limit_respected_read(f2)
                        except Exception as e:
                            raise Exception(
                                "Reading {}\n{}".format(datum["location"], e)
                            )

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                        sf_schema = aslist(schema["secondaryFiles"])
                    elif not discover_secondaryFiles:
                        sf_schema = []  # trust the inputs
                    else:
                        sf_schema = aslist(schema["secondaryFiles"])

                    for num, sf_entry in enumerate(sf_schema):
                        if "required" in sf_entry and sf_entry["required"] is not None:
                            required_result = self.do_eval(
                                sf_entry["required"], context=datum
                            )
                            if not (
                                isinstance(required_result, bool)
                                or required_result is None
                            ):
                                if sf_schema == schema["secondaryFiles"]:
                                    sf_item: Any = sf_schema[num]
                                else:
                                    sf_item = sf_schema
                                raise SourceLine(
                                    sf_item, "required", WorkflowException, debug
                                ).makeError(
                                    "The result of a expression in the field "
                                    "'required' must "
                                    f"be a bool or None, not a {type(required_result)}. "
                                    f"Expression '{sf_entry['required']}' resulted "
                                    f"in '{required_result}'."
                                )
                            sf_required = required_result
                        else:
                            sf_required = True

                        if "$(" in sf_entry["pattern"] or "${" in sf_entry["pattern"]:
                            sfpath = self.do_eval(sf_entry["pattern"], context=datum)
                        else:
                            sfpath = substitute(
                                cast(str, datum["basename"]), sf_entry["pattern"]
                            )

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                d_location = cast(str, datum["location"])
                                if "/" in d_location:
                                    sf_location = (
                                        d_location[0 : d_location.rindex("/") + 1]
                                        + sfname
                                    )
                                else:
                                    sf_location = d_location + sfname
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise SourceLine(
                                    sf_entry, "pattern", WorkflowException, debug
                                ).makeError(
                                    "Expected secondaryFile expression to "
                                    "return type 'str', a 'File' or 'Directory' "
                                    "dictionary, or a list of the same. Received "
                                    f"'{type(sfname)} from '{sf_entry['pattern']}'."
                                )

                            for d in cast(
                                MutableSequence[MutableMapping[str, str]],
                                datum["secondaryFiles"],
                            ):
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1 :
                                    ]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:

                                def addsf(
                                    files: MutableSequence[CWLObjectType],
                                    newsf: CWLObjectType,
                                ) -> None:
                                    for f in files:
                                        if f["location"] == newsf["location"]:
                                            f["basename"] = newsf["basename"]
                                            return
                                    files.append(newsf)

                                if isinstance(sfname, MutableMapping):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        sfname,
                                    )
                                elif discover_secondaryFiles and self.fs_access.exists(
                                    sf_location
                                ):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise SourceLine(
                                        schema,
                                        "secondaryFiles",
                                        WorkflowException,
                                        debug,
                                    ).makeError(
                                        "Missing required secondary file '%s' from file object: %s"
                                        % (sfname, json_dumps(datum, indent=4))
                                    )

                    normalizeFilesDirs(
                        cast(MutableSequence[CWLObjectType], datum["secondaryFiles"])
                    )

                if "format" in schema:
                    eval_format: Any = self.do_eval(schema["format"])
                    if isinstance(eval_format, str):
                        evaluated_format: Union[str, List[str]] = eval_format
                    elif isinstance(eval_format, MutableSequence):
                        for index, entry in enumerate(eval_format):
                            message = None
                            if not isinstance(entry, str):
                                message = (
                                    "An expression in the 'format' field must "
                                    "evaluate to a string, or list of strings. "
                                    "However a non-string item was received: "
                                    f"'{entry}' of type '{type(entry)}'. "
                                    f"The expression was '{schema['format']}' and "
                                    f"its fully evaluated result is '{eval_format}'."
                                )
                            if expression.needs_parsing(entry):
                                message = (
                                    "For inputs, 'format' field can either "
                                    "contain a single CWL Expression or CWL Parameter "
                                    "Reference, a single format string, or a list of "
                                    "format strings. But the list cannot contain CWL "
                                    "Expressions or CWL Parameter References. List "
                                    f"entry number {index+1} contains the following "
                                    "unallowed CWL Parameter Reference or Expression: "
                                    f"'{entry}'."
                                )
                            if message:
                                raise SourceLine(
                                    schema["format"], index, WorkflowException, debug
                                ).makeError(message)
                        evaluated_format = cast(List[str], eval_format)
                    else:
                        raise SourceLine(
                            schema, "format", WorkflowException, debug
                        ).makeError(
                            "An expression in the 'format' field must "
                            "evaluate to a string, or list of strings. "
                            "However the type of the expression result was "
                            f"{type(eval_format)}. "
                            f"The expression was '{schema['format']}' and "
                            f"its fully evaluated result is 'eval_format'."
                        )
                    try:
                        check_format(
                            datum,
                            evaluated_format,
                            self.formatgraph,
                        )
                    except ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" % (schema["name"], schema["format"], ve)
                        ) from ve

                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "org.w3id.cwl.cwl.Directory":
                datum = cast(CWLObjectType, datum)
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(
                        self.fs_access,
                        datum,
                        (ll == "deep_listing"),
                    )
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = cast(List[int], binding["position"]) + cast(
                    List[int], bi["position"]
                )
            bindings.append(binding)

        return bindings
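A short sketch (hypothetical values, not from the source) of the final sort-key composition: the enclosing binding's position is prepended to every nested binding's position before the enclosing binding itself is appended:
outer_binding = {"position": [2]}                      # e.g. an array input at position 2
nested = [{"position": [0, 1]}, {"position": [1, 1]}]  # bindings for items 0 and 1
for bi in nested:
    bi["position"] = outer_binding["position"] + bi["position"]
assert [bi["position"] for bi in nested] == [[2, 0, 1], [2, 1, 1]]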
Example #12
0
    def bind_input(
        self,
        schema: CWLObjectType,
        datum: Union[CWLObjectType, List[CWLObjectType]],
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[Union[str, List[int]]] = None,
    ) -> List[MutableMapping[str, Union[str, List[int]]]]:

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, Union[str, List[int]]]]
        binding = (
            {}
        )  # type: Union[MutableMapping[str, Union[str, List[int]]], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   MutableMapping):
            binding = CommentedMap(schema["inputBinding"].items())

            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position,
                              str):  # no need to test the CWL Version
                    # the schema for v1.0 only allow ints
                    binding["position"] = self.do_eval(position, context=datum)
                    bp.append(binding["position"])
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, None):
                    avsc = self.names.get_name(t, None)
                elif (isinstance(t, MutableMapping) and "name" in t
                      and self.names.has_name(cast(str, t["name"]), None)):
                    avsc = self.names.get_name(cast(str, t["name"]), None)
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise ValidationException(
                    "'{}' is not a valid union {}".format(
                        datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (binding and "inputBinding" not in st and "type" in st
                    and st["type"] == "array"
                    and "itemSeparator" not in binding):
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    ))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[cast(str, schema["type"])]

            if schema["type"] == "record":
                datum = cast(CWLObjectType, datum)
                for f in cast(List[CWLObjectType], schema["fields"]):
                    name = cast(str, f["name"])
                    if name in datum and datum[name] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                cast(CWLObjectType, datum[name]),
                                lead_pos=lead_pos,
                                tail_pos=name,
                                discover_secondaryFiles=discover_secondaryFiles,
                            ))
                    else:
                        datum[name] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(
                        cast(MutableSequence[CWLObjectType], datum)):
                    b2 = None
                    if binding:
                        b2 = cast(CWLObjectType, copy.deepcopy(binding))
                        b2["datum"] = item
                    itemschema = {
                        "type": schema["items"],
                        "inputBinding": b2,
                    }  # type: CWLObjectType
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        ))
                binding = {}

            def _capture_files(f: CWLObjectType) -> CWLObjectType:
                self.files.append(f)
                return f

            if schema["type"] == "File":
                datum = cast(CWLObjectType, datum)
                self.files.append(datum)

                loadContents_sourceline = (
                    None
                )  # type: Union[None, MutableMapping[str, Union[str, List[int]]], CWLObjectType]
                if binding and binding.get("loadContents"):
                    loadContents_sourceline = binding
                elif schema.get("loadContents"):
                    loadContents_sourceline = schema

                if loadContents_sourceline and loadContents_sourceline[
                        "loadContents"]:
                    with SourceLine(loadContents_sourceline, "loadContents",
                                    WorkflowException):
                        try:
                            with self.fs_access.open(
                                    cast(str, datum["location"]), "rb") as f2:
                                datum["contents"] = content_limit_respected_read(f2)
                        except Exception as e:
                            raise Exception("Reading {}\n{}".format(
                                datum["location"], e))

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if "required" in sf:
                            sf_required = self.do_eval(sf["required"],
                                                       context=datum)
                        else:
                            sf_required = True

                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(cast(str, datum["basename"]),
                                                sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                d_location = cast(str, datum["location"])
                                if "/" in d_location:
                                    sf_location = (
                                        d_location[0:d_location.rindex("/") +
                                                   1] + sfname)
                                else:
                                    sf_location = d_location + sfname
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise WorkflowException(
                                    "Expected secondaryFile expression to return type 'str' or 'MutableMapping', received '%s'"
                                    % (type(sfname)))

                            for d in cast(
                                    MutableSequence[MutableMapping[str, str]],
                                    datum["secondaryFiles"],
                            ):
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1:]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:

                                def addsf(
                                    files: MutableSequence[CWLObjectType],
                                    newsf: CWLObjectType,
                                ) -> None:
                                    for f in files:
                                        if f["location"] == newsf["location"]:
                                            f["basename"] = newsf["basename"]
                                            return
                                    files.append(newsf)

                                if isinstance(sfname, MutableMapping):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        sfname,
                                    )
                                elif discover_secondaryFiles and self.fs_access.exists(
                                        sf_location):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise WorkflowException(
                                        "Missing required secondary file '%s' from file object: %s"
                                        %
                                        (sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(
                        cast(MutableSequence[CWLObjectType],
                             datum["secondaryFiles"]))

                if "format" in schema:
                    try:
                        check_format(
                            datum,
                            cast(Union[List[str], str],
                                 self.do_eval(schema["format"])),
                            self.formatgraph,
                        )
                    except ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" %
                            (schema["name"], schema["format"], ve)) from ve

                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "Directory":
                datum = cast(CWLObjectType, datum)
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(
                        self.fs_access,
                        datum,
                        (ll == "deep_listing"),
                    )
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = cast(List[int], binding["position"]) + cast(
                    List[int], bi["position"])
            bindings.append(binding)

        return bindings
Example #13
0
def make_avro_schema_from_avro(avro):
    # type: (List[Union[Avro, Dict[Text, Text], Text]]) -> Names
    names = Names()
    make_avsc_object(convert_to_dict(avro), names)
    return names