Beispiel #1
0
    def bind_input(
        self,
        schema: CWLObjectType,
        datum: Union[CWLObjectType, List[CWLObjectType]],
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[Union[str, List[int]]] = None,
    ) -> List[MutableMapping[str, Union[str, List[int]]]]:
        """
        Bind an input object to its schema and collect the resulting bindings.

        Walks ``schema`` and ``datum`` together, recursing through union
        types, nested type definitions, records and arrays.  File and
        Directory values are registered in ``self.files`` along the way.

        :param schema: the (sub-)schema describing ``datum``; read for
            ``type`` and optionally ``inputBinding``, ``secondaryFiles``,
            ``format``, ``streamable``, ``loadContents``, ``loadListing``,
            ``fields``, ``items`` and ``name``.
        :param datum: the value to bind.
        :param discover_secondaryFiles: when true, secondary files that are
            not already listed on a File are looked up with
            ``self.fs_access.exists`` and added if present.
        :param lead_pos: sort-key component(s) placed before this binding's
            own ``position``.
        :param tail_pos: sort-key component(s) placed after this binding's
            own ``position`` (record fields pass the field name here).
        :returns: a list of binding mappings; each carries a ``position``
            sort key and the bound ``datum``.
        :raises ValidationException: if ``datum`` matches no member of a
            union type.
        :raises WorkflowException: presumably what ``SourceLine.makeError``
            produces for the invalid ``position`` / ``required`` /
            ``pattern`` / ``format`` cases -- confirm against SourceLine;
            raised directly when ``check_format`` rejects a File.
        :raises Exception: if reading a File for ``loadContents`` fails.

        Side effects visible in this method: ``datum`` may gain
        ``contents`` and ``secondaryFiles`` entries, missing record fields
        are filled with their ``default``, and for the salad ``Any`` type
        ``schema["type"]`` (and ``fields`` / ``items``) is rewritten in place.
        """
        debug = _logger.isEnabledFor(logging.DEBUG)

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, Union[str, List[int]]]]
        binding = (
            {}
        )  # type: Union[MutableMapping[str, Union[str, List[int]]], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(
            schema["inputBinding"], MutableMapping
        ):
            # Work on a copy so the position/datum written below do not leak
            # back into the schema's own inputBinding.
            binding = CommentedMap(schema["inputBinding"].items())

            # Sort key layout: lead_pos + own position (default 0) + tail_pos.
            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position, str):  # no need to test the CWL Version
                    # the schema for v1.0 only allow ints
                    result = self.do_eval(position, context=datum)
                    if not isinstance(result, int):
                        raise SourceLine(
                            schema["inputBinding"], "position", WorkflowException, debug
                        ).makeError(
                            "'position' expressions must evaluate to an int, "
                            f"not a {type(result)}. Expression {position} "
                            f"resulted in '{result}'."
                        )
                    binding["position"] = result
                    bp.append(result)
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, None):
                    avsc = self.names.get_name(t, None)
                elif (
                    isinstance(t, MutableMapping)
                    and "name" in t
                    and self.names.has_name(cast(str, t["name"]), None)
                ):
                    avsc = self.names.get_name(cast(str, t["name"]), None)
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate(avsc, datum, vocab=INPUT_OBJ_VOCAB):
                    # Narrow a copy of the schema to the matching member type.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        # With valueFrom the recursive result is discarded:
                        # the call is made for its side effects and only this
                        # level's binding is emitted at the end.
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise ValidationException(
                    "'{}' is not a valid union {}".format(datum, schema["type"])
                )
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (
                binding
                and "inputBinding" not in st
                and "type" in st
                and st["type"] == "array"
                and "itemSeparator" not in binding
            ):
                # Give the nested array an (empty) binding of its own so its
                # items are bound individually.
                st["inputBinding"] = {}
            # File-related settings live on the outer schema; carry them down.
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                )
        else:
            if schema["type"] == "org.w3id.cwl.salad.Any":
                # Pick a concrete type from the shape of the value.
                # NOTE(review): this rewrites the caller's schema in place --
                # confirm callers do not reuse it with differently shaped data.
                if isinstance(datum, dict):
                    if datum.get("class") == "File":
                        schema["type"] = "org.w3id.cwl.cwl.File"
                    elif datum.get("class") == "Directory":
                        schema["type"] = "org.w3id.cwl.cwl.Directory"
                    else:
                        schema["type"] = "record"
                        schema["fields"] = [
                            {"name": field_name, "type": "Any"}
                            for field_name in datum.keys()
                        ]
                elif isinstance(datum, list):
                    schema["type"] = "array"
                    schema["items"] = "Any"

            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[cast(str, schema["type"])]

            if schema["type"] == "record":
                datum = cast(CWLObjectType, datum)
                for f in cast(List[CWLObjectType], schema["fields"]):
                    name = cast(str, f["name"])
                    if name in datum and datum[name] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                cast(CWLObjectType, datum[name]),
                                lead_pos=lead_pos,
                                tail_pos=name,
                                discover_secondaryFiles=discover_secondaryFiles,
                            )
                        )
                    else:
                        # Missing or null field: store its default (or None).
                        datum[name] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(cast(MutableSequence[CWLObjectType], datum)):
                    b2 = None
                    if binding:
                        b2 = cast(CWLObjectType, copy.deepcopy(binding))
                        b2["datum"] = item
                    itemschema = {
                        "type": schema["items"],
                        "inputBinding": b2,
                    }  # type: CWLObjectType
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    )
                # The binding was handed to each item above; clear it so the
                # array itself is not appended again at the end.
                binding = {}

            def _capture_files(f: CWLObjectType) -> CWLObjectType:
                self.files.append(f)
                return f

            if schema["type"] == "org.w3id.cwl.cwl.File":
                datum = cast(CWLObjectType, datum)
                self.files.append(datum)

                # Remember which mapping requested loadContents so an error
                # can be attributed to it through SourceLine.
                loadContents_sourceline = (
                    None
                )  # type: Union[None, MutableMapping[str, Union[str, List[int]]], CWLObjectType]
                if binding and binding.get("loadContents"):
                    loadContents_sourceline = binding
                elif schema.get("loadContents"):
                    loadContents_sourceline = schema

                if loadContents_sourceline and loadContents_sourceline["loadContents"]:
                    with SourceLine(
                        loadContents_sourceline,
                        "loadContents",
                        WorkflowException,
                        debug,
                    ):
                        try:
                            with self.fs_access.open(
                                cast(str, datum["location"]), "rb"
                            ) as f2:
                                datum["contents"] = content_limit_respected_read(f2)
                        except Exception as e:
                            # Chain explicitly so the original failure stays
                            # attached as the cause.
                            raise Exception(
                                "Reading {}\n{}".format(datum["location"], e)
                            ) from e

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                        sf_schema = aslist(schema["secondaryFiles"])
                    elif not discover_secondaryFiles:
                        sf_schema = []  # trust the inputs
                    else:
                        sf_schema = aslist(schema["secondaryFiles"])

                    for num, sf_entry in enumerate(sf_schema):
                        if "required" in sf_entry and sf_entry["required"] is not None:
                            required_result = self.do_eval(
                                sf_entry["required"], context=datum
                            )
                            if not (
                                isinstance(required_result, bool)
                                or required_result is None
                            ):
                                if sf_schema == schema["secondaryFiles"]:
                                    sf_item: Any = sf_schema[num]
                                else:
                                    sf_item = sf_schema
                                raise SourceLine(
                                    sf_item, "required", WorkflowException, debug
                                ).makeError(
                                    "The result of a expression in the field "
                                    "'required' must "
                                    f"be a bool or None, not a {type(required_result)}. "
                                    f"Expression '{sf_entry['required']}' resulted "
                                    f"in '{required_result}'."
                                )
                            sf_required = required_result
                        else:
                            sf_required = True

                        # Expressions are evaluated; plain patterns are
                        # applied to the primary file's basename.
                        if "$(" in sf_entry["pattern"] or "${" in sf_entry["pattern"]:
                            sfpath = self.do_eval(sf_entry["pattern"], context=datum)
                        else:
                            sfpath = substitute(
                                cast(str, datum["basename"]), sf_entry["pattern"]
                            )

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                # Resolve the name next to the primary file.
                                d_location = cast(str, datum["location"])
                                if "/" in d_location:
                                    sf_location = (
                                        d_location[0 : d_location.rindex("/") + 1]
                                        + sfname
                                    )
                                else:
                                    sf_location = d_location + sfname
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise SourceLine(
                                    sf_entry, "pattern", WorkflowException, debug
                                ).makeError(
                                    "Expected secondaryFile expression to "
                                    "return type 'str', a 'File' or 'Directory' "
                                    "dictionary, or a list of the same. Received "
                                    f"'{type(sfname)}' from '{sf_entry['pattern']}'."
                                )

                            # No early exit: every listed entry without a
                            # basename gets one derived from its location.
                            for d in cast(
                                MutableSequence[MutableMapping[str, str]],
                                datum["secondaryFiles"],
                            ):
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1 :
                                    ]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:

                                def addsf(
                                    files: MutableSequence[CWLObjectType],
                                    newsf: CWLObjectType,
                                ) -> None:
                                    # Same location already listed: only
                                    # refresh its basename, do not duplicate.
                                    for f in files:
                                        if f["location"] == newsf["location"]:
                                            f["basename"] = newsf["basename"]
                                            return
                                    files.append(newsf)

                                if isinstance(sfname, MutableMapping):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        sfname,
                                    )
                                elif discover_secondaryFiles and self.fs_access.exists(
                                    sf_location
                                ):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise SourceLine(
                                        schema,
                                        "secondaryFiles",
                                        WorkflowException,
                                        debug,
                                    ).makeError(
                                        "Missing required secondary file '%s' from file object: %s"
                                        % (sfname, json_dumps(datum, indent=4))
                                    )

                    normalizeFilesDirs(
                        cast(MutableSequence[CWLObjectType], datum["secondaryFiles"])
                    )

                if "format" in schema:
                    eval_format: Any = self.do_eval(schema["format"])
                    if isinstance(eval_format, str):
                        evaluated_format: Union[str, List[str]] = eval_format
                    elif isinstance(eval_format, MutableSequence):
                        for index, entry in enumerate(eval_format):
                            message = None
                            if not isinstance(entry, str):
                                message = (
                                    "An expression in the 'format' field must "
                                    "evaluate to a string, or list of strings. "
                                    "However a non-string item was received: "
                                    f"'{entry}' of type '{type(entry)}'. "
                                    f"The expression was '{schema['format']}' and "
                                    f"its fully evaluated result is '{eval_format}'."
                                )
                            if expression.needs_parsing(entry):
                                message = (
                                    "For inputs, 'format' field can either "
                                    "contain a single CWL Expression or CWL Parameter "
                                    "Reference, a single format string, or a list of "
                                    "format strings. But the list cannot contain CWL "
                                    "Expressions or CWL Parameter References. List "
                                    f"entry number {index+1} contains the following "
                                    "unallowed CWL Parameter Reference or Expression: "
                                    f"'{entry}'."
                                )
                            if message:
                                raise SourceLine(
                                    schema["format"], index, WorkflowException, debug
                                ).makeError(message)
                        evaluated_format = cast(List[str], eval_format)
                    else:
                        raise SourceLine(
                            schema, "format", WorkflowException, debug
                        ).makeError(
                            "An expression in the 'format' field must "
                            "evaluate to a string, or list of strings. "
                            "However the type of the expression result was "
                            f"{type(eval_format)}. "
                            f"The expression was '{schema['format']}' and "
                            f"its fully evaluated result is '{eval_format}'."
                        )
                    try:
                        check_format(
                            datum,
                            evaluated_format,
                            self.formatgraph,
                        )
                    except ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" % (schema["name"], schema["format"], ve)
                        ) from ve

                # Register every File/Directory among the secondary files.
                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "org.w3id.cwl.cwl.Directory":
                datum = cast(CWLObjectType, datum)
                # A per-input loadListing wins over the builder-wide setting.
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(
                        self.fs_access,
                        datum,
                        (ll == "deep_listing"),
                    )
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = cast(List[int], binding["position"]) + cast(
                    List[int], bi["position"]
                )
            bindings.append(binding)

        return bindings
Beispiel #2
0
    def bind_input(self,
                   schema,                   # type: MutableMapping[Text, Any]
                   datum,                    # type: Any
                   discover_secondaryFiles,  # type: bool
                   lead_pos=None,            # type: Optional[Union[int, List[int]]]
                   tail_pos=None,            # type: Optional[List[int]]
                  ):  # type: (...) -> List[MutableMapping[Text, Any]]
        """
        Bind an input object to its schema and collect the resulting bindings.

        Walks ``schema`` and ``datum`` together, recursing through union
        types, nested type definitions, records and arrays.  File and
        Directory values are appended to ``self.files`` along the way.

        :param schema: the (sub-)schema describing ``datum``; read for
            ``type`` and optionally ``inputBinding``, ``secondaryFiles``,
            ``format``, ``streamable``, ``loadContents``, ``loadListing``,
            ``fields``, ``items`` and ``name``.
        :param datum: the value to bind.
        :param discover_secondaryFiles: when true, secondary files missing
            from a File are looked up with ``self.fs_access.exists`` and
            added if present.
        :param lead_pos: sort-key component(s) placed before this binding's
            own ``position``.
        :param tail_pos: sort-key component(s) placed after this binding's
            own ``position`` (record fields pass the field name here).
        :returns: a list of binding mappings, each carrying a ``position``
            sort key and the bound ``datum``.
        :raises validate.ValidationException: if ``datum`` matches no member
            of a union type.
        :raises WorkflowException: if a required secondary file is missing
            or ``check_format`` rejects a File.

        Side effects visible in this method: ``datum`` may gain ``contents``
        and ``secondaryFiles`` entries, and missing record fields are filled
        with their ``default``.
        """

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[Text, Text]]
        binding = None  # type: Optional[MutableMapping[Text,Any]]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"], MutableMapping):
            # Work on a copy so the position/datum written below do not leak
            # back into the schema's own inputBinding.
            binding = CommentedMap(schema["inputBinding"].items())
            assert binding is not None

            # Sort key layout: lead_pos + own position (default 0) + tail_pos.
            bp = list(aslist(lead_pos))
            if "position" in binding:
                bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, string_types) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, MutableMapping) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                assert avsc is not None
                if validate.validate(avsc, datum):
                    # Narrow a copy of the schema to the matching member type.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                    else:
                        # With valueFrom the recursive result is discarded:
                        # the call is made for its side effects and only this
                        # level's binding is emitted at the end.
                        self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if binding is not None\
                    and "inputBinding" not in st\
                    and "type" in st\
                    and st["type"] == "array"\
                    and "itemSeparator" not in binding:
                # Give the nested array an (empty) binding of its own so its
                # items are bound individually.
                st["inputBinding"] = {}
            # File-related settings live on the outer schema; carry them down.
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
            else:
                bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum and datum[f["name"]] is not None:
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"], discover_secondaryFiles=discover_secondaryFiles))
                    else:
                        # Missing or null field: store its default (or None).
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding is not None:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
                # The binding was handed to each item above; clear it so the
                # array itself is not appended again at the end.
                binding = None

            def _capture_files(f):
                self.files.append(f)
                return f

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if 'required' in sf:
                            sf_required = self.do_eval(sf['required'], context=datum)
                        else:
                            sf_required = True

                        # Expressions are evaluated; plain patterns are
                        # applied to the primary file's basename.
                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(datum["basename"], sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            # An expression may yield a ready-made
                            # File/Directory mapping instead of a name.  Only
                            # plain names may be glued onto the primary file's
                            # location; concatenating a mapping raises
                            # TypeError.
                            sf_is_mapping = isinstance(sfname, MutableMapping)
                            if sf_is_mapping:
                                sfbasename = sfname.get("basename")
                            else:
                                sfbasename = sfname
                            found = False
                            # No early exit: every listed entry without a
                            # basename gets one derived from its location.
                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][d["location"].rindex("/")+1:]
                                if d["basename"] == sfbasename:
                                    found = True
                            if not found:
                                if sf_is_mapping:
                                    datum["secondaryFiles"].append(sfname)
                                else:
                                    sf_location = datum["location"][0:datum["location"].rindex("/")+1]+sfname
                                    if discover_secondaryFiles and self.fs_access.exists(sf_location):
                                        datum["secondaryFiles"].append({
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File"})
                                    elif sf_required:
                                        raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                            sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        check_format(datum, self.do_eval(schema["format"]),
                                     self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" % (schema["name"], schema["format"], ve))

                # Register every File/Directory among the secondary files.
                visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

            if schema["type"] == "Directory":
                # A per-input loadListing wins over the builder-wide setting.
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding is not None:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
Beispiel #3
0
    def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
        bindings = []
        binding = None
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(
                    binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t,
                                dict) and "name" in t and self.names.has_name(
                                    t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema,
                                           datum,
                                           lead_pos=lead_pos,
                                           tail_pos=tail_pos)
            raise validate.ValidationException("'%s' is not a valid union %s" %
                                               (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and "itemSeparator" not in binding and st[
                    "type"] in ("array", "map"):
                st["inputBinding"] = {}
            bindings.extend(
                self.bind_input(st,
                                datum,
                                lead_pos=lead_pos,
                                tail_pos=tail_pos))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(
                            self.bind_input(f,
                                            datum[f["name"]],
                                            lead_pos=lead_pos,
                                            tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["values"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["items"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding and binding.get("loadContents"):
                    with self.fs_access.open(datum["path"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            sfpath = self.do_eval(sf, context=datum)
                            if isinstance(sfpath, basestring):
                                sfpath = {"path": sfpath, "class": "File"}
                        else:
                            sfpath = {
                                "path": substitute(datum["path"], sf),
                                "class": "File"
                            }
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                for sf in datum.get("secondaryFiles", []):
                    self.files.append(sf)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        """Collect the input bindings that ``schema`` declares for ``datum``.

        The schema is walked recursively: a union type is resolved to the
        first member that validates ``datum``, a nested type object is
        descended into, and record fields, map entries and array items are
        bound one by one.  Every File value met on the way is appended to
        ``self.files``.

        :param schema: mapping with a ``"type"`` entry and an optional
            ``"inputBinding"`` mapping.
        :param datum: value to bind.  It may be modified in place: record
            fields absent from it are set to the field's ``default``, and
            File values can gain ``contents`` and ``secondaryFiles`` entries.
        :param lead_pos: component(s) placed in front of the binding's own
            position in its sort key; ``None`` adds nothing.
        :param tail_pos: component(s) placed after the binding's own position
            in its sort key; ``None`` adds nothing.
        :returns: list of binding dicts.  Each has a ``position`` sort key
            and a ``valueFrom`` entry holding the bound value; a
            ``valueFrom`` that was declared in the schema is kept under
            ``do_eval``.
        :raises validate.ValidationException: when ``schema["type"]`` is a
            union and ``datum`` validates against none of its members.
        """
        # The defaults are None instead of [] so that no list object is
        # shared between calls; this also matches the other bind_input
        # variants in this file.
        if lead_pos is None:
            lead_pos = []
        if tail_pos is None:
            tail_pos = []

        bindings = []
        binding = None
        if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
            # Shallow copy: the schema's own inputBinding must stay untouched.
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            # Keep the declared valueFrom (if any) under "do_eval" before
            # "valueFrom" is overwritten with the actual value.
            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                # Prefer an already registered named schema; otherwise build
                # one from the union member.
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    # First matching member wins: bind again with the union
                    # narrowed down to that member.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
            raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            # Give a nested array/map an empty binding of its own when the
            # outer binding exists and declares no itemSeparator.
            if (
                binding
                and "inputBinding" not in st
                and "itemSeparator" not in binding
                and st["type"] in ("array", "map")
            ):
                st["inputBinding"] = {}
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
        else:
            # Resolve a type name that refers to a schema definition.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                    else:
                        # Missing field: store its default (None if absent).
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(
                        self.bind_input(
                            {"type": schema["values"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos
                        )
                    )
                # The per-entry copies replace the map's own binding.
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(
                        self.bind_input(
                            {"type": schema["items"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos
                        )
                    )
                # The per-item copies replace the array's own binding.
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding:
                    if binding.get("loadContents"):
                        # Read at most CONTENT_LIMIT from the file.
                        with self.fs_access.open(datum["path"], "rb") as f:
                            datum["contents"] = f.read(CONTENT_LIMIT)

                    if "secondaryFiles" in binding:
                        if "secondaryFiles" not in datum:
                            datum["secondaryFiles"] = []
                        for sf in aslist(binding["secondaryFiles"]):
                            if isinstance(sf, dict):
                                # A dict entry is evaluated against the
                                # primary file's path.
                                sfpath = self.do_eval(sf, context=datum["path"])
                            else:
                                sfpath = {"path": substitute(datum["path"], sf), "class": "File"}
                            # Secondary files are recorded on the datum and
                            # in self.files alike.
                            if isinstance(sfpath, list):
                                datum["secondaryFiles"].extend(sfpath)
                                self.files.extend(sfpath)
                            else:
                                datum["secondaryFiles"].append(sfpath)
                                self.files.append(sfpath)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
Beispiel #5
0
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
        """Gather the input bindings declared by ``schema`` for ``datum``.

        Unions are narrowed to the first member that validates ``datum``,
        nested type objects are descended into, and record fields and array
        items are bound recursively.  File and Directory values are appended
        to ``self.files``.

        :param schema: mapping with a ``"type"`` entry and an optional
            ``"inputBinding"`` mapping.
        :param datum: value to bind; record values get missing fields filled
            with the field default, File values may gain ``contents`` and
            ``secondaryFiles``.
        :param lead_pos: component(s) put in front of the binding's own
            position; ``None`` means none.
        :param tail_pos: component(s) put after the binding's own position;
            ``None`` means none.
        :returns: list of binding dicts, each with a ``position`` sort key
            and the bound value under ``datum``.
        :raises validate.ValidationException: if ``schema["type"]`` is a
            union and no member validates ``datum``.
        """
        tail_pos = [] if tail_pos is None else tail_pos
        lead_pos = [] if lead_pos is None else lead_pos

        collected = []  # type: List[Dict[Text,Text]]
        binding = None  # type: Dict[Text,Any]
        if "inputBinding" in schema:
            declared = schema["inputBinding"]
            if isinstance(declared, dict):
                # Work on a shallow copy so the schema itself stays as it is.
                binding = copy.copy(declared)
                head = aslist(lead_pos)
                middle = aslist(binding["position"]) if "position" in binding else [0]
                binding["position"] = head + middle + aslist(tail_pos)
                binding["datum"] = datum

        inherited_keys = ("secondaryFiles", "format", "streamable")

        if isinstance(schema["type"], list):
            # Union: the first member that validates the value is used.
            for member in schema["type"]:
                if isinstance(member, (str, Text)) and self.names.has_name(member, ""):
                    member_schema = self.names.get_name(member, "")
                elif (isinstance(member, dict) and "name" in member
                      and self.names.has_name(member["name"], "")):
                    member_schema = self.names.get_name(member["name"], "")
                else:
                    member_schema = avro.schema.make_avsc_object(member, self.names)

                if not validate.validate(member_schema, datum):
                    continue

                narrowed = copy.deepcopy(schema)
                narrowed["type"] = member
                return self.bind_input(narrowed, datum, lead_pos=lead_pos, tail_pos=tail_pos)

            raise validate.ValidationException(
                u"'%s' is not a valid union %s" % (datum, schema["type"]))

        elif isinstance(schema["type"], dict):
            inner = copy.deepcopy(schema["type"])
            # A nested array without its own binding gets an empty one, unless
            # the outer binding declares an itemSeparator.
            if binding and "inputBinding" not in inner:
                if inner["type"] == "array" and "itemSeparator" not in binding:
                    inner["inputBinding"] = {}
            for key in inherited_keys:
                if key in schema:
                    inner[key] = schema[key]
            nested = self.bind_input(inner, datum, lead_pos=lead_pos, tail_pos=tail_pos)
            collected.extend(nested)

        else:
            # A type name may point at a schema definition; use that instead.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]
            kind = schema["type"]

            if kind == "record":
                for field in schema["fields"]:
                    field_name = field["name"]
                    if field_name not in datum:
                        # Absent field: record its default (None when unset).
                        datum[field_name] = field.get("default")
                        continue
                    collected.extend(
                        self.bind_input(field, datum[field_name],
                                        lead_pos=lead_pos, tail_pos=field_name))

            elif kind == "array":
                for index, element in enumerate(datum):
                    element_binding = None
                    if binding:
                        element_binding = copy.deepcopy(binding)
                        element_binding["datum"] = element
                    element_schema = {
                        u"type": schema["items"],
                        u"inputBinding": element_binding
                    }
                    element_schema.update(
                        {key: schema[key] for key in inherited_keys if key in schema})
                    collected.extend(
                        self.bind_input(element_schema, element,
                                        lead_pos=index, tail_pos=tail_pos))
                # The per-item copies stand in for the array's own binding.
                binding = None

            elif kind == "File":
                self.files.append(datum)
                if binding and binding.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as handle:
                        datum["contents"] = handle.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for pattern in aslist(schema["secondaryFiles"]):
                        is_expression = (isinstance(pattern, dict)
                                         or "$(" in pattern or "${" in pattern)
                        if is_expression:
                            evaluated = self.do_eval(pattern, context=datum)
                            if isinstance(evaluated, string_types):
                                secondary = {"location": evaluated, "class": "File"}
                            else:
                                secondary = evaluated
                        else:
                            secondary = {
                                "location": substitute(datum["location"], pattern),
                                "class": "File"
                            }
                        if isinstance(secondary, list):
                            datum["secondaryFiles"].extend(secondary)
                        else:
                            datum["secondaryFiles"].append(secondary)
                    normalizeFilesDirs(datum["secondaryFiles"])

                def _remember(entry):
                    self.files.append(entry)
                    return entry

                # Record every File/Directory found among the secondary files.
                visit_class(datum.get("secondaryFiles", []),
                            ("File", "Directory"), _remember)

            elif kind == "Directory":
                listing_mode = self.loadListing or (binding and binding.get("loadListing"))
                if listing_mode and listing_mode != "no_listing":
                    get_listing(self.fs_access, datum, (listing_mode == "deep_listing"))
                self.files.append(datum)

        # Position to front of the sort key
        if binding:
            prefix = binding["position"]
            for child in collected:
                child["position"] = prefix + child["position"]
            collected.append(binding)

        return collected
Beispiel #6
0
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None,
                   discover_secondaryFiles=False):
        # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int], bool) -> List[Dict[Text, Any]]
        """Gather the input bindings declared by ``schema`` for ``datum``.

        Unions, nested type objects, records and arrays are bound
        recursively.  File and Directory values are appended to
        ``self.files``; File values are additionally checked for their
        secondary files and format.

        When the binding of this level declares ``valueFrom``, the recursive
        calls for union members and nested type objects are still made, but
        the bindings they return are not used.

        :param schema: mapping with a ``"type"`` entry and an optional
            ``"inputBinding"`` mapping.
        :param datum: value to bind; may be modified in place (record
            defaults, file ``contents`` / ``secondaryFiles``).
        :param lead_pos: component(s) put in front of the binding's own
            position; ``None`` means none.
        :param tail_pos: component(s) put after the binding's own position;
            ``None`` means none.
        :param discover_secondaryFiles: if true, a secondary file named by
            the schema but missing from ``datum`` is added next to the
            primary file's location instead of raising.
        :returns: list of binding dicts, each with a ``position`` sort key
            and the bound value under ``datum``.
        :raises validate.ValidationException: if ``schema["type"]`` is a
            union and no member validates ``datum``.
        :raises WorkflowException: for a missing secondary file (when
            discovery is off) or a failed format check.
        """
        tail_pos = [] if tail_pos is None else tail_pos
        lead_pos = [] if lead_pos is None else lead_pos

        collected = []  # type: List[Dict[Text,Text]]
        binding = None  # type: Dict[Text,Any]
        value_from_expression = False
        if "inputBinding" in schema:
            declared = schema["inputBinding"]
            if isinstance(declared, dict):
                # Work on a shallow copy so the schema itself stays as it is.
                binding = copy.copy(declared)
                head = aslist(lead_pos)
                middle = aslist(binding["position"]) if "position" in binding else [0]
                binding["position"] = head + middle + aslist(tail_pos)
                binding["datum"] = datum
                value_from_expression = "valueFrom" in binding

        inherited_keys = ("secondaryFiles", "format", "streamable")

        if isinstance(schema["type"], list):
            # Union: without valueFrom the first validating member decides the
            # result; with valueFrom every validating member is bound and the
            # results are dropped.
            matched = False
            for member in schema["type"]:
                if isinstance(member, (str, Text)) and self.names.has_name(member, ""):
                    member_schema = self.names.get_name(member, "")
                elif (isinstance(member, dict) and "name" in member
                      and self.names.has_name(member["name"], "")):
                    member_schema = self.names.get_name(member["name"], "")
                else:
                    member_schema = AvroSchemaFromJSONData(member, self.names)

                if not validate.validate(member_schema, datum):
                    continue

                schema = copy.deepcopy(schema)
                schema["type"] = member
                nested = self.bind_input(
                    schema, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles)
                if not value_from_expression:
                    return nested
                matched = True

            if not matched:
                raise validate.ValidationException(
                    u"'%s' is not a valid union %s" % (datum, schema["type"]))

        elif isinstance(schema["type"], dict):
            inner = copy.deepcopy(schema["type"])
            # A nested array without its own binding gets an empty one, unless
            # the outer binding declares an itemSeparator.
            if binding and "inputBinding" not in inner:
                if inner["type"] == "array" and "itemSeparator" not in binding:
                    inner["inputBinding"] = {}
            for key in inherited_keys:
                if key in schema:
                    inner[key] = schema[key]
            nested = self.bind_input(
                inner, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                discover_secondaryFiles=discover_secondaryFiles)
            if not value_from_expression:
                collected.extend(nested)

        else:
            # A type name may point at a schema definition; use that instead.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]
            kind = schema["type"]

            if kind == "record":
                for field in schema["fields"]:
                    field_name = field["name"]
                    if field_name not in datum:
                        # Absent field: record its default (None when unset).
                        datum[field_name] = field.get("default")
                        continue
                    collected.extend(
                        self.bind_input(
                            field, datum[field_name],
                            lead_pos=lead_pos, tail_pos=field_name,
                            discover_secondaryFiles=discover_secondaryFiles))

            elif kind == "array":
                for index, element in enumerate(datum):
                    element_binding = None
                    if binding:
                        element_binding = copy.deepcopy(binding)
                        element_binding["datum"] = element
                    element_schema = {
                        u"type": schema["items"],
                        u"inputBinding": element_binding
                    }
                    element_schema.update(
                        {key: schema[key] for key in inherited_keys if key in schema})
                    collected.extend(
                        self.bind_input(
                            element_schema, element,
                            lead_pos=index, tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles))
                # The per-item copies stand in for the array's own binding.
                binding = None

            elif kind == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as handle:
                        datum["contents"] = handle.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for pattern in aslist(schema["secondaryFiles"]):
                        is_expression = (isinstance(pattern, dict)
                                         or "$(" in pattern or "${" in pattern)
                        if is_expression:
                            expected = self.do_eval(pattern, context=datum)
                        else:
                            expected = substitute(datum["basename"], pattern)

                        for wanted in aslist(expected):
                            # Scan every known secondary file (no early exit)
                            # so each one ends up with a basename.
                            present = False
                            for known in datum["secondaryFiles"]:
                                if not known.get("basename"):
                                    known_location = known["location"]
                                    known["basename"] = known_location[
                                        known_location.rindex("/") + 1:]
                                if known["basename"] == wanted:
                                    present = True
                            if present:
                                continue

                            if isinstance(wanted, dict):
                                datum["secondaryFiles"].append(wanted)
                            elif discover_secondaryFiles:
                                # Place the file next to the primary one.
                                primary_location = datum["location"]
                                datum["secondaryFiles"].append({
                                    "location": primary_location[
                                        0:primary_location.rindex("/") + 1] + wanted,
                                    "basename": wanted,
                                    "class": "File"
                                })
                            else:
                                raise WorkflowException(
                                    "Missing required secondary file '%s' from file object: %s"
                                    % (wanted, json.dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        checkFormat(datum, self.do_eval(schema["format"]),
                                    self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n  %s"
                            % (schema["name"], schema["format"], ve))

                def _remember(entry):
                    self.files.append(entry)
                    return entry

                # Record every File/Directory found among the secondary files.
                visit_class(datum.get("secondaryFiles", []),
                            ("File", "Directory"), _remember)

            elif kind == "Directory":
                listing_mode = self.loadListing or (binding and binding.get("loadListing"))
                if listing_mode and listing_mode != "no_listing":
                    get_listing(self.fs_access, datum, (listing_mode == "deep_listing"))
                self.files.append(datum)

        # Position to front of the sort key
        if binding:
            prefix = binding["position"]
            for child in collected:
                child["position"] = prefix + child["position"]
            collected.append(binding)

        return collected
Beispiel #7
0
    def bind_input(
        self,
        schema: MutableMapping[str, Any],
        datum: Any,
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[List[int]] = None,
    ) -> List[MutableMapping[str, Any]]:

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, str]]
        binding = {}  # type: Union[MutableMapping[str, str], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   MutableMapping):
            binding = CommentedMap(schema["inputBinding"].items())

            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position,
                              str):  # no need to test the CWL Version
                    # the schema for v1.0 only allow ints
                    binding["position"] = self.do_eval(position, context=datum)
                    bp.append(binding["position"])
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif (isinstance(t, MutableMapping) and "name" in t
                      and self.names.has_name(t["name"], "")):
                    avsc = self.names.get_name(t["name"], "")
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(
                    "'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (binding and "inputBinding" not in st and "type" in st
                    and st["type"] == "array"
                    and "itemSeparator" not in binding):
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    ))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum and datum[f["name"]] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                datum[f["name"]],
                                lead_pos=lead_pos,
                                tail_pos=f["name"],
                                discover_secondaryFiles=discover_secondaryFiles,
                            ))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {"type": schema["items"], "inputBinding": b2}
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        ))
                binding = {}

            def _capture_files(f):  # type: (Dict[str, str]) -> Dict[str, str]
                self.files.append(f)
                return f

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")
                    ) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = content_limit_respected_read(f)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if "required" in sf:
                            sf_required = self.do_eval(sf["required"],
                                                       context=datum)
                        else:
                            sf_required = True

                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(datum["basename"],
                                                sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                sf_location = (
                                    datum["location"]
                                    [0:datum["location"].rindex("/") + 1] +
                                    sfname)
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise WorkflowException(
                                    "Expected secondaryFile expression to return type 'str' or 'MutableMapping', received '%s'"
                                    % (type(sfname)))

                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1:]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:

                                def addsf(
                                    files: MutableSequence[MutableMapping[
                                        str, Any]],
                                    newsf: MutableMapping[str, Any],
                                ) -> None:
                                    """Merge ``newsf`` into ``files``, keyed on "location".

                                    The first entry of ``files`` whose "location"
                                    equals that of ``newsf`` gets its "basename"
                                    overwritten with the one from ``newsf``; when
                                    no entry matches, ``newsf`` itself is appended.
                                    """
                                    for entry in files:
                                        if entry["location"] != newsf["location"]:
                                            continue
                                        # Already listed: only refresh its name.
                                        entry["basename"] = newsf["basename"]
                                        break
                                    else:
                                        files.append(newsf)

                                if isinstance(sfname, MutableMapping):
                                    addsf(datum["secondaryFiles"], sfname)
                                elif discover_secondaryFiles and self.fs_access.exists(
                                        sf_location):
                                    addsf(
                                        datum["secondaryFiles"],
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise WorkflowException(
                                        "Missing required secondary file '%s' from file object: %s"
                                        %
                                        (sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        check_format(datum, self.do_eval(schema["format"]),
                                     self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" %
                            (schema["name"], schema["format"], ve)) from ve

                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "Directory":
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
Beispiel #8
0
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
        """Collect the command line bindings for ``datum`` as described by ``schema``.

        The schema type is walked recursively (unions, nested type
        definitions, records and arrays).  Every File and Directory object
        met on the way is appended to ``self.files``.

        :param schema: parameter or field schema.  Must have a "type" entry
            and may carry "inputBinding", "secondaryFiles", "format" and
            "streamable".
        :param datum: the input value for ``schema``.  It is modified in
            place: record fields that are absent are set to the field's
            "default", and File objects may gain "contents" and
            "secondaryFiles".
        :param lead_pos: sort key component(s) put in front of this binding's
            own position.  ``None`` means there are none.
        :param tail_pos: sort key component(s) put behind this binding's own
            position.  ``None`` means there are none.
        :return: the list of binding dicts; each has a list-valued "position"
            sort key and the bound value under "datum".
        :raises validate.ValidationException: when ``datum`` matches no
            member of a union type.
        """
        # None replaces the former literal-list defaults, which were single
        # objects shared by every call of this method.
        if lead_pos is None:
            lead_pos = []
        if tail_pos is None:
            tail_pos = []

        bindings = []  # type: List[Dict[Text,Text]]
        binding = None  # type: Dict[Text,Any]
        if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
            # Shallow copy so the schema's own inputBinding is left untouched.
            binding = copy.copy(schema["inputBinding"])

            # The sort key is lead_pos + own position (0 if unset) + tail_pos.
            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            binding["datum"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    # The first member that validates wins; bind against a
                    # copy of the schema narrowed to that member.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
            raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            # Inline type definition: recurse into it.
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and st["type"] == "array" and "itemSeparator" not in binding:
                # Give the array an empty binding so each of its items
                # produces a binding of its own.
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
        else:
            # A type name known to schemaDefs is replaced by its definition.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        # The field name is used as the trailing sort key.
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    # The item index is the leading sort key of the item.
                    bindings.extend(
                        self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos))
                # The array binding was handed down to the items above, so
                # it must not be emitted again at the end of this method.
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding and binding.get("loadContents"):
                    # Read at most CONTENT_LIMIT bytes of the file.
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            # Expression: evaluate it with the file as context.
                            secondary_eval = self.do_eval(sf, context=datum)
                            # (str, Text) also works where ``basestring`` is
                            # not defined, and matches the union check above.
                            if isinstance(secondary_eval, (str, Text)):
                                sfpath = {"location": secondary_eval,
                                          "class": "File"}
                            else:
                                sfpath = secondary_eval
                        else:
                            # Plain pattern applied to the primary location.
                            sfpath = {"location": substitute(datum["location"], sf), "class": "File"}
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                    normalizeFilesDirs(datum["secondaryFiles"])

                def _capture_files(f):
                    """Record a secondary file object in ``self.files``."""
                    self.files.append(f)
                    return f

                adjustFileObjs(datum.get("secondaryFiles", []), _capture_files)

            if schema["type"] == "Directory":
                self.files.append(datum)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
Beispiel #9
0
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
        """Collect the command line bindings for ``datum`` as described by ``schema``.

        The schema type is walked recursively (unions, nested type
        definitions, records and arrays).  Every File and Directory object
        met on the way is appended to ``self.files``.

        :param schema: parameter or field schema.  Must have a "type" entry
            and may carry "inputBinding", "secondaryFiles", "format" and
            "streamable".
        :param datum: the input value for ``schema``.  It is modified in
            place: record fields that are absent are set to the field's
            "default", File objects may gain "contents" and
            "secondaryFiles", and ``get_listing`` is called on Directory
            objects when a listing is requested.
        :param lead_pos: sort key component(s) put in front of this binding's
            own position.  ``None`` means there are none.
        :param tail_pos: sort key component(s) put behind this binding's own
            position.  ``None`` means there are none.
        :return: the list of binding dicts; each has a list-valued "position"
            sort key and the bound value under "datum".
        :raises validate.ValidationException: when ``datum`` matches no
            member of a union type.
        """
        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []
        bindings = []  # type: List[Dict[Text,Text]]
        binding = None  # type: Dict[Text,Any]
        if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
            # Shallow copy so the schema's own inputBinding is left untouched.
            binding = copy.copy(schema["inputBinding"])

            # The sort key is lead_pos + own position (0 if unset) + tail_pos.
            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            binding["datum"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = AvroSchemaFromJSONData(t, self.names)
                if validate.validate(avsc, datum):
                    # The first member that validates wins; bind against a
                    # copy of the schema narrowed to that member.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
            raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            # Inline type definition: recurse into it.
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and st["type"] == "array" and "itemSeparator" not in binding:
                # Give the array an empty binding so each of its items
                # produces a binding of its own.
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
        else:
            # A type name known to schemaDefs is replaced by its definition.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        # The field name is used as the trailing sort key.
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    # The item index is the leading sort key of the item.
                    bindings.extend(
                        self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos))
                # The array binding was handed down to the items above, so
                # it must not be emitted again at the end of this method.
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding:
                    if binding.get("loadContents"):
                        # Read at most CONTENT_LIMIT bytes of the file.
                        with self.fs_access.open(datum["location"], "rb") as f:
                            datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            # Expression: evaluate it with the file as context.
                            sfpath = self.do_eval(sf, context=datum)
                        else:
                            # Plain pattern applied to the primary basename.
                            sfpath = substitute(datum["basename"], sf)
                        for sfname in aslist(sfpath):
                            # An expression may yield a File object rather
                            # than a name.  Compare by its basename (taken
                            # from "location" when not set); comparing the
                            # dict itself never matches and would add an
                            # already listed file a second time.
                            if isinstance(sfname, dict):
                                sfbasename = sfname.get("basename")
                                if not sfbasename and "location" in sfname:
                                    sfbasename = sfname["location"].rsplit("/", 1)[-1]
                            else:
                                sfbasename = sfname
                            found = False
                            # No early exit: the loop also fills in missing
                            # basenames on the entries already present.
                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][d["location"].rindex("/")+1:]
                                if d["basename"] == sfbasename:
                                    found = True
                            if not found:
                                if isinstance(sfname, dict):
                                    datum["secondaryFiles"].append(sfname)
                                else:
                                    # Place the file next to the primary one.
                                    datum["secondaryFiles"].append({
                                        "location": datum["location"][0:datum["location"].rindex("/")+1]+sfname,
                                        "basename": sfname,
                                        "class": "File"})

                    normalizeFilesDirs(datum["secondaryFiles"])

                def _capture_files(f):
                    """Record a secondary file object in ``self.files``."""
                    self.files.append(f)
                    return f

                visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

            if schema["type"] == "Directory":
                # self.loadListing, when set, wins over the binding's value.
                ll = self.loadListing or (binding and binding.get("loadListing"))
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings