def bind_input(
    self,
    schema: CWLObjectType,
    datum: Union[CWLObjectType, List[CWLObjectType]],
    discover_secondaryFiles: bool,
    lead_pos: Optional[Union[int, List[int]]] = None,
    tail_pos: Optional[Union[str, List[int]]] = None,
) -> List[MutableMapping[str, Union[str, List[int]]]]:
    """Bind an input value to its schema, collecting command line bindings.

    Recursively walks ``schema`` (unions, records, arrays, File/Directory
    and ``Any`` types), mutating ``datum`` in place along the way: record
    defaults are filled in, ``secondaryFiles`` may be discovered and
    appended, and file ``contents`` may be loaded. Every File/Directory
    object encountered is appended to ``self.files``.

    :param schema: avro-style schema of the input parameter.
    :param datum: the input value; may be mutated in place.
    :param discover_secondaryFiles: when True, probe the filesystem
        (via ``self.fs_access``) for secondary files not already listed
        on the input object.
    :param lead_pos: sort-key positions to prepend to this binding.
    :param tail_pos: sort-key positions to append to this binding.
    :return: the accumulated bindings, each carrying a ``position`` sort
        key and its ``datum``.
    :raises ValidationException: if ``datum`` matches no union branch.
    :raises WorkflowException: for invalid expression results or a
        missing required secondary file.
    """
    debug = _logger.isEnabledFor(logging.DEBUG)

    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []

    bindings = []  # type: List[MutableMapping[str, Union[str, List[int]]]]
    binding = (
        {}
    )  # type: Union[MutableMapping[str, Union[str, List[int]]], CommentedMap]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(
        schema["inputBinding"], MutableMapping
    ):
        binding = CommentedMap(schema["inputBinding"].items())

        bp = list(aslist(lead_pos))
        if "position" in binding:
            position = binding["position"]
            if isinstance(position, str):
                # A string 'position' must be a CWL expression (v1.0 only
                # allowed ints, so no version check needed); it must
                # evaluate to an int.
                result = self.do_eval(position, context=datum)
                if not isinstance(result, int):
                    raise SourceLine(
                        schema["inputBinding"], "position", WorkflowException, debug
                    ).makeError(
                        "'position' expressions must evaluate to an int, "
                        f"not a {type(result)}. Expression {position} "
                        f"resulted in '{result}'."
                    )
                binding["position"] = result
                bp.append(result)
            else:
                bp.extend(aslist(binding["position"]))
        else:
            bp.append(0)
        bp.extend(aslist(tail_pos))
        binding["position"] = bp

        binding["datum"] = datum
        if "valueFrom" in binding:
            value_from_expression = True

    # Handle union types: try each branch; first branch that validates wins.
    if isinstance(schema["type"], MutableSequence):
        bound_input = False
        for t in schema["type"]:
            avsc = None  # type: Optional[Schema]
            if isinstance(t, str) and self.names.has_name(t, None):
                avsc = self.names.get_name(t, None)
            elif (
                isinstance(t, MutableMapping)
                and "name" in t
                and self.names.has_name(cast(str, t["name"]), None)
            ):
                avsc = self.names.get_name(cast(str, t["name"]), None)
            if not avsc:
                avsc = make_avsc_object(convert_to_dict(t), self.names)
            if validate(avsc, datum, vocab=INPUT_OBJ_VOCAB):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                if not value_from_expression:
                    return self.bind_input(
                        schema,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                else:
                    # With valueFrom, recurse only for the side effects
                    # (file capture); the binding itself is kept here.
                    self.bind_input(
                        schema,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                    bound_input = True
        if not bound_input:
            raise ValidationException(
                "'{}' is not a valid union {}".format(datum, schema["type"])
            )
    elif isinstance(schema["type"], MutableMapping):
        st = copy.deepcopy(schema["type"])
        if (
            binding
            and "inputBinding" not in st
            and "type" in st
            and st["type"] == "array"
            and "itemSeparator" not in binding
        ):
            # Without itemSeparator, array items are bound individually.
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            self.bind_input(
                st,
                datum,
                lead_pos=lead_pos,
                tail_pos=tail_pos,
                discover_secondaryFiles=discover_secondaryFiles,
            )
        else:
            bindings.extend(
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            )
    else:
        if schema["type"] == "org.w3id.cwl.salad.Any":
            # Specialize 'Any' based on the runtime shape of datum.
            if isinstance(datum, dict):
                if datum.get("class") == "File":
                    schema["type"] = "org.w3id.cwl.cwl.File"
                elif datum.get("class") == "Directory":
                    schema["type"] = "org.w3id.cwl.cwl.Directory"
                else:
                    schema["type"] = "record"
                    schema["fields"] = [
                        {"name": field_name, "type": "Any"}
                        for field_name in datum.keys()
                    ]
            elif isinstance(datum, list):
                schema["type"] = "array"
                schema["items"] = "Any"
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[cast(str, schema["type"])]
        if schema["type"] == "record":
            datum = cast(CWLObjectType, datum)
            for f in cast(List[CWLObjectType], schema["fields"]):
                name = cast(str, f["name"])
                if name in datum and datum[name] is not None:
                    bindings.extend(
                        self.bind_input(
                            f,
                            cast(CWLObjectType, datum[name]),
                            lead_pos=lead_pos,
                            tail_pos=name,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    )
                else:
                    datum[name] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(cast(MutableSequence[CWLObjectType], datum)):
                b2 = None
                if binding:
                    b2 = cast(CWLObjectType, copy.deepcopy(binding))
                    b2["datum"] = item
                itemschema = {
                    "type": schema["items"],
                    "inputBinding": b2,
                }  # type: CWLObjectType
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(
                        itemschema,
                        item,
                        lead_pos=n,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                )
            # Items were bound individually; drop the array-level binding.
            binding = {}

        def _capture_files(f: CWLObjectType) -> CWLObjectType:
            # visit_class callback: record each File/Directory object.
            self.files.append(f)
            return f

        if schema["type"] == "org.w3id.cwl.cwl.File":
            datum = cast(CWLObjectType, datum)
            self.files.append(datum)

            loadContents_sourceline = (
                None
            )  # type: Union[None, MutableMapping[str, Union[str, List[int]]], CWLObjectType]
            # 'loadContents' on the binding takes precedence over the schema.
            if binding and binding.get("loadContents"):
                loadContents_sourceline = binding
            elif schema.get("loadContents"):
                loadContents_sourceline = schema

            if loadContents_sourceline and loadContents_sourceline["loadContents"]:
                with SourceLine(
                    loadContents_sourceline,
                    "loadContents",
                    WorkflowException,
                    debug,
                ):
                    try:
                        with self.fs_access.open(
                            cast(str, datum["location"]), "rb"
                        ) as f2:
                            datum["contents"] = content_limit_respected_read(f2)
                    except Exception as e:
                        raise Exception(
                            "Reading {}\n{}".format(datum["location"], e)
                        )

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                    sf_schema = aslist(schema["secondaryFiles"])
                elif not discover_secondaryFiles:
                    sf_schema = []  # trust the inputs
                else:
                    sf_schema = aslist(schema["secondaryFiles"])
                for num, sf_entry in enumerate(sf_schema):
                    if "required" in sf_entry and sf_entry["required"] is not None:
                        required_result = self.do_eval(
                            sf_entry["required"], context=datum
                        )
                        if not (
                            isinstance(required_result, bool)
                            or required_result is None
                        ):
                            if sf_schema == schema["secondaryFiles"]:
                                sf_item: Any = sf_schema[num]
                            else:
                                sf_item = sf_schema
                            raise SourceLine(
                                sf_item, "required", WorkflowException, debug
                            ).makeError(
                                "The result of a expression in the field "
                                "'required' must "
                                f"be a bool or None, not a {type(required_result)}. "
                                f"Expression '{sf_entry['required']}' resulted "
                                f"in '{required_result}'."
                            )
                        sf_required = required_result
                    else:
                        sf_required = True

                    # 'pattern' may be a CWL expression or a suffix pattern
                    # applied to the primary file's basename.
                    if "$(" in sf_entry["pattern"] or "${" in sf_entry["pattern"]:
                        sfpath = self.do_eval(sf_entry["pattern"], context=datum)
                    else:
                        sfpath = substitute(
                            cast(str, datum["basename"]), sf_entry["pattern"]
                        )

                    for sfname in aslist(sfpath):
                        if not sfname:
                            continue
                        found = False

                        if isinstance(sfname, str):
                            d_location = cast(str, datum["location"])
                            if "/" in d_location:
                                sf_location = (
                                    d_location[0 : d_location.rindex("/") + 1]
                                    + sfname
                                )
                            else:
                                sf_location = d_location + sfname
                            sfbasename = sfname
                        elif isinstance(sfname, MutableMapping):
                            sf_location = sfname["location"]
                            sfbasename = sfname["basename"]
                        else:
                            raise SourceLine(
                                sf_entry, "pattern", WorkflowException, debug
                            ).makeError(
                                "Expected secondaryFile expression to "
                                "return type 'str', a 'File' or 'Directory' "
                                "dictionary, or a list of the same. Received "
                                # FIX: closing quote was missing after the type.
                                f"'{type(sfname)}' from '{sf_entry['pattern']}'."
                            )

                        for d in cast(
                            MutableSequence[MutableMapping[str, str]],
                            datum["secondaryFiles"],
                        ):
                            if not d.get("basename"):
                                d["basename"] = d["location"][
                                    d["location"].rindex("/") + 1 :
                                ]
                            if d["basename"] == sfbasename:
                                found = True

                        if not found:

                            def addsf(
                                files: MutableSequence[CWLObjectType],
                                newsf: CWLObjectType,
                            ) -> None:
                                # Add newsf unless a file with the same
                                # location exists; then just fix its basename.
                                for f in files:
                                    if f["location"] == newsf["location"]:
                                        f["basename"] = newsf["basename"]
                                        return
                                files.append(newsf)

                            if isinstance(sfname, MutableMapping):
                                addsf(
                                    cast(
                                        MutableSequence[CWLObjectType],
                                        datum["secondaryFiles"],
                                    ),
                                    sfname,
                                )
                            elif discover_secondaryFiles and self.fs_access.exists(
                                sf_location
                            ):
                                addsf(
                                    cast(
                                        MutableSequence[CWLObjectType],
                                        datum["secondaryFiles"],
                                    ),
                                    {
                                        "location": sf_location,
                                        "basename": sfname,
                                        "class": "File",
                                    },
                                )
                            elif sf_required:
                                raise SourceLine(
                                    schema,
                                    "secondaryFiles",
                                    WorkflowException,
                                    debug,
                                ).makeError(
                                    "Missing required secondary file '%s' from file object: %s"
                                    % (sfname, json_dumps(datum, indent=4))
                                )

                normalizeFilesDirs(
                    cast(MutableSequence[CWLObjectType], datum["secondaryFiles"])
                )

            if "format" in schema:
                eval_format: Any = self.do_eval(schema["format"])
                if isinstance(eval_format, str):
                    evaluated_format: Union[str, List[str]] = eval_format
                elif isinstance(eval_format, MutableSequence):
                    for index, entry in enumerate(eval_format):
                        message = None
                        if not isinstance(entry, str):
                            message = (
                                "An expression in the 'format' field must "
                                "evaluate to a string, or list of strings. "
                                "However a non-string item was received: "
                                f"'{entry}' of type '{type(entry)}'. "
                                f"The expression was '{schema['format']}' and "
                                f"its fully evaluated result is '{eval_format}'."
                            )
                        if expression.needs_parsing(entry):
                            message = (
                                "For inputs, 'format' field can either "
                                "contain a single CWL Expression or CWL Parameter "
                                "Reference, a single format string, or a list of "
                                "format strings. But the list cannot contain CWL "
                                "Expressions or CWL Parameter References. List "
                                f"entry number {index+1} contains the following "
                                "unallowed CWL Parameter Reference or Expression: "
                                f"'{entry}'."
                            )
                        if message:
                            raise SourceLine(
                                schema["format"], index, WorkflowException, debug
                            ).makeError(message)
                    evaluated_format = cast(List[str], eval_format)
                else:
                    raise SourceLine(
                        schema, "format", WorkflowException, debug
                    ).makeError(
                        "An expression in the 'format' field must "
                        "evaluate to a string, or list of strings. "
                        "However the type of the expression result was "
                        f"{type(eval_format)}. "
                        f"The expression was '{schema['format']}' and "
                        # FIX: interpolate the value; was the literal text
                        # 'eval_format'.
                        f"its fully evaluated result is '{eval_format}'."
                    )
                try:
                    check_format(
                        datum,
                        evaluated_format,
                        self.formatgraph,
                    )
                except ValidationException as ve:
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (schema["name"], schema["format"], ve)
                    ) from ve

            visit_class(
                datum.get("secondaryFiles", []),
                ("File", "Directory"),
                _capture_files,
            )

        if schema["type"] == "org.w3id.cwl.cwl.Directory":
            datum = cast(CWLObjectType, datum)
            ll = schema.get("loadListing") or self.loadListing
            if ll and ll != "no_listing":
                get_listing(
                    self.fs_access,
                    datum,
                    (ll == "deep_listing"),
                )
            self.files.append(datum)

        if schema["type"] == "Any":
            visit_class(datum, ("File", "Directory"), _capture_files)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = cast(List[int], binding["position"]) + cast(
                List[int], bi["position"]
            )
        bindings.append(binding)

    return bindings
def bind_input(self, schema, # type: MutableMapping[Text, Any]
               datum, # type: Any
               discover_secondaryFiles, # type: bool
               lead_pos=None, # type: Optional[Union[int, List[int]]]
               tail_pos=None, # type: Optional[List[int]]
              ):
    # type: (...) -> List[MutableMapping[Text, Any]]
    """Bind an input value to its schema, collecting command line bindings.

    Recursively walks ``schema`` (unions, records, arrays, File/Directory
    types), mutating ``datum`` in place (record defaults, secondaryFiles,
    loaded ``contents``) and appending every File/Directory object seen
    to ``self.files``. Returns the list of bindings, each carrying a
    "position" sort key and its "datum". Raises
    ``validate.ValidationException`` if ``datum`` matches no union branch
    and ``WorkflowException`` for missing required secondary files.
    """
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []

    bindings = []  # type: List[MutableMapping[Text, Text]]
    binding = None  # type: Optional[MutableMapping[Text,Any]]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"], MutableMapping):
        binding = CommentedMap(schema["inputBinding"].items())
        assert binding is not None
        # Sort key: lead positions, then this binding's position (default 0),
        # then tail positions.
        bp = list(aslist(lead_pos))
        if "position" in binding:
            bp.extend(aslist(binding["position"]))
        else:
            bp.append(0)
        bp.extend(aslist(tail_pos))
        binding["position"] = bp

        binding["datum"] = datum
        if "valueFrom" in binding:
            value_from_expression = True

    # Handle union types: first branch that validates wins.
    if isinstance(schema["type"], MutableSequence):
        bound_input = False
        for t in schema["type"]:
            avsc = None  # type: Optional[Schema]
            if isinstance(t, string_types) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, MutableMapping) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            if not avsc:
                avsc = make_avsc_object(convert_to_dict(t), self.names)
            assert avsc is not None
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                if not value_from_expression:
                    return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                else:
                    # With valueFrom, recurse only for side effects;
                    # the binding itself is kept at this level.
                    self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                    bound_input = True
        if not bound_input:
            raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], MutableMapping):
        st = copy.deepcopy(schema["type"])
        # Without itemSeparator, array items are bound individually.
        if binding is not None\
                and "inputBinding" not in st\
                and "type" in st\
                and st["type"] == "array"\
                and "itemSeparator" not in binding:
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
        else:
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum and datum[f["name"]] is not None:
                    bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"], discover_secondaryFiles=discover_secondaryFiles))
                else:
                    # Fill in the field's declared default (or None).
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding is not None:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
            # Items were bound individually; drop the array-level binding.
            binding = None

        def _capture_files(f):
            # visit_class callback: record each File/Directory object.
            self.files.append(f)
            return f

        if schema["type"] == "File":
            self.files.append(datum)
            # 'loadContents' may come from the binding or the schema.
            if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    if 'required' in sf:
                        sf_required = self.do_eval(sf['required'], context=datum)
                    else:
                        sf_required = True
                    # 'pattern' may be a CWL expression or a suffix pattern
                    # applied to the primary file's basename.
                    if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                        sfpath = self.do_eval(sf["pattern"], context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf["pattern"])
                    for sfname in aslist(sfpath):
                        if not sfname:
                            continue
                        found = False
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][d["location"].rindex("/")+1:]
                            if d["basename"] == sfname:
                                found = True
                        if not found:
                            sf_location = datum["location"][0:datum["location"].rindex("/")+1]+sfname
                            if isinstance(sfname, MutableMapping):
                                datum["secondaryFiles"].append(sfname)
                            elif discover_secondaryFiles and self.fs_access.exists(sf_location):
                                datum["secondaryFiles"].append({
                                    "location": sf_location,
                                    "basename": sfname,
                                    "class": "File"})
                            elif sf_required:
                                raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                    sfname, json_dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    check_format(datum, self.do_eval(schema["format"]), self.formatgraph)
                except validate.ValidationException as ve:
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (schema["name"], schema["format"], ve))

            visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            ll = schema.get("loadListing") or self.loadListing
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

        if schema["type"] == "Any":
            visit_class(datum, ("File", "Directory"), _capture_files)

    # Position to front of the sort key
    if binding is not None:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
    """Bind an input value to its schema, collecting command line bindings.

    Recursively walks ``schema`` (unions, records, maps, arrays and File
    types), mutating ``datum`` in place (record defaults, secondaryFiles,
    loaded ``contents``) and appending files to ``self.files``. Returns
    the list of bindings, each with a "position" sort key. Raises
    ``validate.ValidationException`` if ``datum`` matches no union branch.

    ``lead_pos``/``tail_pos`` default to empty position lists.
    """
    # FIX: mutable default arguments ([]) replaced with the None sentinel;
    # behavior is unchanged (the lists were only ever read, never mutated),
    # but the shared-default hazard is removed.
    if lead_pos is None:
        lead_pos = []
    if tail_pos is None:
        tail_pos = []

    bindings = []
    binding = None
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Sort key: lead positions, then this binding's position (default 0),
        # then tail positions.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(
                binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        if "valueFrom" in binding:
            binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

    # Handle union types: first branch that validates wins.
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            if isinstance(t, basestring) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(
                    t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos,
                                       tail_pos=tail_pos)
        raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # Without itemSeparator, array/map items are bound individually.
        if binding and "inputBinding" not in st and "itemSeparator" not in binding and st[
                "type"] in ("array", "map"):
            st["inputBinding"] = {}
        bindings.extend(
            self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(
                        self.bind_input(f, datum[f["name"]],
                                        lead_pos=lead_pos,
                                        tail_pos=f["name"]))
                else:
                    # Fill in the field's declared default (or None).
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "map":
            for n, item in datum.items():
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = [n, item]
                bindings.extend(
                    self.bind_input(
                        {"type": schema["values"], "inputBinding": b2},
                        item, lead_pos=n, tail_pos=tail_pos))
            binding = None

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = item
                bindings.extend(
                    self.bind_input(
                        {"type": schema["items"], "inputBinding": b2},
                        item, lead_pos=n, tail_pos=tail_pos))
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding and binding.get("loadContents"):
                with self.fs_access.open(datum["path"], "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # A dict or an expression string is evaluated; a plain
                    # suffix pattern is substituted against the primary path.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        sfpath = self.do_eval(sf, context=datum)
                        if isinstance(sfpath, basestring):
                            sfpath = {"path": sfpath, "class": "File"}
                    else:
                        sfpath = {
                            "path": substitute(datum["path"], sf),
                            "class": "File"
                        }
                    if isinstance(sfpath, list):
                        datum["secondaryFiles"].extend(sfpath)
                    else:
                        datum["secondaryFiles"].append(sfpath)

            for sf in datum.get("secondaryFiles", []):
                self.files.append(sf)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
    """Bind an input value to its schema, collecting command line bindings.

    Recursively walks ``schema`` (unions, records, maps, arrays and File
    types), mutating ``datum`` in place (record defaults, secondaryFiles
    from the *binding*, loaded ``contents``) and appending files to
    ``self.files``. Returns the list of bindings, each with a "position"
    sort key. Raises ``validate.ValidationException`` if ``datum``
    matches no union branch.

    ``lead_pos``/``tail_pos`` default to empty position lists.
    """
    # FIX: mutable default arguments ([]) replaced with the None sentinel;
    # behavior is unchanged (the lists were only ever read, never mutated),
    # but the shared-default hazard is removed.
    if lead_pos is None:
        lead_pos = []
    if tail_pos is None:
        tail_pos = []

    bindings = []
    binding = None
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Sort key: lead positions, then this binding's position (default 0),
        # then tail positions.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        if "valueFrom" in binding:
            binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

    # Handle union types: first branch that validates wins.
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            if isinstance(t, basestring) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
        raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # Without itemSeparator, array/map items are bound individually.
        if (
            binding
            and "inputBinding" not in st
            and "itemSeparator" not in binding
            and st["type"] in ("array", "map")
        ):
            st["inputBinding"] = {}
        bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                else:
                    # Fill in the field's declared default (or None).
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "map":
            for n, item in datum.items():
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = [n, item]
                bindings.extend(
                    self.bind_input(
                        {"type": schema["values"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos
                    )
                )
            binding = None

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = item
                bindings.extend(
                    self.bind_input(
                        {"type": schema["items"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos
                    )
                )
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding:
                if binding.get("loadContents"):
                    with self.fs_access.open(datum["path"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                # In this version secondaryFiles come from the binding,
                # not the schema.
                if "secondaryFiles" in binding:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(binding["secondaryFiles"]):
                        if isinstance(sf, dict):
                            sfpath = self.do_eval(sf, context=datum["path"])
                        else:
                            sfpath = {"path": substitute(datum["path"], sf), "class": "File"}
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                            self.files.extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                            self.files.append(sfpath)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
    # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
    """Bind an input value to its schema, collecting command line bindings.

    Recursively walks ``schema`` (unions, records, arrays, File/Directory
    types), mutating ``datum`` in place (record defaults, secondaryFiles,
    loaded ``contents``) and appending File/Directory objects to
    ``self.files``. Returns the list of bindings, each with a "position"
    sort key and its "datum". Raises ``validate.ValidationException`` if
    ``datum`` matches no union branch.
    """
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []
    bindings = []  # type: List[Dict[Text,Text]]
    binding = None  # type: Dict[Text,Any]
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Sort key: lead positions, then this binding's position (default 0),
        # then tail positions.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(
                binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        binding["datum"] = datum

    # Handle union types: first branch that validates wins.
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(
                    t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos,
                                       tail_pos=tail_pos)
        raise validate.ValidationException(
            u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # Without itemSeparator, array items are bound individually.
        if binding and "inputBinding" not in st and st[
                "type"] == "array" and "itemSeparator" not in binding:
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        bindings.extend(
            self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(
                        self.bind_input(f, datum[f["name"]],
                                        lead_pos=lead_pos,
                                        tail_pos=f["name"]))
                else:
                    # Fill in the field's declared default (or None).
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(itemschema, item, lead_pos=n,
                                    tail_pos=tail_pos))
            # Items were bound individually; drop the array-level binding.
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding:
                if binding.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # A dict or an expression string is evaluated; a plain
                    # suffix pattern is substituted against the location.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        secondary_eval = self.do_eval(sf, context=datum)
                        if isinstance(secondary_eval, string_types):
                            sfpath = {
                                "location": secondary_eval,
                                "class": "File"
                            }
                        else:
                            sfpath = secondary_eval
                    else:
                        sfpath = {
                            "location": substitute(datum["location"], sf),
                            "class": "File"
                        }
                    if isinstance(sfpath, list):
                        datum["secondaryFiles"].extend(sfpath)
                    else:
                        datum["secondaryFiles"].append(sfpath)
                normalizeFilesDirs(datum["secondaryFiles"])

            def _capture_files(f):
                # visit_class callback: record each File/Directory object.
                self.files.append(f)
                return f

            visit_class(datum.get("secondaryFiles", []),
                        ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            ll = self.loadListing or (binding and binding.get("loadListing"))
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)
    return bindings
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None,
               discover_secondaryFiles=False):
    # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int], bool) -> List[Dict[Text, Any]]
    """Bind an input value to its schema, collecting command line bindings.

    Recursively walks ``schema`` (unions, records, arrays, File/Directory
    types), mutating ``datum`` in place (record defaults, secondaryFiles
    discovery, loaded ``contents``) and appending File/Directory objects
    to ``self.files``. Returns the list of bindings, each with a
    "position" sort key and its "datum". Raises
    ``validate.ValidationException`` if ``datum`` matches no union branch
    and ``WorkflowException`` for missing secondary files.
    """
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []

    bindings = []  # type: List[Dict[Text,Text]]
    binding = None  # type: Dict[Text,Any]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Sort key: lead positions, then this binding's position (default 0),
        # then tail positions.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(
                binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        binding["datum"] = datum
        if "valueFrom" in binding:
            value_from_expression = True

    # Handle union types: first branch that validates wins.
    if isinstance(schema["type"], list):
        bound_input = False
        for t in schema["type"]:
            if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(
                    t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = AvroSchemaFromJSONData(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                if not value_from_expression:
                    return self.bind_input(
                        schema, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles)
                else:
                    # With valueFrom, recurse only for side effects;
                    # the binding itself is kept at this level.
                    self.bind_input(
                        schema, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles)
                    bound_input = True
        if not bound_input:
            raise validate.ValidationException(
                u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # Without itemSeparator, array items are bound individually.
        if binding and "inputBinding" not in st and st[
                "type"] == "array" and "itemSeparator" not in binding:
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            self.bind_input(
                st, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                discover_secondaryFiles=discover_secondaryFiles)
        else:
            bindings.extend(
                self.bind_input(
                    st, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(
                        self.bind_input(
                            f, datum[f["name"]], lead_pos=lead_pos,
                            tail_pos=f["name"],
                            discover_secondaryFiles=discover_secondaryFiles))
                else:
                    # Fill in the field's declared default (or None).
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(
                        itemschema, item, lead_pos=n, tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles))
            # Items were bound individually; drop the array-level binding.
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            # 'loadContents' may come from the binding or the schema.
            if (binding and binding.get("loadContents")
                    ) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # A dict or an expression string is evaluated; a plain
                    # suffix pattern is substituted against the basename.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        sfpath = self.do_eval(sf, context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf)
                    for sfname in aslist(sfpath):
                        found = False
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][
                                    d["location"].rindex("/") + 1:]
                            if d["basename"] == sfname:
                                found = True
                        if not found:
                            if isinstance(sfname, dict):
                                datum["secondaryFiles"].append(sfname)
                            elif discover_secondaryFiles:
                                datum["secondaryFiles"].append({
                                    "location": datum["location"]
                                    [0:datum["location"].rindex("/") + 1] +
                                    sfname,
                                    "basename": sfname,
                                    "class": "File"
                                })
                            else:
                                raise WorkflowException(
                                    "Missing required secondary file '%s' from file object: %s"
                                    % (sfname, json.dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    checkFormat(datum, self.do_eval(schema["format"]),
                                self.formatgraph)
                except validate.ValidationException as ve:
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n %s" %
                        (schema["name"], schema["format"], ve))

            def _capture_files(f):
                # visit_class callback: record each File/Directory object.
                self.files.append(f)
                return f

            visit_class(datum.get("secondaryFiles", []),
                        ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            ll = self.loadListing or (binding and binding.get("loadListing"))
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)
    return bindings
def bind_input(
    self,
    schema: MutableMapping[str, Any],
    datum: Any,
    discover_secondaryFiles: bool,
    lead_pos: Optional[Union[int, List[int]]] = None,
    tail_pos: Optional[List[int]] = None,
) -> List[MutableMapping[str, Any]]:
    """Bind an input value to its schema, accumulating command-line bindings.

    Recursively walks ``schema`` — resolving union types by validation,
    descending into record fields and array items — pairing ``datum`` with
    any ``inputBinding`` found along the way. File values get their
    ``secondaryFiles`` discovered/validated and their ``contents`` loaded
    when requested; every File/Directory encountered is appended to
    ``self.files``.

    :param schema: CWL input parameter schema (possibly nested/union).
    :param datum: the input value to bind against ``schema``.
    :param discover_secondaryFiles: when True, a secondary file missing from
        ``datum`` but present per ``self.fs_access.exists`` is added;
        otherwise a required missing secondary file raises WorkflowException.
    :param lead_pos: positions prepended to the sort key (set on recursion).
    :param tail_pos: positions appended to the sort key (set on recursion).
    :return: list of binding dicts; each carries ``position`` and ``datum``.
    """
    # Avoid mutable default arguments; normalize the recursion cursors.
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []

    bindings = []  # type: List[MutableMapping[str, str]]
    binding = {}  # type: Union[MutableMapping[str, str], CommentedMap]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"], MutableMapping):
        binding = CommentedMap(schema["inputBinding"].items())

        # Build the composite sort key: lead positions, then this binding's
        # own position (default 0), then tail positions.
        bp = list(aslist(lead_pos))
        if "position" in binding:
            position = binding["position"]
            if isinstance(position, str):  # no need to test the CWL Version
                # the schema for v1.0 only allow ints
                binding["position"] = self.do_eval(position, context=datum)
                bp.append(binding["position"])
            else:
                bp.extend(aslist(binding["position"]))
        else:
            bp.append(0)
        bp.extend(aslist(tail_pos))
        binding["position"] = bp

        binding["datum"] = datum
        if "valueFrom" in binding:
            value_from_expression = True

    # Handle union types
    if isinstance(schema["type"], MutableSequence):
        bound_input = False
        for t in schema["type"]:
            avsc = None  # type: Optional[Schema]
            # Prefer an already-registered Avro schema; fall back to
            # building one on the fly from the type dict.
            if isinstance(t, str) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif (isinstance(t, MutableMapping) and "name" in t
                  and self.names.has_name(t["name"], "")):
                avsc = self.names.get_name(t["name"], "")
            if not avsc:
                avsc = make_avsc_object(convert_to_dict(t), self.names)
            if validate.validate(avsc, datum):
                # Re-bind with the union narrowed to the matching member.
                schema = copy.deepcopy(schema)
                schema["type"] = t
                if not value_from_expression:
                    return self.bind_input(
                        schema,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                else:
                    # valueFrom present: recurse for side effects only and
                    # keep this level's binding (appended at the bottom).
                    self.bind_input(
                        schema,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                    bound_input = True
        if not bound_input:
            raise validate.ValidationException(
                "'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], MutableMapping):
        st = copy.deepcopy(schema["type"])
        if (binding and "inputBinding" not in st and "type" in st
                and st["type"] == "array" and "itemSeparator" not in binding):
            # No itemSeparator: give the array an empty inputBinding so each
            # item is bound individually on recursion.
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            self.bind_input(
                st,
                datum,
                lead_pos=lead_pos,
                tail_pos=tail_pos,
                discover_secondaryFiles=discover_secondaryFiles,
            )
        else:
            bindings.extend(
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                ))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum and datum[f["name"]] is not None:
                    # Field name becomes the tail of the sort key.
                    bindings.extend(
                        self.bind_input(
                            f,
                            datum[f["name"]],
                            lead_pos=lead_pos,
                            tail_pos=f["name"],
                            discover_secondaryFiles=discover_secondaryFiles,
                        ))
                else:
                    # NOTE: mutates the caller's datum to fill in defaults.
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {"type": schema["items"], "inputBinding": b2}
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(
                        itemschema,
                        item,
                        lead_pos=n,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    ))
            # Items carry the binding; drop it so it isn't appended again below.
            binding = {}

        def _capture_files(f):  # type: (Dict[str, str]) -> Dict[str, str]
            # Record every File/Directory object seen in self.files.
            self.files.append(f)
            return f

        if schema["type"] == "File":
            self.files.append(datum)
            if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as f:
                    datum["contents"] = content_limit_respected_read(f)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    if "required" in sf:
                        sf_required = self.do_eval(sf["required"], context=datum)
                    else:
                        sf_required = True
                    # Pattern may be a CWL expression or a suffix substitution.
                    if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                        sfpath = self.do_eval(sf["pattern"], context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf["pattern"])

                    for sfname in aslist(sfpath):
                        if not sfname:
                            continue
                        found = False

                        if isinstance(sfname, str):
                            # Resolve relative to the primary file's directory.
                            sf_location = (
                                datum["location"][0:datum["location"].rindex("/") + 1]
                                + sfname)
                            sfbasename = sfname
                        elif isinstance(sfname, MutableMapping):
                            sf_location = sfname["location"]
                            sfbasename = sfname["basename"]
                        else:
                            raise WorkflowException(
                                "Expected secondaryFile expression to return type 'str' or 'MutableMapping', received '%s'"
                                % (type(sfname)))

                        # Match by basename against already-listed secondaryFiles,
                        # backfilling missing basenames from the location.
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][
                                    d["location"].rindex("/") + 1:]
                            if d["basename"] == sfbasename:
                                found = True

                        if not found:

                            def addsf(
                                files: MutableSequence[MutableMapping[str, Any]],
                                newsf: MutableMapping[str, Any],
                            ) -> None:
                                # Merge by location: update basename in place
                                # rather than adding a duplicate entry.
                                for f in files:
                                    if f["location"] == newsf["location"]:
                                        f["basename"] = newsf["basename"]
                                        return
                                files.append(newsf)

                            if isinstance(sfname, MutableMapping):
                                addsf(datum["secondaryFiles"], sfname)
                            elif discover_secondaryFiles and self.fs_access.exists(sf_location):
                                addsf(
                                    datum["secondaryFiles"],
                                    {
                                        "location": sf_location,
                                        "basename": sfname,
                                        "class": "File",
                                    },
                                )
                            elif sf_required:
                                raise WorkflowException(
                                    "Missing required secondary file '%s' from file object: %s"
                                    % (sfname, json_dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    check_format(datum, self.do_eval(schema["format"]), self.formatgraph)
                except validate.ValidationException as ve:
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (schema["name"], schema["format"], ve)) from ve

            visit_class(
                datum.get("secondaryFiles", []),
                ("File", "Directory"),
                _capture_files,
            )

        if schema["type"] == "Directory":
            # Per-binding loadListing overrides the tool-level default.
            ll = schema.get("loadListing") or self.loadListing
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

        if schema["type"] == "Any":
            visit_class(datum, ("File", "Directory"), _capture_files)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
    # type: (Dict[Text, Any], Any, Optional[Union[int, List[int]]], Optional[List[int]]) -> List[Dict[Text, Any]]
    """Bind an input value to its schema, accumulating command-line bindings.

    Recursively walks ``schema`` (unions, records, arrays), pairing ``datum``
    with any ``inputBinding`` found, expanding ``secondaryFiles`` for File
    values, and recording every File/Directory seen in ``self.files``.

    :param schema: CWL input parameter schema (possibly nested/union).
    :param datum: the input value to bind against ``schema``.
    :param lead_pos: positions prepended to the sort key (set on recursion).
    :param tail_pos: positions appended to the sort key (set on recursion).
    :return: list of binding dicts carrying ``position`` and ``datum``.
    """
    # Fix: previous signature used mutable default arguments (lead_pos=[],
    # tail_pos=[]), which are shared across all calls; use None sentinels.
    if lead_pos is None:
        lead_pos = []
    if tail_pos is None:
        tail_pos = []
    bindings = []  # type: List[Dict[Text,Text]]
    binding = None  # type: Dict[Text,Any]
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Composite sort key: lead positions, own position (default 0), tail.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)
        binding["datum"] = datum

    # Handle union types
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            # Prefer an already-registered Avro schema; otherwise build one.
            if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                # Re-bind with the union narrowed to the matching member.
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
        raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        if binding and "inputBinding" not in st and st["type"] == "array" and "itemSeparator" not in binding:
            # No itemSeparator: bind each array item individually.
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    # Field name becomes the tail of the sort key.
                    bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                else:
                    # NOTE: mutates the caller's datum to fill in defaults.
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos))
            # Items carry the binding; drop it so it isn't appended again below.
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding and binding.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # Pattern may be a CWL expression or a suffix substitution.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        secondary_eval = self.do_eval(sf, context=datum)
                        if isinstance(secondary_eval, basestring):
                            sfpath = {"location": secondary_eval, "class": "File"}
                        else:
                            sfpath = secondary_eval
                    else:
                        sfpath = {"location": substitute(datum["location"], sf), "class": "File"}
                    if isinstance(sfpath, list):
                        datum["secondaryFiles"].extend(sfpath)
                    else:
                        datum["secondaryFiles"].append(sfpath)
                normalizeFilesDirs(datum["secondaryFiles"])

            def _capture_files(f):
                # Record every File object seen in self.files.
                self.files.append(f)
                return f
            adjustFileObjs(datum.get("secondaryFiles", []), _capture_files)

        if schema["type"] == "Directory":
            self.files.append(datum)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
    # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
    """Bind an input value to its schema, accumulating command-line bindings.

    Recursively walks ``schema`` (unions, records, arrays), pairing ``datum``
    with any ``inputBinding`` found, discovering ``secondaryFiles`` for File
    values, loading directory listings per ``loadListing``, and recording
    every File/Directory seen in ``self.files``.

    :param schema: CWL input parameter schema (possibly nested/union).
    :param datum: the input value to bind against ``schema``.
    :param lead_pos: positions prepended to the sort key (set on recursion).
    :param tail_pos: positions appended to the sort key (set on recursion).
    :return: list of binding dicts carrying ``position`` and ``datum``.
    """
    # Avoid mutable default arguments; normalize the recursion cursors.
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []
    bindings = []  # type: List[Dict[Text,Text]]
    binding = None  # type: Dict[Text,Any]
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Composite sort key: lead positions, own position (default 0), tail.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)
        binding["datum"] = datum

    # Handle union types
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            # Prefer an already-registered Avro schema; otherwise build one.
            if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = AvroSchemaFromJSONData(t, self.names)
            if validate.validate(avsc, datum):
                # Re-bind with the union narrowed to the matching member.
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
        raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        if binding and "inputBinding" not in st and st["type"] == "array" and "itemSeparator" not in binding:
            # No itemSeparator: bind each array item individually.
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    # Field name becomes the tail of the sort key.
                    bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                else:
                    # NOTE: mutates the caller's datum to fill in defaults.
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos))
            # Items carry the binding; drop it so it isn't appended again below.
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding:
                if binding.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # Pattern may be a CWL expression or a suffix substitution.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        sfpath = self.do_eval(sf, context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf)
                    for sfname in aslist(sfpath):
                        found = False
                        # Match by basename against already-listed entries,
                        # backfilling missing basenames from the location.
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][d["location"].rindex("/")+1:]
                            if d["basename"] == sfname:
                                found = True
                        if not found:
                            if isinstance(sfname, dict):
                                datum["secondaryFiles"].append(sfname)
                            else:
                                # Resolve relative to the primary file's directory.
                                datum["secondaryFiles"].append({
                                    "location": datum["location"][0:datum["location"].rindex("/")+1]+sfname,
                                    "basename": sfname,
                                    "class": "File"})
                normalizeFilesDirs(datum["secondaryFiles"])

            def _capture_files(f):
                # Record every File/Directory object seen in self.files.
                self.files.append(f)
                return f
            visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            # Tool-level loadListing takes precedence over the binding's.
            ll = self.loadListing or (binding and binding.get("loadListing"))
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings