def bind_input(self,
               schema,                   # type: Dict[Text, Any]
               datum,                    # type: Any
               discover_secondaryFiles,  # type: bool
               lead_pos=None,            # type: Optional[Union[int, List[int]]]
               tail_pos=None,            # type: Optional[List[int]]
              ):  # type: (...) -> List[Dict[Text, Any]]
    """Collect the input bindings that ``schema`` prescribes for ``datum``.

    The schema is walked recursively through union types, inline type
    definitions, records and arrays.  Every ``inputBinding`` met on the way
    yields one binding dict holding the bound ``datum`` and a ``position``
    list used as a sort key.

    Side effects on the arguments and on ``self``:

    * File and Directory objects (and the secondary files of a File) are
      appended to ``self.files``.
    * When ``loadContents`` is set on the binding or the schema, up to
      ``CONTENT_LIMIT`` bytes of a File are read, decoded as UTF-8 and stored
      in ``datum["contents"]``.
    * Secondary files named by the schema are added to
      ``datum["secondaryFiles"]``.
    * Record fields that are absent (or ``None``) in ``datum`` are set to the
      field's ``default``.

    Args:
        schema: Schema entry describing ``datum``; must have a ``"type"`` key.
        datum: The input value to bind.
        discover_secondaryFiles: When true, a secondary file that is not
            already listed on the File is added next to the primary file's
            location; when false, such a missing file is an error.
        lead_pos: Position component(s) placed before this binding's own
            position.
        tail_pos: Position component(s) placed after this binding's own
            position.

    Returns:
        The list of binding dicts collected for ``datum``.

    Raises:
        validate.ValidationException: ``datum`` validates against no member
            of a union type.
        WorkflowException: A required secondary file is missing while
            discovery is disabled, or the File fails its format check.
        UnicodeDecodeError: Loaded file contents are not valid UTF-8.
    """
    # Imported locally so the block does not rely on a module-level import.
    import codecs

    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []
    bindings = []  # type: List[Dict[Text,Text]]
    binding = None  # type: Optional[Dict[Text,Any]]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])

        # Sort key: caller's prefix, this binding's own position (0 when
        # unspecified), caller's suffix.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        binding["datum"] = datum
        value_from_expression = "valueFrom" in binding

    # Handle union types
    if isinstance(schema["type"], list):
        bound_input = False
        for t in schema["type"]:
            if isinstance(t, string_types) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = AvroSchemaFromJSONData(t, self.names)
            if validate.validate(avsc, datum):
                # Re-bind against a copy of the schema narrowed to the
                # union member that matched.
                narrowed = copy.deepcopy(schema)
                narrowed["type"] = t
                if not value_from_expression:
                    return self.bind_input(narrowed, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                                           discover_secondaryFiles=discover_secondaryFiles)
                # With valueFrom the nested result is discarded; the call is
                # still made so that its side effects take place.
                self.bind_input(narrowed, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                                discover_secondaryFiles=discover_secondaryFiles)
                bound_input = True
        if not bound_input:
            raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # An inline array type without its own binding gets an empty one
        # (unless itemSeparator is set), so the recursive call builds a
        # binding for it and hands a copy to every item.
        if binding and "inputBinding" not in st and st["type"] == "array" and "itemSeparator" not in binding:
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            # Result discarded, as for unions above.
            self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles)
        else:
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                                            discover_secondaryFiles=discover_secondaryFiles))
    else:
        # A type name may refer to a schema definition; bind against that.
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for field in schema["fields"]:
                field_name = field["name"]
                if field_name in datum and datum[field_name] is not None:
                    # The field name is used as the trailing sort-key
                    # component of the field's bindings.
                    bindings.extend(self.bind_input(field, datum[field_name], lead_pos=lead_pos,
                                                    tail_pos=field_name,
                                                    discover_secondaryFiles=discover_secondaryFiles))
                else:
                    datum[field_name] = field.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos,
                                    discover_secondaryFiles=discover_secondaryFiles))
            # Each item received its own copy of the binding, so the array
            # itself contributes none.
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as handle:
                    raw = handle.read(CONTENT_LIMIT)
                # The file is opened in binary mode, so decode to text here.
                # A read that stopped at the limit may have cut a multi-byte
                # character in half: the incremental decoder drops such an
                # incomplete tail.  A shorter read is treated as the whole
                # file and decoded strictly.
                decoder = codecs.getincrementaldecoder("utf-8")()
                datum["contents"] = decoder.decode(raw, len(raw) < CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # Dicts and strings containing an expression are
                    # evaluated; anything else is a pattern applied to the
                    # primary file's basename.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        sfpath = self.do_eval(sf, context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf)
                    for sfname in aslist(sfpath):
                        found = False
                        # No early exit: every listed entry that lacks a
                        # basename gets one derived from its location.
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][d["location"].rindex("/")+1:]
                            if d["basename"] == sfname:
                                found = True
                        if not found:
                            if isinstance(sfname, dict):
                                datum["secondaryFiles"].append(sfname)
                            elif discover_secondaryFiles:
                                # Assume the file sits next to the primary one.
                                datum["secondaryFiles"].append({
                                    "location": datum["location"][0:datum["location"].rindex("/")+1]+sfname,
                                    "basename": sfname,
                                    "class": "File"})
                            else:
                                raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                    sfname, json_dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    check_format(datum, self.do_eval(schema["format"]), self.formatgraph)
                except validate.ValidationException as ve:
                    # Item schemas built for arrays above have no "name";
                    # fall back to the file itself so the failure is still
                    # reported as a WorkflowException.
                    if "name" in schema:
                        subject = schema["name"]
                    else:
                        subject = datum.get("basename", datum.get("location", ""))
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (subject, schema["format"], ve))

            def _capture_files(f):
                self.files.append(f)
                return f

            visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            ll = self.loadListing or (binding and binding.get("loadListing"))
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self,
               schema,                   # type: Dict[Text, Any]
               datum,                    # type: Any
               discover_secondaryFiles,  # type: bool
               lead_pos=None,            # type: Optional[Union[int, List[int]]]
               tail_pos=None,            # type: Optional[List[int]]
              ):  # type: (...) -> List[Dict[Text, Any]]
    """Collect the input bindings that ``schema`` prescribes for ``datum``.

    The schema is walked recursively through union types, inline type
    definitions, records and arrays.  Every ``inputBinding`` met on the way
    yields one binding dict holding the bound ``datum`` and a ``position``
    list used as a sort key.

    Side effects on the arguments and on ``self``:

    * File and Directory objects (and the secondary files of a File) are
      appended to ``self.files``.
    * When ``loadContents`` is set on the binding or the schema, up to
      ``CONTENT_LIMIT`` bytes of a File are read, decoded as UTF-8 and stored
      in ``datum["contents"]``.
    * Secondary files named by the schema are added to
      ``datum["secondaryFiles"]``.
    * Record fields that are absent (or ``None``) in ``datum`` are set to the
      field's ``default``.

    Args:
        schema: Schema entry describing ``datum``; must have a ``"type"`` key.
        datum: The input value to bind.
        discover_secondaryFiles: When true, a secondary file that is not
            already listed on the File is added next to the primary file's
            location; when false, such a missing file is an error.
        lead_pos: Position component(s) placed before this binding's own
            position.
        tail_pos: Position component(s) placed after this binding's own
            position.

    Returns:
        The list of binding dicts collected for ``datum``.

    Raises:
        validate.ValidationException: ``datum`` validates against no member
            of a union type.
        WorkflowException: A required secondary file is missing while
            discovery is disabled, or the File fails its format check.
        UnicodeDecodeError: Loaded file contents are not valid UTF-8.
    """
    # Imported locally so the block does not rely on a module-level import.
    import codecs

    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []
    bindings = []  # type: List[Dict[Text,Text]]
    binding = None  # type: Optional[Dict[Text,Any]]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])

        # Sort key: caller's prefix, this binding's own position (0 when
        # unspecified), caller's suffix.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        binding["datum"] = datum
        value_from_expression = "valueFrom" in binding

    # Handle union types
    if isinstance(schema["type"], list):
        bound_input = False
        for t in schema["type"]:
            if isinstance(t, string_types) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = AvroSchemaFromJSONData(t, self.names)
            if validate.validate(avsc, datum):
                # Re-bind against a copy of the schema narrowed to the
                # union member that matched.
                narrowed = copy.deepcopy(schema)
                narrowed["type"] = t
                if not value_from_expression:
                    return self.bind_input(narrowed, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                                           discover_secondaryFiles=discover_secondaryFiles)
                # With valueFrom the nested result is discarded; the call is
                # still made so that its side effects take place.
                self.bind_input(narrowed, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                                discover_secondaryFiles=discover_secondaryFiles)
                bound_input = True
        if not bound_input:
            raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # An inline array type without its own binding gets an empty one
        # (unless itemSeparator is set), so the recursive call builds a
        # binding for it and hands a copy to every item.
        if binding and "inputBinding" not in st and st["type"] == "array" and "itemSeparator" not in binding:
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            # Result discarded, as for unions above.
            self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles)
        else:
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos,
                                            discover_secondaryFiles=discover_secondaryFiles))
    else:
        # A type name may refer to a schema definition; bind against that.
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for field in schema["fields"]:
                field_name = field["name"]
                if field_name in datum and datum[field_name] is not None:
                    # The field name is used as the trailing sort-key
                    # component of the field's bindings.
                    bindings.extend(self.bind_input(field, datum[field_name], lead_pos=lead_pos,
                                                    tail_pos=field_name,
                                                    discover_secondaryFiles=discover_secondaryFiles))
                else:
                    datum[field_name] = field.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos,
                                    discover_secondaryFiles=discover_secondaryFiles))
            # Each item received its own copy of the binding, so the array
            # itself contributes none.
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as handle:
                    raw = handle.read(CONTENT_LIMIT)
                # The file is opened in binary mode, so decode to text here.
                # A read that stopped at the limit may have cut a multi-byte
                # character in half: the incremental decoder drops such an
                # incomplete tail.  A shorter read is treated as the whole
                # file and decoded strictly.
                decoder = codecs.getincrementaldecoder("utf-8")()
                datum["contents"] = decoder.decode(raw, len(raw) < CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    # Dicts and strings containing an expression are
                    # evaluated; anything else is a pattern applied to the
                    # primary file's basename.
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        sfpath = self.do_eval(sf, context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf)
                    for sfname in aslist(sfpath):
                        found = False
                        # No early exit: every listed entry that lacks a
                        # basename gets one derived from its location.
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][d["location"].rindex("/")+1:]
                            if d["basename"] == sfname:
                                found = True
                        if not found:
                            if isinstance(sfname, dict):
                                datum["secondaryFiles"].append(sfname)
                            elif discover_secondaryFiles:
                                # Assume the file sits next to the primary one.
                                datum["secondaryFiles"].append({
                                    "location": datum["location"][0:datum["location"].rindex("/")+1]+sfname,
                                    "basename": sfname,
                                    "class": "File"})
                            else:
                                raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                    sfname, json_dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    check_format(datum, self.do_eval(schema["format"]), self.formatgraph)
                except validate.ValidationException as ve:
                    # Item schemas built for arrays above have no "name";
                    # fall back to the file itself so the failure is still
                    # reported as a WorkflowException.
                    if "name" in schema:
                        subject = schema["name"]
                    else:
                        subject = datum.get("basename", datum.get("location", ""))
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (subject, schema["format"], ve))

            def _capture_files(f):
                self.files.append(f)
                return f

            visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            ll = self.loadListing or (binding and binding.get("loadListing"))
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings