def __init__(self, toolpath_object: MutableMapping[str, Any],
             loadingContext: LoadingContext) -> None:
    """Build a Process object from the provided dictionary."""
    super(Process, self).__init__()
    self.metadata = getdefault(loadingContext.metadata, {})  # type: Dict[str,Any]
    self.provenance_object = None  # type: Optional[ProvenanceProfile]
    self.parent_wf = None  # type: Optional[ProvenanceProfile]

    # Lazily populate the module-level schema singletons on first use.
    global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
    if SCHEMA_FILE is None or SCHEMA_ANY is None or SCHEMA_DIR is None:
        get_schema("v1.0")
        SCHEMA_ANY = cast(
            Dict[str, Any],
            SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"],
        )
        SCHEMA_FILE = cast(
            Dict[str, Any],
            SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"],
        )
        SCHEMA_DIR = cast(
            Dict[str, Any],
            SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#Directory"],
        )

    self.names = schema.make_avro_schema(
        [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY], Loader({}))
    self.tool = toolpath_object
    # Context-supplied requirements come first; the tool's own requirements
    # and any overrides matched by this tool's id are appended after them.
    self.requirements = copy.deepcopy(
        getdefault(loadingContext.requirements, []))
    self.requirements.extend(self.tool.get("requirements", []))
    if "id" not in self.tool:
        self.tool["id"] = "_:" + str(uuid.uuid4())
    self.requirements.extend(
        get_overrides(getdefault(loadingContext.overrides_list, []),
                      self.tool["id"]).get("requirements", []))
    self.hints = copy.deepcopy(getdefault(loadingContext.hints, []))
    self.hints.extend(self.tool.get("hints", []))
    # Versions of requirements and hints which aren't mutated.
    self.original_requirements = copy.deepcopy(self.requirements)
    self.original_hints = copy.deepcopy(self.hints)
    self.doc_loader = loadingContext.loader
    self.doc_schema = loadingContext.avsc_names
    self.formatgraph = None  # type: Optional[Graph]
    if self.doc_loader is not None:
        self.formatgraph = self.doc_loader.graph

    checkRequirements(self.tool, supportedProcessRequirements)
    self.validate_hints(
        loadingContext.avsc_names,
        self.tool.get("hints", []),
        strict=getdefault(loadingContext.strict, False),
    )

    self.schemaDefs = {}  # type: Dict[str,Dict[str, Any]]

    sd, _ = self.get_requirement("SchemaDefRequirement")
    if sd is not None:
        sdtypes = avroize_type(sd["types"])
        av = schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes},
                                    set())
        for i in av:
            self.schemaDefs[i["name"]] = i  # type: ignore
        schema.make_avsc_object(schema.convert_to_dict(av), self.names)

    # Build record schema from inputs
    self.inputs_record_schema = {
        "name": "input_record_schema",
        "type": "record",
        "fields": [],
    }  # type: Dict[str, Any]
    self.outputs_record_schema = {
        "name": "outputs_record_schema",
        "type": "record",
        "fields": [],
    }  # type: Dict[str, Any]

    for key in ("inputs", "outputs"):
        for i in self.tool[key]:
            c = copy.deepcopy(i)
            c["name"] = shortname(c["id"])
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException(
                    "Missing 'type' in parameter '{}'".format(c["name"]))

            # A parameter with a default value must also accept null.
            # (The original code had a no-op "else: c['type'] = c['type']"
            # branch here; it has been removed.)
            if "default" in c and "null" not in aslist(c["type"]):
                nullable = ["null"]
                nullable.extend(aslist(c["type"]))
                c["type"] = nullable
            c["type"] = avroize_type(c["type"], c["name"])
            if key == "inputs":
                self.inputs_record_schema["fields"].append(c)
            elif key == "outputs":
                self.outputs_record_schema["fields"].append(c)

    with SourceLine(toolpath_object, "inputs", validate.ValidationException):
        self.inputs_record_schema = cast(
            Dict[str, Any],
            schema.make_valid_avro(self.inputs_record_schema, {}, set()),
        )
        schema.make_avsc_object(
            schema.convert_to_dict(self.inputs_record_schema), self.names)
    with SourceLine(toolpath_object, "outputs", validate.ValidationException):
        self.outputs_record_schema = cast(
            Dict[str, Any],
            schema.make_valid_avro(self.outputs_record_schema, {}, set()),
        )
        schema.make_avsc_object(
            schema.convert_to_dict(self.outputs_record_schema), self.names)

    if toolpath_object.get("class") is not None and not getdefault(
            loadingContext.disable_js_validation, False):
        if loadingContext.js_hint_options_file is not None:
            try:
                with open(loadingContext.js_hint_options_file) as options_file:
                    validate_js_options = json.load(options_file)
            except (OSError, ValueError):
                # Log context for the failure, then let the original
                # exception propagate with its traceback intact.
                _logger.error(
                    "Failed to read options file %s",
                    loadingContext.js_hint_options_file,
                )
                raise
        else:
            validate_js_options = None
        if self.doc_schema is not None:
            validate_js_expressions(
                cast(CommentedMap, toolpath_object),
                self.doc_schema.names[toolpath_object["class"]],
                validate_js_options,
            )

    dockerReq, is_req = self.get_requirement("DockerRequirement")

    # dockerOutputDirectory in a hint (is_req False) is only a warning.
    if (dockerReq is not None and "dockerOutputDirectory" in dockerReq
            and is_req is not None and not is_req):
        _logger.warning(
            SourceLine(item=dockerReq, raise_type=str).makeError(
                "When 'dockerOutputDirectory' is declared, DockerRequirement "
                "should go in the 'requirements' section, not 'hints'."))

    if (dockerReq is not None and is_req is not None
            and dockerReq.get("dockerOutputDirectory") == "/var/spool/cwl"):
        if is_req:
            # In this specific case, it is legal to have /var/spool/cwl, so skip the check.
            pass
        else:
            # Must be a requirement
            var_spool_cwl_detector(self.tool)
    else:
        var_spool_cwl_detector(self.tool)
def bind_input(
    self,
    schema: MutableMapping[str, Any],
    datum: Any,
    discover_secondaryFiles: bool,
    lead_pos: Optional[Union[int, List[int]]] = None,
    tail_pos: Optional[List[int]] = None,
) -> List[MutableMapping[str, Any]]:
    """Bind an input value to its input binding(s), recursively.

    Walks ``schema`` (union, map-shaped type, schemaDef reference, record,
    array, File, Directory, Any, or other primitive) and returns a flat
    list of binding dictionaries, each carrying its sort key in
    ``"position"`` and the bound value in ``"datum"``.

    Side effects: File/Directory values (including secondary files) are
    appended to ``self.files``; ``datum`` may be mutated in place (record
    defaults filled in, ``contents`` loaded, ``secondaryFiles`` discovered
    and normalized, directory listings fetched).

    :param schema: the (avroized) parameter schema to bind against.
    :param datum: the input value for this parameter.
    :param discover_secondaryFiles: when True, secondary files that exist
        per ``self.fs_access`` are added to ``datum["secondaryFiles"]``.
    :param lead_pos: position prefix prepended to this binding's sort key.
    :param tail_pos: position suffix appended to this binding's sort key.
    :raises validate.ValidationException: if ``datum`` matches no branch
        of a union type.
    :raises WorkflowException: for bad secondaryFiles expression results,
        missing required secondary files, or format mismatches.
    """
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []

    bindings = []  # type: List[MutableMapping[str, str]]
    binding = {}  # type: Union[MutableMapping[str, str], CommentedMap]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                               MutableMapping):
        binding = CommentedMap(schema["inputBinding"].items())

        # Sort key is lead_pos + declared/evaluated position + tail_pos.
        bp = list(aslist(lead_pos))
        if "position" in binding:
            position = binding["position"]
            if isinstance(position, str):  # no need to test the CWL Version
                # the schema for v1.0 only allow ints
                binding["position"] = self.do_eval(position, context=datum)
                bp.append(binding["position"])
            else:
                bp.extend(aslist(binding["position"]))
        else:
            bp.append(0)
        bp.extend(aslist(tail_pos))
        binding["position"] = bp

        binding["datum"] = datum
        if "valueFrom" in binding:
            value_from_expression = True

    # Handle union types
    if isinstance(schema["type"], MutableSequence):
        bound_input = False
        for t in schema["type"]:
            avsc = None  # type: Optional[Schema]
            if isinstance(t, str) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif (isinstance(t, MutableMapping) and "name" in t
                  and self.names.has_name(t["name"], "")):
                avsc = self.names.get_name(t["name"], "")
            if not avsc:
                avsc = make_avsc_object(convert_to_dict(t), self.names)
            if validate.validate(avsc, datum):
                # Re-bind against the first matching branch of the union.
                schema = copy.deepcopy(schema)
                schema["type"] = t
                if not value_from_expression:
                    return self.bind_input(
                        schema,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                else:
                    # valueFrom is present: recurse for side effects
                    # (e.g. File capture); the result is discarded and
                    # this level's binding is appended below.
                    self.bind_input(
                        schema,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                    bound_input = True
        if not bound_input:
            raise validate.ValidationException(
                "'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], MutableMapping):
        # Nested map-shaped type (e.g. inline array schema).
        st = copy.deepcopy(schema["type"])
        if (binding and "inputBinding" not in st and "type" in st
                and st["type"] == "array" and "itemSeparator" not in binding):
            # Array without itemSeparator: give each item its own binding.
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            self.bind_input(
                st,
                datum,
                lead_pos=lead_pos,
                tail_pos=tail_pos,
                discover_secondaryFiles=discover_secondaryFiles,
            )
        else:
            bindings.extend(
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                ))
    else:
        # Resolve named schemaDef types before dispatching on "type".
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum and datum[f["name"]] is not None:
                    bindings.extend(
                        self.bind_input(
                            f,
                            datum[f["name"]],
                            lead_pos=lead_pos,
                            tail_pos=f["name"],
                            discover_secondaryFiles=discover_secondaryFiles,
                        ))
                else:
                    # Missing/None field: fill in the declared default.
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {"type": schema["items"], "inputBinding": b2}
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(
                        itemschema,
                        item,
                        lead_pos=n,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    ))
            # Per-item bindings already carry copies of this binding, so
            # clear it to keep the final block from appending it again.
            binding = {}

        def _capture_files(f):  # type: (Dict[str, str]) -> Dict[str, str]
            # visit_class callback: record the File/Directory object.
            self.files.append(f)
            return f

        if schema["type"] == "File":
            self.files.append(datum)
            if (binding and binding.get("loadContents")
                ) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as f:
                    datum["contents"] = content_limit_respected_read(f)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    if "required" in sf:
                        sf_required = self.do_eval(sf["required"],
                                                   context=datum)
                    else:
                        sf_required = True
                    # Patterns containing CWL expression syntax are
                    # evaluated; plain patterns use suffix substitution.
                    if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                        sfpath = self.do_eval(sf["pattern"], context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf["pattern"])
                    for sfname in aslist(sfpath):
                        if not sfname:
                            continue
                        found = False

                        # Expression may return a name (str) or a whole
                        # File-like object (mapping).
                        if isinstance(sfname, str):
                            sf_location = (
                                datum["location"]
                                [0:datum["location"].rindex("/") + 1]
                                + sfname)
                            sfbasename = sfname
                        elif isinstance(sfname, MutableMapping):
                            sf_location = sfname["location"]
                            sfbasename = sfname["basename"]
                        else:
                            raise WorkflowException(
                                "Expected secondaryFile expression to return type 'str' or 'MutableMapping', received '%s'"
                                % (type(sfname)))

                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][
                                    d["location"].rindex("/") + 1:]
                            if d["basename"] == sfbasename:
                                found = True

                        if not found:

                            def addsf(
                                files: MutableSequence[MutableMapping[
                                    str, Any]],
                                newsf: MutableMapping[str, Any],
                            ) -> None:
                                # Merge by location: update basename if the
                                # file is already listed, else append it.
                                for f in files:
                                    if f["location"] == newsf["location"]:
                                        f["basename"] = newsf["basename"]
                                        return
                                files.append(newsf)

                            if isinstance(sfname, MutableMapping):
                                addsf(datum["secondaryFiles"], sfname)
                            elif discover_secondaryFiles and self.fs_access.exists(
                                    sf_location):
                                addsf(
                                    datum["secondaryFiles"],
                                    {
                                        "location": sf_location,
                                        "basename": sfname,
                                        "class": "File",
                                    },
                                )
                            elif sf_required:
                                raise WorkflowException(
                                    "Missing required secondary file '%s' from file object: %s"
                                    % (sfname, json_dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    check_format(datum, self.do_eval(schema["format"]),
                                 self.formatgraph)
                except validate.ValidationException as ve:
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (schema["name"], schema["format"], ve)) from ve

            visit_class(
                datum.get("secondaryFiles", []),
                ("File", "Directory"),
                _capture_files,
            )

        if schema["type"] == "Directory":
            ll = schema.get("loadListing") or self.loadListing
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

        if schema["type"] == "Any":
            visit_class(datum, ("File", "Directory"), _capture_files)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def bind_input(self,
               schema,                   # type: MutableMapping[Text, Any]
               datum,                    # type: Any
               discover_secondaryFiles,  # type: bool
               lead_pos=None,            # type: Optional[Union[int, List[int]]]
               tail_pos=None,            # type: Optional[List[int]]
              ):  # type: (...) -> List[MutableMapping[Text, Any]]
    """Bind an input value to its input binding(s), recursively.

    Legacy (``Text``-typed) variant. Walks ``schema`` (union, map-shaped
    type, schemaDef reference, record, array, File, Directory, Any, or
    other primitive) and returns a flat list of binding dictionaries,
    each carrying a sort key in ``"position"`` and the bound value in
    ``"datum"``.

    Side effects: File/Directory values (including secondary files) are
    appended to ``self.files``; ``datum`` may be mutated in place (record
    defaults filled in, ``contents`` loaded up to CONTENT_LIMIT,
    ``secondaryFiles`` discovered and normalized, listings fetched).

    :raises validate.ValidationException: if ``datum`` matches no branch
        of a union type.
    :raises WorkflowException: for missing required secondary files.
    """
    if tail_pos is None:
        tail_pos = []
    if lead_pos is None:
        lead_pos = []

    bindings = []  # type: List[MutableMapping[Text, Text]]
    binding = None  # type: Optional[MutableMapping[Text,Any]]
    value_from_expression = False
    if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                               MutableMapping):
        binding = CommentedMap(schema["inputBinding"].items())
        assert binding is not None
        # Sort key is lead_pos + declared position + tail_pos.
        bp = list(aslist(lead_pos))
        if "position" in binding:
            bp.extend(aslist(binding["position"]))
        else:
            bp.append(0)
        bp.extend(aslist(tail_pos))
        binding["position"] = bp
        binding["datum"] = datum
        if "valueFrom" in binding:
            value_from_expression = True

    # Handle union types
    if isinstance(schema["type"], MutableSequence):
        bound_input = False
        for t in schema["type"]:
            avsc = None  # type: Optional[Schema]
            if isinstance(t, string_types) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, MutableMapping) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            if not avsc:
                avsc = make_avsc_object(convert_to_dict(t), self.names)
            assert avsc is not None
            if validate.validate(avsc, datum):
                # Re-bind against the first matching branch of the union.
                schema = copy.deepcopy(schema)
                schema["type"] = t
                if not value_from_expression:
                    return self.bind_input(
                        schema, datum,
                        lead_pos=lead_pos, tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles)
                else:
                    # valueFrom present: recurse for side effects only;
                    # this level's binding is appended below.
                    self.bind_input(
                        schema, datum,
                        lead_pos=lead_pos, tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles)
                    bound_input = True
        if not bound_input:
            raise validate.ValidationException(
                u"'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], MutableMapping):
        # Nested map-shaped type (e.g. inline array schema).
        st = copy.deepcopy(schema["type"])
        if binding is not None\
                and "inputBinding" not in st\
                and "type" in st\
                and st["type"] == "array"\
                and "itemSeparator" not in binding:
            # Array without itemSeparator: give each item its own binding.
            st["inputBinding"] = {}
        for k in ("secondaryFiles", "format", "streamable"):
            if k in schema:
                st[k] = schema[k]
        if value_from_expression:
            self.bind_input(
                st, datum,
                lead_pos=lead_pos, tail_pos=tail_pos,
                discover_secondaryFiles=discover_secondaryFiles)
        else:
            bindings.extend(
                self.bind_input(
                    st, datum,
                    lead_pos=lead_pos, tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles))
    else:
        # Resolve named schemaDef types before dispatching on "type".
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum and datum[f["name"]] is not None:
                    bindings.extend(self.bind_input(
                        f, datum[f["name"]],
                        lead_pos=lead_pos, tail_pos=f["name"],
                        discover_secondaryFiles=discover_secondaryFiles))
                else:
                    # Missing/None field: fill in the declared default.
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding is not None:
                    b2 = copy.deepcopy(binding)
                    b2["datum"] = item
                itemschema = {
                    u"type": schema["items"],
                    u"inputBinding": b2
                }
                for k in ("secondaryFiles", "format", "streamable"):
                    if k in schema:
                        itemschema[k] = schema[k]
                bindings.extend(
                    self.bind_input(
                        itemschema, item,
                        lead_pos=n, tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles))
            # Per-item bindings already carry copies of this binding, so
            # clear it to keep the final block from appending it again.
            binding = None

        def _capture_files(f):
            # visit_class callback: record the File/Directory object.
            self.files.append(f)
            return f

        if schema["type"] == "File":
            self.files.append(datum)
            if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                with self.fs_access.open(datum["location"], "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    if 'required' in sf:
                        sf_required = self.do_eval(sf['required'],
                                                   context=datum)
                    else:
                        sf_required = True
                    # Patterns containing CWL expression syntax are
                    # evaluated; plain patterns use suffix substitution.
                    if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                        sfpath = self.do_eval(sf["pattern"], context=datum)
                    else:
                        sfpath = substitute(datum["basename"], sf["pattern"])
                    for sfname in aslist(sfpath):
                        if not sfname:
                            continue
                        found = False
                        for d in datum["secondaryFiles"]:
                            if not d.get("basename"):
                                d["basename"] = d["location"][d["location"].rindex("/")+1:]
                            if d["basename"] == sfname:
                                found = True
                        if not found:
                            # NOTE(review): this concatenates a str with
                            # sfname even when sfname is a mapping, which
                            # would raise TypeError; the newer
                            # implementation of bind_input guards this
                            # case — confirm before relying on mapping
                            # results here.
                            sf_location = datum["location"][0:datum["location"].rindex("/")+1]+sfname
                            if isinstance(sfname, MutableMapping):
                                datum["secondaryFiles"].append(sfname)
                            elif discover_secondaryFiles and self.fs_access.exists(sf_location):
                                datum["secondaryFiles"].append({
                                    "location": sf_location,
                                    "basename": sfname,
                                    "class": "File"})
                            elif sf_required:
                                raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                    sfname, json_dumps(datum, indent=4)))

                normalizeFilesDirs(datum["secondaryFiles"])

            if "format" in schema:
                try:
                    check_format(datum, self.do_eval(schema["format"]),
                                 self.formatgraph)
                except validate.ValidationException as ve:
                    raise WorkflowException(
                        "Expected value of '%s' to have format %s but\n "
                        " %s" % (schema["name"], schema["format"], ve))

            visit_class(datum.get("secondaryFiles", []),
                        ("File", "Directory"), _capture_files)

        if schema["type"] == "Directory":
            ll = schema.get("loadListing") or self.loadListing
            if ll and ll != "no_listing":
                get_listing(self.fs_access, datum, (ll == "deep_listing"))
            self.files.append(datum)

        if schema["type"] == "Any":
            visit_class(datum, ("File", "Directory"), _capture_files)

    # Position to front of the sort key
    if binding is not None:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def __init__(self,
             toolpath_object,  # type: MutableMapping[Text, Any]
             loadingContext    # type: LoadingContext
            ):  # type: (...) -> None
    """Build a Process object from the provided dictionary (legacy Text variant)."""
    self.metadata = getdefault(loadingContext.metadata, {})  # type: Dict[Text,Any]
    self.provenance_object = None  # type: Optional[ProvenanceProfile]
    self.parent_wf = None  # type: Optional[ProvenanceProfile]

    # Lazily populate the module-level schema singletons on first use.
    global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
    if SCHEMA_FILE is None or SCHEMA_ANY is None or SCHEMA_DIR is None:
        get_schema("v1.0")
        SCHEMA_ANY = cast(Dict[Text, Any],
                          SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"])
        SCHEMA_FILE = cast(Dict[Text, Any],
                           SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"])
        SCHEMA_DIR = cast(Dict[Text, Any],
                          SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#Directory"])

    self.names = schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY],
                                         Loader({}))
    self.tool = toolpath_object
    # Context-supplied requirements come first; the tool's own requirements
    # and any overrides matched by this tool's id are appended after them.
    self.requirements = copy.deepcopy(getdefault(loadingContext.requirements, []))
    self.requirements.extend(self.tool.get("requirements", []))
    if "id" not in self.tool:
        self.tool["id"] = "_:" + Text(uuid.uuid4())
    self.requirements.extend(get_overrides(
        getdefault(loadingContext.overrides_list, []),
        self.tool["id"]).get("requirements", []))
    self.hints = copy.deepcopy(getdefault(loadingContext.hints, []))
    self.hints.extend(self.tool.get("hints", []))
    # Versions of requirements and hints which aren't mutated.
    self.original_requirements = copy.deepcopy(self.requirements)
    self.original_hints = copy.deepcopy(self.hints)
    self.doc_loader = loadingContext.loader
    self.doc_schema = loadingContext.avsc_names
    self.formatgraph = None  # type: Optional[Graph]
    if self.doc_loader is not None:
        self.formatgraph = self.doc_loader.graph

    checkRequirements(self.tool, supportedProcessRequirements)
    self.validate_hints(loadingContext.avsc_names, self.tool.get("hints", []),
                        strict=getdefault(loadingContext.strict, False))

    self.schemaDefs = {}  # type: Dict[Text,Dict[Text, Any]]

    sd, _ = self.get_requirement("SchemaDefRequirement")
    if sd is not None:
        sdtypes = avroize_type(sd["types"])
        av = schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes},
                                    set())
        for i in av:
            self.schemaDefs[i["name"]] = i  # type: ignore
        schema.make_avsc_object(schema.convert_to_dict(av), self.names)

    # Build record schema from inputs
    self.inputs_record_schema = {
        "name": "input_record_schema", "type": "record",
        "fields": []}  # type: Dict[Text, Any]
    self.outputs_record_schema = {
        "name": "outputs_record_schema", "type": "record",
        "fields": []}  # type: Dict[Text, Any]

    for key in ("inputs", "outputs"):
        for i in self.tool[key]:
            c = copy.deepcopy(i)
            c["name"] = shortname(c["id"])
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException(
                    u"Missing 'type' in parameter '{}'".format(c["name"]))

            # A parameter with a default value must also accept null.
            # (The original code had a no-op "else: c['type'] = c['type']"
            # branch here; it has been removed.)
            if "default" in c and "null" not in aslist(c["type"]):
                nullable = ["null"]
                nullable.extend(aslist(c["type"]))
                c["type"] = nullable
            c["type"] = avroize_type(c["type"], c["name"])
            if key == "inputs":
                self.inputs_record_schema["fields"].append(c)
            elif key == "outputs":
                self.outputs_record_schema["fields"].append(c)

    with SourceLine(toolpath_object, "inputs", validate.ValidationException):
        self.inputs_record_schema = cast(
            Dict[Text, Any],
            schema.make_valid_avro(self.inputs_record_schema, {}, set()))
        schema.make_avsc_object(
            schema.convert_to_dict(self.inputs_record_schema), self.names)
    with SourceLine(toolpath_object, "outputs", validate.ValidationException):
        self.outputs_record_schema = cast(
            Dict[Text, Any],
            schema.make_valid_avro(self.outputs_record_schema, {}, set()))
        schema.make_avsc_object(
            schema.convert_to_dict(self.outputs_record_schema), self.names)

    if toolpath_object.get("class") is not None \
            and not getdefault(loadingContext.disable_js_validation, False):
        if loadingContext.js_hint_options_file is not None:
            try:
                with open(loadingContext.js_hint_options_file) as options_file:
                    validate_js_options = json.load(options_file)
            except (OSError, ValueError):
                # Log context, then re-raise with a bare `raise` so the
                # original traceback is preserved unchanged (the original
                # code used `raise err`, which appends this frame).
                _logger.error(
                    "Failed to read options file %s",
                    loadingContext.js_hint_options_file)
                raise
        else:
            validate_js_options = None
        if self.doc_schema is not None:
            validate_js_expressions(
                cast(CommentedMap, toolpath_object),
                self.doc_schema.names[toolpath_object["class"]],
                validate_js_options)

    dockerReq, is_req = self.get_requirement("DockerRequirement")

    # dockerOutputDirectory in a hint (is_req False) is only a warning.
    if dockerReq is not None and "dockerOutputDirectory" in dockerReq\
            and is_req is not None and not is_req:
        _logger.warning(SourceLine(
            item=dockerReq, raise_type=Text).makeError(
                "When 'dockerOutputDirectory' is declared, DockerRequirement "
                "should go in the 'requirements' section, not 'hints'."))

    if dockerReq is not None and is_req is not None\
            and dockerReq.get("dockerOutputDirectory") == "/var/spool/cwl":
        if is_req:
            # In this specific case, it is legal to have /var/spool/cwl, so skip the check.
            pass
        else:
            # Must be a requirement
            var_spool_cwl_detector(self.tool)
    else:
        var_spool_cwl_detector(self.tool)