def rewrite_requirements(t: CWLObjectType) -> None: if "requirements" in t: for r in cast(MutableSequence[CWLObjectType], t["requirements"]): if isinstance(r, MutableMapping): cls = cast(str, r["class"]) if cls in rewrite: r["class"] = rewrite[cls] else: raise ValidationException( "requirements entries must be dictionaries: {} {}.". format(type(r), r)) if "hints" in t: for r in cast(MutableSequence[CWLObjectType], t["hints"]): if isinstance(r, MutableMapping): cls = cast(str, r["class"]) if cls in rewrite: r["class"] = rewrite[cls] else: raise ValidationException( "hints entries must be dictionaries: {} {}.".format( type(r), r)) if "steps" in t: for s in cast(MutableSequence[CWLObjectType], t["steps"]): if isinstance(s, MutableMapping): rewrite_requirements(s) else: raise ValidationException( "steps entries must be dictionaries: {} {}.".format( type(s), s))
def rewrite_requirements(t: MutableMapping[str, Any]) -> None:
    """Apply the ``rewrite`` class renames to ``t`` and to its nested steps.

    For every mapping listed under ``"requirements"`` and then ``"hints"``,
    a ``"class"`` value found in the ``rewrite`` table (taken from the
    surrounding scope) is replaced in place.  Mappings listed under
    ``"steps"`` are handled recursively.

    Raises:
        ValidationException: If any entry of those three lists is not a
            mutable mapping.
    """
    if "requirements" in t:
        for req in t["requirements"]:
            if not isinstance(req, MutableMapping):
                raise ValidationException(
                    "requirements entries must be dictionaries: {} {}.".format(
                        type(req), req
                    )
                )
            current = req["class"]
            if current in rewrite:
                req["class"] = rewrite[current]

    if "hints" in t:
        for hint in t["hints"]:
            if not isinstance(hint, MutableMapping):
                raise ValidationException(
                    "hints entries must be dictionaries: {} {}.".format(
                        type(hint), hint
                    )
                )
            current = hint["class"]
            if current in rewrite:
                hint["class"] = rewrite[current]

    if "steps" in t:
        for step in t["steps"]:
            if not isinstance(step, MutableMapping):
                raise ValidationException(
                    "steps entries must be dictionaries: {} {}.".format(
                        type(step), step
                    )
                )
            rewrite_requirements(step)
def load_document_by_yaml(
    yaml: Any, uri: str, loadingOptions: Optional[LoadingOptions] = None
) -> Any:
    """Load a CWL object from a YAML object.

    The ``cwlVersion`` reported by ``cwl_version`` selects the
    version-specific parser module (v1.0, v1.1 or v1.2) that performs the
    load.

    Raises:
        ValidationException: If no ``cwlVersion`` could be determined or it
            is not one of the supported versions.
    """
    version = cwl_version(yaml)
    if version is None:
        raise ValidationException("could not get the cwlVersion")

    if version == "v1.0":
        result = cwl_v1_0.load_document_by_yaml(
            yaml, uri, cast(Optional[cwl_v1_0.LoadingOptions], loadingOptions)
        )
    elif version == "v1.1":
        result = cwl_v1_1.load_document_by_yaml(
            yaml, uri, cast(Optional[cwl_v1_1.LoadingOptions], loadingOptions)
        )
    elif version == "v1.2":
        result = cwl_v1_2.load_document_by_yaml(
            yaml, uri, cast(Optional[cwl_v1_2.LoadingOptions], loadingOptions)
        )
    else:
        raise ValidationException(
            f"Version error. Did not recognise {version} as a CWL version")

    if not isinstance(result, MutableSequence):
        return result

    # A sequence result: stamp the document version on every item that
    # lists "cwlVersion" among its attrs, then hand back a plain list.
    for item in result:
        if "cwlVersion" in item.attrs:
            item.cwlVersion = version
    return list(result)
def _convert_stdstreams_to_files( workflowobj: Union[MutableMapping[str, Any], MutableSequence[Union[Dict[str, Any], str, int]], str] ) -> None: if isinstance(workflowobj, MutableMapping): if workflowobj.get("class") == "CommandLineTool": with SourceLine( workflowobj, "outputs", ValidationException, _logger.isEnabledFor(logging.DEBUG), ): outputs = workflowobj.get("outputs", []) if not isinstance(outputs, CommentedSeq): raise ValidationException('"outputs" section is not ' "valid.") for out in workflowobj.get("outputs", []): if not isinstance(out, CommentedMap): raise ValidationException( "Output '{}' is not a valid " "OutputParameter.".format(out)) for streamtype in ["stdout", "stderr"]: if out.get("type") == streamtype: if "outputBinding" in out: raise ValidationException( "Not allowed to specify outputBinding when" " using %s shortcut." % streamtype) if streamtype in workflowobj: filename = workflowobj[streamtype] else: filename = str( hashlib.sha1( # nosec json_dumps(workflowobj, sort_keys=True).encode( "utf-8")).hexdigest()) workflowobj[streamtype] = filename out["type"] = "File" out["outputBinding"] = cmap({"glob": filename}) for inp in workflowobj.get("inputs", []): if inp.get("type") == "stdin": if "inputBinding" in inp: raise ValidationException( "Not allowed to specify inputBinding when" " using stdin shortcut.") if "stdin" in workflowobj: raise ValidationException( "Not allowed to specify stdin path when" " using stdin type shortcut.") else: workflowobj["stdin"] = ("$(inputs.%s.path)" % inp["id"].rpartition("#")[2]) inp["type"] = "File" else: for entry in workflowobj.values(): _convert_stdstreams_to_files(entry) if isinstance(workflowobj, MutableSequence): for entry in workflowobj: _convert_stdstreams_to_files(entry)
def check_valid_locations(fs_access: StdFsAccess, ob: CWLObjectType) -> None:
    """Verify that a File or Directory object refers to something that exists.

    Args:
        fs_access: Object providing ``isfile`` and ``isdir`` checks.
        ob: Mapping with at least ``"location"`` and ``"class"`` keys.

    Raises:
        ValidationException: If a ``File`` location is not a file, or a
            ``Directory`` location is not a directory, according to
            ``fs_access``.
    """
    location = cast(str, ob["location"])
    if location.startswith("_:"):
        # "_:" marks a generated identifier for an anonymous object rather
        # than a path, so there is nothing to look up.  The previous ``pass``
        # fell through to the checks below, which rejected every such object.
        return
    if ob["class"] == "File" and not fs_access.isfile(location):
        raise ValidationException("Does not exist or is not a File: '%s'" % location)
    if ob["class"] == "Directory" and not fs_access.isdir(location):
        raise ValidationException(
            "Does not exist or is not a Directory: '%s'" % location)
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Return the result of the first alternate loader that accepts ``doc``.

    The loaders in ``self.alternates`` are tried in order.  Each
    ``ValidationException`` is wrapped together with the name of the loader
    class that raised it; if no loader succeeds, one combined
    ``ValidationException`` built from those wrapped failures is raised.
    """
    failures = []
    for alternate in self.alternates:
        try:
            loaded = alternate.load(doc, baseuri, loadingOptions, docRoot=docRoot)
        except ValidationException as err:
            failures.append(
                ValidationException(
                    f"tried {alternate.__class__.__name__} but", None, [err]
                )
            )
        else:
            return loaded
    raise ValidationException("", None, failures, "-")
def checkversion(
    doc,  # type: Union[CommentedSeq, CommentedMap]
    metadata,  # type: CommentedMap
    enable_dev,  # type: bool
):
    # type: (...) -> Tuple[CommentedMap, str]
    """Check the validity of the version of the given CWL document.

    When ``doc`` is a sequence, a deep copy of ``metadata`` (keeping the
    original line/column data and filename) receives the sequence under
    ``"$graph"`` and is used as the document.

    Returns the document and the validated version string.

    Raises:
        Exception: If ``doc`` is neither a ``CommentedMap`` nor a
            ``CommentedSeq``, or ``metadata`` is not a ``CommentedMap``
            when ``doc`` is a sequence.
        ValidationException: If the version is unrecognized, is a
            development/deprecated version while ``enable_dev`` is false,
            or an original version is recorded but the current version is
            not ``INTERNAL_VERSION``.
    """
    if isinstance(doc, CommentedSeq):
        if not isinstance(metadata, CommentedMap):
            raise Exception("Expected metadata to be CommentedMap")
        source_lc = metadata.lc
        metadata = copy.deepcopy(metadata)
        metadata.lc.data = copy.copy(source_lc.data)
        metadata.lc.filename = source_lc.filename
        metadata["$graph"] = doc
        cdoc = metadata  # type: CommentedMap
    elif isinstance(doc, CommentedMap):
        cdoc = doc
    else:
        raise Exception("Expected CommentedMap or CommentedSeq")

    version = metadata["cwlVersion"]
    cdoc["cwlVersion"] = version

    updated_from = metadata.get(ORIGINAL_CWLVERSION) or cdoc.get(ORIGINAL_CWLVERSION)

    if updated_from:
        # An already-updated document must carry the internal version.
        if version != INTERNAL_VERSION:
            raise ValidationException(
                "original_cwlVersion is set (%s) but cwlVersion is '%s', expected '%s' "
                % (updated_from, version, INTERNAL_VERSION)
            )
        return (cdoc, version)

    if version in UPDATES:
        return (cdoc, version)

    if version not in DEVUPDATES:
        raise ValidationException("Unrecognized version %s" % version)

    if not enable_dev:
        keys = sorted(UPDATES.keys())
        raise ValidationException(
            u"Version '%s' is a development or deprecated version.\n "
            "Update your document to a stable version (%s) or use "
            "--enable-dev to enable support for development and "
            "deprecated versions." % (version, ", ".join(keys))
        )
    return (cdoc, version)
def addLocation(d):  # type: (Dict[str, Any]) -> None
    """Fill in ``location``, ``basename`` and name parts of a File/Directory.

    * An object without ``location`` receives a generated ``_:<uuid4>``
      location (and, lacking a ``basename``, the uuid as basename).
    * A trailing ``/`` is stripped from the path of a Directory location.
    * A missing or empty ``basename`` is derived from the location path.
    * For a File, ``nameroot`` and ``nameext`` are synchronised with the
      basename and the length of ``contents`` is checked.

    Raises:
        ValidationException: If an anonymous File has no ``contents``, an
            anonymous Directory lacks ``listing`` or ``basename``, a
            non-Directory location ends with ``/``, or ``contents`` is
            longer than ``CONTENT_LIMIT``.
    """
    if "location" not in d:
        if d["class"] == "File" and "contents" not in d:
            raise ValidationException(
                "Anonymous file object must have 'contents' and 'basename' fields."
            )
        if d["class"] == "Directory" and not ("listing" in d and "basename" in d):
            raise ValidationException(
                "Anonymous directory object must have 'listing' and 'basename' fields."
            )
        d["location"] = "_:" + str(uuid.uuid4())
        if "basename" not in d:
            d["basename"] = d["location"][2:]

    scheme, netloc, path, params, query, fragment = urllib.parse.urlparse(
        d["location"]
    )
    # strip trailing slash
    if path.endswith("/"):
        if d["class"] != "Directory":
            raise ValidationException(
                "location '%s' ends with '/' but is not a Directory" % d["location"])
        path = path.rstrip("/")
        d["location"] = urllib.parse.urlunparse(
            (scheme, netloc, path, params, query, fragment)
        )

    if not d.get("basename"):
        if path.startswith("_:"):
            d["basename"] = str(path[2:])
        else:
            local_path = urllib.request.url2pathname(path)
            d["basename"] = str(os.path.basename(local_path))

    if d["class"] == "File":
        root, ext = os.path.splitext(d["basename"])
        if d.get("nameroot") != root:
            d["nameroot"] = str(root)
        if d.get("nameext") != ext:
            d["nameext"] = str(ext)

        contents = d.get("contents")
        if contents and len(contents) > CONTENT_LIMIT:
            raise ValidationException(
                "File object contains contents with number of bytes that exceeds CONTENT_LIMIT length (%d)"
                % CONTENT_LIMIT)
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Accept a string or a sequence made up solely of strings.

    A string is returned unchanged; a sequence is returned as a new list
    holding the same items.

    Raises:
        ValidationException: If ``doc`` is neither, or the sequence holds a
            non-string item.
    """
    if isinstance(doc, MutableSequence):
        if not all(isinstance(item, str) for item in doc):
            raise ValidationException("Expected str or sequence of str")
        return list(doc)
    if not isinstance(doc, str):
        raise ValidationException("Expected str or sequence of str")
    return doc
def _document_load(loader, doc, baseuri, loadingOptions):
    # type: (_Loader, Any, Text, LoadingOptions) -> Any
    """Load ``doc`` with ``loader``, honouring the ``$``-prefixed directives.

    * A string is resolved against ``baseuri`` and loaded by URL.
    * In a mapping, ``$namespaces`` and ``$schemas`` are moved into a
      derived ``LoadingOptions`` and removed from the document, ``$base``
      replaces ``baseuri``, and ``$graph`` (when present) is loaded instead
      of the mapping itself.
    * A sequence is passed to ``loader`` unchanged.

    Raises:
        ValidationException: If ``doc`` is none of the above.
    """
    if isinstance(doc, string_types):
        return _document_load_by_url(
            loader, loadingOptions.fetcher.urljoin(baseuri, doc), loadingOptions)

    if isinstance(doc, MutableMapping):
        if "$namespaces" in doc:
            loadingOptions = LoadingOptions(copyfrom=loadingOptions,
                                            namespaces=doc["$namespaces"])
            doc = {k: v for k, v in doc.items() if k != "$namespaces"}

        if "$schemas" in doc:
            loadingOptions = LoadingOptions(copyfrom=loadingOptions,
                                            schemas=doc["$schemas"])
            doc = {k: v for k, v in doc.items() if k != "$schemas"}

        if "$base" in doc:
            baseuri = doc["$base"]

        if "$graph" in doc:
            return loader.load(doc["$graph"], baseuri, loadingOptions)
        return loader.load(doc, baseuri, loadingOptions, docRoot=baseuri)

    if isinstance(doc, MutableSequence):
        return loader.load(doc, baseuri, loadingOptions)

    # Previously raised with no message at all, which left the user with an
    # empty error; say what was received instead.
    raise ValidationException(
        "Expected a str, mapping or sequence document but got {}".format(
            type(doc).__name__))
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, Text, LoadingOptions, Optional[Text]) -> Any
    """Return ``doc`` unchanged when it is one of ``self.symbols``.

    Raises:
        ValidationException: If ``doc`` is not among ``self.symbols``.
    """
    if doc not in self.symbols:
        raise ValidationException("Expected one of {}".format(
            self.symbols))
    return doc
def resolve_tool_uri(
    argsworkflow: str,
    resolver: Optional[ResolverType] = None,
    fetcher_constructor: Optional[FetcherCallableType] = None,
    document_loader: Optional[Loader] = None,
) -> Tuple[str, str]:
    """Turn a tool reference into a URI and the same URI without fragment.

    Resolution order: an ``http``/``https``/``file`` URL is used as is; an
    existing local path is converted with ``file_uri``; otherwise the
    optional ``resolver`` is asked.

    Returns:
        ``(uri, fileuri)`` where ``fileuri`` is ``uri`` with any fragment
        removed.

    Raises:
        ValidationException: If none of the strategies produced a URI.
    """
    uri = None  # type: Optional[str]
    scheme = urllib.parse.urlsplit(argsworkflow).scheme
    # In case of Windows path, urlsplit misjudge Drive letters as scheme,
    # so only the known URL schemes are accepted here.
    if scheme in ("http", "https", "file"):
        uri = argsworkflow
    else:
        local_path = os.path.abspath(argsworkflow)
        if os.path.exists(local_path):
            uri = file_uri(str(local_path))
        elif resolver is not None:
            loader = document_loader or default_loader(fetcher_constructor)
            uri = resolver(loader, argsworkflow)

    if uri is None:
        raise ValidationException("Not found: '%s'" % argsworkflow)

    if argsworkflow != uri:
        _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

    fileuri = urllib.parse.urldefrag(uri)[0]
    return uri, fileuri
def fetch_document(
    argsworkflow: Union[str, Dict[str, Any]],
    loadingContext: Optional[LoadingContext] = None,
) -> Tuple[LoadingContext, CommentedMap, str]:
    """Retrieve a CWL document.

    Args:
        argsworkflow: A reference string resolved via ``resolve_tool_uri``,
            or an already-parsed document as a ``dict``.
        loadingContext: Optional context; it is copied, never modified.  A
            loader is created when the context has none.

    Returns:
        The loading context in use, the document object and its URI.

    Raises:
        ValidationException: If ``argsworkflow`` is neither ``str`` nor
            ``dict``.
    """
    if loadingContext is None:
        loadingContext = LoadingContext()
        loadingContext.loader = default_loader()
    else:
        loadingContext = loadingContext.copy()
        if loadingContext.loader is None:
            loadingContext.loader = default_loader(
                loadingContext.fetcher_constructor,
                enable_dev=loadingContext.enable_dev,
                doc_cache=loadingContext.doc_cache,
            )

    if isinstance(argsworkflow, str):
        uri, fileuri = resolve_tool_uri(
            argsworkflow,
            resolver=loadingContext.resolver,
            document_loader=loadingContext.loader,
        )
        fetched = loadingContext.loader.fetch(fileuri)
        return loadingContext, cast(CommentedMap, fetched), uri

    if isinstance(argsworkflow, dict):
        # A document without a usable id gets a generated "_:" identifier.
        uri = argsworkflow.get("id") or ("_:" + str(uuid.uuid4()))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        loadingContext.loader.idx[uri] = workflowobj
        return loadingContext, workflowobj, uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Convert a mapping document to a list of entries, then load it.

    Keys are visited in sorted order.  A mapping value is shallow-copied
    (a ``CommentedMap`` copy keeps the original's line/column data and
    filename) and gains the key under ``self.mapSubject``; any other value
    is wrapped as ``{self.mapPredicate: value}`` first.  Non-mapping
    documents go straight to ``self.inner``.

    Raises:
        ValidationException: If a non-mapping value is met while
            ``self.mapPredicate`` is not set.
    """
    if isinstance(doc, MutableMapping):
        entries = []  # type: List[Any]
        for key in sorted(doc.keys()):
            value = doc[key]
            if isinstance(value, CommentedMap):
                entry = copy.copy(value)
                entry.lc.data = value.lc.data
                entry.lc.filename = value.lc.filename
            elif isinstance(value, MutableMapping):
                entry = copy.copy(value)
            elif self.mapPredicate:
                entry = {self.mapPredicate: value}
            else:
                raise ValidationException("No mapPredicate")
            entry[self.mapSubject] = key
            entries.append(entry)
        doc = entries
    return self.inner.load(doc, baseuri, loadingOptions)
def _document_load(loader, doc, baseuri, loadingOptions): # type: (_Loader, Any, str, LoadingOptions) -> Any if isinstance(doc, str): return _document_load_by_url( loader, loadingOptions.fetcher.urljoin(baseuri, doc), loadingOptions) if isinstance(doc, MutableMapping): if "$namespaces" in doc or "$schemas" in doc: loadingOptions = LoadingOptions( copyfrom=loadingOptions, namespaces=doc.get("$namespaces", None), schemas=doc.get("$schemas", None), ) doc = { k: v for k, v in doc.items() if k not in ["$namespaces", "$schemas"] } if "$base" in doc: baseuri = doc["$base"] if "$graph" in doc: return loader.load(doc["$graph"], baseuri, loadingOptions) else: return loader.load(doc, baseuri, loadingOptions, docRoot=baseuri) if isinstance(doc, MutableSequence): return loader.load(doc, baseuri, loadingOptions) raise ValidationException("Oops, we shouldn't be here!")
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Build an object from a mapping via ``self.classtype.fromDoc``.

    Raises:
        ValidationException: If ``doc`` is not a mutable mapping.
    """
    if isinstance(doc, MutableMapping):
        return self.classtype.fromDoc(doc, baseuri, loadingOptions, docRoot=docRoot)
    raise ValidationException("Expected a dict")
def check_format(
    actual_file: Union[CWLObjectType, List[CWLObjectType]],
    input_formats: Union[List[str], str],
    ontology: Optional[Graph],
) -> None:
    """Confirm that the format present is valid for the allowed formats.

    Every non-empty file in ``actual_file`` must declare a ``format`` that
    equals, or according to ``formatSubclassOf`` is a subclass of, at least
    one entry of ``input_formats``.

    Args:
        actual_file: One file object or a list of them; falsy entries are
            skipped.
        input_formats: One allowed format or a list of them.
        ontology: Graph handed to ``formatSubclassOf``.

    Raises:
        ValidationException: If a file has no ``format`` or its format
            matches none of ``input_formats``.
    """
    for afile in aslist(actual_file):
        if not afile:
            continue
        if "format" not in afile:
            raise ValidationException(
                f"File has no 'format' defined: {json_dumps(afile, indent=4)}")
        actual_format = afile["format"]
        # Check this file only and carry on with the next one.  The earlier
        # code returned from the whole function on the first match, so the
        # remaining files of a list were never validated.
        compatible = any(
            actual_format == inpf
            or formatSubclassOf(actual_format, inpf, ontology, set())
            for inpf in aslist(input_formats)
        )
        if not compatible:
            raise ValidationException(
                f"File has an incompatible format: {json_dumps(afile, indent=4)}")
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Return ``doc`` unchanged when it is an instance of ``self.tp``.

    ``self.tp`` is passed to ``isinstance`` and may therefore be a single
    type or a tuple of types.

    Raises:
        ValidationException: If ``doc`` is not an instance of ``self.tp``;
            the message names the expected type(s) and the actual type.
    """
    if not isinstance(doc, self.tp):
        # ``self.tp.__class__.__name__`` is always "type" (or "tuple"), which
        # made the message useless; report the expected type names instead.
        if isinstance(self.tp, tuple):
            expected = " or ".join(
                getattr(tp, "__name__", str(tp)) for tp in self.tp
            )
        else:
            expected = getattr(self.tp, "__name__", str(self.tp))
        raise ValidationException(
            "Expected a {} but got {}".format(expected, doc.__class__.__name__)
        )
    return doc
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, Text, LoadingOptions, Optional[Text]) -> Any
    """Load every item of a list, flattening items that load as sequences.

    Each item is loaded through a union of this loader and ``self.items``.
    Failures are collected, each turned into a message by ``SourceLine``
    for its index, and raised together once all items have been tried.

    Raises:
        ValidationException: If ``doc`` is not a mutable sequence, or at
            least one item failed to load.
    """
    if not isinstance(doc, MutableSequence):
        raise ValidationException("Expected a list")

    loaded = []  # type: List[Any]
    problems = []
    for index, item in enumerate(doc):
        try:
            field = load_field(item, _UnionLoader((self, self.items)),
                               baseuri, loadingOptions)
        except ValidationException as err:
            problems.append(SourceLine(doc, index, str).makeError(text_type(err)))
            continue
        if isinstance(field, MutableSequence):
            loaded.extend(field)
        else:
            loaded.append(field)

    if problems:
        raise ValidationException("\n".join(problems))
    return loaded
def get_overrides(overrides: MutableSequence[CWLObjectType],
                  toolid: str) -> CWLObjectType:
    """Merge all override entries whose ``overrideTarget`` equals ``toolid``.

    Matching entries are merged in list order, so keys of later entries
    replace those of earlier ones.  An empty mapping is returned when
    nothing matches.

    Raises:
        ValidationException: If ``overrides`` is not a mutable sequence.
    """
    if not isinstance(overrides, MutableSequence):
        raise ValidationException(
            "Expected overrides to be a list, but was %s" % type(overrides))

    merged = {}  # type: CWLObjectType
    for override in overrides:
        if override["overrideTarget"] != toolid:
            continue
        merged.update(override)
    return merged
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Expand the secondaryFiles shorthand, then load it with ``self.inner``.

    Accepted forms:

    * a string: ``"pat?"`` becomes ``{"pattern": "pat", "required": False}``
      and ``"pat"`` becomes ``{"pattern": "pat"}``;
    * a sequence of such strings and/or dicts holding ``pattern`` and,
      optionally, ``required`` (recorded as ``None`` when absent).

    The input document is not modified.

    Raises:
        ValidationException: If a dict entry lacks ``pattern`` or has keys
            other than ``pattern``/``required``, if a sequence item is
            neither a string nor a dict, or if ``doc`` is neither a string
            nor a sequence.
    """

    def expand(shorthand):
        # type: (str) -> Dict[str, Any]
        """Translate the ``pattern[?]`` string form into a mapping."""
        if shorthand.endswith("?"):
            return {"pattern": shorthand[:-1], "required": False}
        return {"pattern": shorthand}

    r: List[Dict[str, Any]] = []
    if isinstance(doc, MutableSequence):
        for d in doc:
            if isinstance(d, str):
                r.append(expand(d))
            elif isinstance(d, dict):
                # Pop from a shallow copy: popping from ``d`` itself emptied
                # the caller's document, so a second load of it would fail.
                remaining = dict(d)
                if "pattern" not in remaining:
                    raise ValidationException(
                        "Missing pattern in secondaryFiles specification entry: {}".format(
                            d
                        )
                    )
                new_dict: Dict[str, Any] = {
                    "pattern": remaining.pop("pattern"),
                    "required": remaining.pop("required", None),
                }
                if remaining:
                    raise ValidationException(
                        "Unallowed values in secondaryFiles specification entry: {}".format(
                            remaining
                        )
                    )
                # This append was missing, so mapping entries were validated
                # and then silently discarded.
                r.append(new_dict)
            else:
                raise ValidationException(
                    "Expected a string or sequence of (strings or mappings)."
                )
    elif isinstance(doc, str):
        r.append(expand(doc))
    else:
        raise ValidationException("Expected str or sequence of str")
    return self.inner.load(r, baseuri, loadingOptions, docRoot)
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Load every item of a list, flattening items that load as sequences.

    Each item is loaded through a union of this loader and ``self.items``.
    Failures are collected, each tagged with a ``SourceLine`` for its
    index, and raised together once all items have been tried.

    Raises:
        ValidationException: If ``doc`` is not a mutable sequence, or at
            least one item failed to load.
    """
    if not isinstance(doc, MutableSequence):
        raise ValidationException("Expected a list")

    loaded = []  # type: List[Any]
    failures = []  # type: List[SchemaSaladException]
    for index, item in enumerate(doc):
        try:
            field = load_field(item, _UnionLoader((self, self.items)),
                               baseuri, loadingOptions)
        except ValidationException as err:
            failures.append(err.with_sourceline(SourceLine(doc, index, str)))
            continue
        if isinstance(field, MutableSequence):
            loaded.extend(field)
        else:
            loaded.append(field)

    if failures:
        raise ValidationException("", None, failures)
    return loaded
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, Text, LoadingOptions, Optional[Text]) -> Any
    """Return the result of the first alternate loader that accepts ``doc``.

    The loaders in ``self.alternates`` are tried in order.  If every one
    raises ``ValidationException``, a single exception is raised whose
    message lists, as bullets, each loader class name with its indented
    error text.
    """
    attempts = []
    for alternate in self.alternates:
        try:
            loaded = alternate.load(doc, baseuri, loadingOptions, docRoot=docRoot)
        except ValidationException as err:
            attempts.append(u"tried {} but\n{}".format(
                alternate.__class__.__name__, indent(str(err))))
        else:
            return loaded
    raise ValidationException(bullets(attempts, u"- "))
def get_overrides(overrides: List[Dict[str, Any]],
                  toolid: str) -> Dict[str, Any]:
    """Collect the override settings that target ``toolid``.

    Entries whose ``overrideTarget`` equals ``toolid`` are merged in list
    order, later entries overwriting keys of earlier ones.  The result is
    an empty dict when no entry matches.

    Raises:
        ValidationException: If ``overrides`` is not a mutable sequence.
    """
    if not isinstance(overrides, MutableSequence):
        raise ValidationException(
            "Expected overrides to be a list, but was %s" % type(overrides))

    combined = {}  # type: Dict[str, Any]
    matching = (ov for ov in overrides if ov["overrideTarget"] == toolid)
    for ov in matching:
        combined.update(ov)
    return combined
def validate_cwl_doc_main(cwl_doc_path):
    """
    Validate a CWL document by calling the main function of cwltool.

    Not currently used. Runs ``cwl_tool`` with ``--validate`` and
    ``--disable-color`` and a stream handler limited to ERROR level.
    Does a lot of extra stuff.

    :param cwl_doc_path: path of the document to validate; it is converted
        with ``str`` before being passed on
    :return: None
    :raises ValidationException: if ``cwl_tool`` returns a non-zero value
    """
    handler = logging.StreamHandler()
    handler.setLevel(logging.ERROR)
    cwl_doc_path = str(cwl_doc_path)
    exit_code = cwl_tool(
        argsl=['--validate', '--disable-color', cwl_doc_path],
        logger_handler=handler,
    )
    if exit_code != 0:
        raise ValidationException(
            f"cwltool did not return a return value of 0 for {cwl_doc_path}")
def circular_dependency_checker(step_inputs: List[CWLObjectType]) -> None:
    """Check if a workflow has circular dependency.

    The dependency tree of ``step_inputs`` is searched with ``processDFS``
    from every vertex that has not been processed yet.

    Raises:
        ValidationException: If any cycle was recorded; the message lists
            one cycle per line.
    """
    adjacency = get_dependency_tree(step_inputs)
    processed: List[str] = []
    cycles: List[List[str]] = []
    for vertex in adjacency.keys():
        if vertex in processed:
            continue
        # processDFS receives the start of the path plus the shared
        # ``processed`` and ``cycles`` lists.
        processDFS(adjacency, [vertex], processed, cycles)

    if cycles:
        listing = "\n".join(str(cycle) for cycle in cycles)
        raise ValidationException(
            "The following steps have circular dependency:\n" + listing)
def cwl_version(yaml: Any) -> Any:
    """Return the cwlVersion of a YAML object.

    Args:
        yaml: A YAML object

    Returns:
        Any: The value stored under `cwlVersion`, or None when the key is
        absent. Its type is Optional[str] when a given YAML object is a
        valid CWL object.

    Raises:
        ValidationException: If `yaml` is not a MutableMapping.
    """
    if not isinstance(yaml, MutableMapping):
        raise ValidationException("MutableMapping is required")
    if "cwlVersion" in yaml:
        return yaml["cwlVersion"]
    return None
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Return ``doc`` unchanged when it is a string.

    Raises:
        ValidationException: If ``doc`` is not a ``str``.
    """
    if isinstance(doc, str):
        return doc
    raise ValidationException("Expected a str")
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Return ``doc`` unchanged unless it is ``None``.

    Raises:
        ValidationException: If ``doc`` is ``None``.
    """
    if doc is None:
        raise ValidationException("Expected non-null")
    return doc
def load(self, doc, baseuri, loadingOptions, docRoot=None):
    # type: (Any, str, LoadingOptions, Optional[str]) -> Any
    """Return ``doc`` unchanged when it is one of ``self.symbols``.

    Raises:
        ValidationException: If ``doc`` is not among ``self.symbols``.
    """
    if doc not in self.symbols:
        raise ValidationException(f"Expected one of {self.symbols}")
    return doc