def test_attachments() -> None:
    """Exercise ``Loader(allow_attachments=...)`` against ``multidoc.yml``.

    Three loaders resolve the same file URI: one without a predicate, one
    whose predicate tests ``foo == "bar"`` and one whose predicate tests
    ``foo == "baz"``.  Only the ``"bar"`` predicate is expected to yield the
    individual attachment strings; the other two are expected to yield the
    raw text of the whole file for both ``baz`` and ``quux``.
    """
    doc_path = get_data("schema_salad/tests/multidoc.yml")
    assert doc_path
    doc_uri = file_uri(doc_path)

    def foo_is_bar(item: Union[CommentedMap, CommentedSeq]) -> bool:
        return bool(item["foo"] == "bar")

    def foo_is_baz(item: Union[CommentedMap, CommentedSeq]) -> bool:
        return bool(item["foo"] == "baz")

    # No predicate supplied.
    plain_result, _ = Loader({}).resolve_ref(doc_uri)
    with open(doc_path) as handle:
        whole_text = handle.read()
    assert {"foo": "bar", "baz": whole_text, "quux": whole_text} == plain_result

    # Predicate testing foo == "bar".
    split_result, _ = Loader({}, allow_attachments=foo_is_bar).resolve_ref(doc_uri)
    assert {
        "foo": "bar",
        "baz": "This is the {first attachment}.\n",
        "quux": "This is the [second attachment].",
    } == split_result

    # Predicate testing foo == "baz".
    unsplit_result, _ = Loader({}, allow_attachments=foo_is_baz).resolve_ref(doc_uri)
    with open(doc_path) as handle:
        whole_text = handle.read()
    assert {"foo": "bar", "baz": whole_text, "quux": whole_text} == unsplit_result
def default_loader(fetcher_constructor):
    # type: (Optional[FetcherConstructorType]) -> Loader
    """Return the ``Loader`` cached for ``fetcher_constructor``.

    The module-level ``loaders`` mapping is keyed by the fetcher constructor;
    a new ``Loader`` built from ``jobloaderctx`` is stored there on first use.
    """
    if fetcher_constructor not in loaders:
        fresh_loader = Loader(jobloaderctx, fetcher_constructor=fetcher_constructor)
        loaders[fetcher_constructor] = fresh_loader
        return fresh_loader
    return loaders[fetcher_constructor]
def load_job_order(
    args: argparse.Namespace,
    stdin: IO[Any],
    fetcher_constructor: Optional[FetcherCallableType],
    overrides_list: List[CWLObjectType],
    tool_file_uri: str,
) -> Tuple[Optional[CWLObjectType], str, Loader]:
    """Load the job order (input object) named on the command line.

    ``args.job_order`` selects the source: a single entry not starting with
    ``"-"`` is treated as a file reference, a single ``"-"`` means "read from
    ``stdin``", anything else means no job order.

    Any ``http://commonwl.org/cwltool#overrides`` entry found in the loaded
    object is resolved, appended to ``overrides_list`` (mutated in place) and
    removed from the object.

    Returns a tuple ``(job_order_object, input_basedir, loader)``.
    ``input_basedir`` is ``args.basedir`` when set; otherwise the directory of
    the job order file, or the current working directory.

    Calls ``sys.exit(1)`` after logging an error when the loaded object is not
    a mapping.
    """
    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.main.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(
            job_order_object, file_uri(os.getcwd()) + "/"
        )

    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = (
            args.basedir
            if args.basedir
            else os.path.abspath(os.path.dirname(job_order_file))
        )
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if (
        job_order_object is not None
        and "http://commonwl.org/cwltool#overrides" in job_order_object
    ):
        # For stdin input there is no file, so fall back to the base directory.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides_list.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri)
        )
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    if job_order_object is not None and not isinstance(
        job_order_object, MutableMapping
    ):
        _logger.error(
            "CWL input object at %s is not formatted correctly, it should be a "
            "JSON/YAML dictionary, not %s.\n"
            "Raw input object:\n%s",
            job_order_file or "stdin",
            type(job_order_object),
            job_order_object,
        )
        sys.exit(1)
    return (job_order_object, input_basedir, loader)
def fetch_document(argsworkflow,  # type: Union[Text, Dict[Text, Any]]
                   resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
                   ):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    ``argsworkflow`` is either a string, which is resolved with
    ``resolve_tool_uri`` and fetched through the loader, or a dict, which is
    wrapped with ``cmap`` under a synthetic ``"#<id>"`` URI.

    Returns ``(document_loader, workflowobj, uri)``.
    Raises ``ValidationException`` for any other argument type.
    """
    # The loader is created first, whatever the type of ``argsworkflow``.
    loader = Loader(jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(
            argsworkflow, resolver=resolver, document_loader=loader)
        return loader, loader.fetch(fileuri), uri

    if isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        return loader, cast(CommentedMap, cmap(argsworkflow, fn=uri)), uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
def test_idmap() -> None:
    """Check ``resolve_all`` on a document using ``mapSubject``/``mapPredicate``.

    The ``inputs`` mapping is expected to come back as a sequence of items
    carrying ``id`` and ``a`` fields, with every identifier expanded against
    ``http://example2.com/``.
    """
    context = {
        "inputs": {
            "@id": "http://example.com/inputs",
            "mapSubject": "id",
            "mapPredicate": "a",
        },
        "outputs": {"@type": "@id", "identity": True},
        "id": "@id",
    }
    document = {
        "id": "stuff",
        "inputs": {"zip": 1, "zing": 2},
        "outputs": ["out"],
        "other": {"n": 9},
    }

    ldr = Loader({})
    ldr.add_context(context)
    ra, _ = ldr.resolve_all(cmap(document), "http://example2.com/")

    assert isinstance(ra, CommentedMap)
    assert "http://example2.com/#stuff" == ra["id"]
    for entry in ra["inputs"]:
        expected_id = (
            "http://example2.com/#stuff/zing"
            if entry["a"] == 2
            else "http://example2.com/#stuff/zip"
        )
        assert expected_id == entry["id"]
    assert ["http://example2.com/#stuff/out"] == ra["outputs"]
    assert {"n": 9} == ra["other"]
def cwl_dispatch(self, json):
    """Build the CWL context for this task from a JSON-serialisable job.

    Returns a dict with ``"outdir"`` (a fresh temporary directory) and
    ``"promises"`` (the job order produced by ``init_job_order``), or ``None``
    when any step raises; the exception is logged rather than propagated.
    """
    try:
        default_args = self.dag.default_args
        cwlwf, _is_workflow = load_cwl(default_args["cwl_workflow"], default_args)

        tmp_prefix = os.path.abspath(os.path.join(self.tmp_folder, 'dag_tmp_'))
        cwl_context = {"outdir": mkdtemp(prefix=tmp_prefix)}

        loader_ctx = jobloaderctx.copy()
        loader_ctx.update(cwlwf.metadata.get("$namespaces", {}))
        loader = Loader(loader_ctx)

        try:
            job_stream = io.StringIO(initial_value=dumps(json))
            job_order_object = yaml.round_trip_load(job_stream)
            job_order_object, _ = loader.resolve_all(
                job_order_object, file_uri(os.getcwd()) + "/", checklinks=False)
        except Exception as e:
            # NOTE(review): if round_trip_load itself fails, job_order_object
            # is never bound and the next statement raises NameError, which
            # the outer handler then logs.
            _logger.error("Job Loader: {}".format(str(e)))

        job_order_object = init_job_order(
            job_order_object, None, cwlwf, loader, sys.stdout)
        cwl_context['promises'] = job_order_object

        logging.info('{0}: Final job: \n {1}'.format(
            self.task_id, dumps(cwl_context, indent=4)))
        return cwl_context
    except Exception as e:
        _logger.info('Dispatch Exception {0}: \n {1} {2}'.format(
            self.task_id, type(e), e))
        return None
def load_job_order(args,  # type: argparse.Namespace
                   stdin,  # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides,  # type: List[Dict[Text, Any]]
                   tool_file_uri  # type: Text
                   ):
    # type: (...) -> Tuple[Dict[Text, Any], Text, Loader]
    """Load the job order (input object) named on the command line.

    ``args.job_order`` selects the source: a single entry not starting with
    ``"-"`` is treated as a file reference, a single ``"-"`` means "read from
    ``stdin``", anything else means no job order.

    Any ``http://commonwl.org/cwltool#overrides`` entry in the loaded object
    is resolved, appended to ``overrides`` (mutated in place) and removed
    from the object.

    Returns ``(job_order_object, input_basedir, loader)``.
    """
    job_order_object = None
    # Bind this up front: the stdin ("-") branch never assigns it, and the
    # checks below would otherwise raise UnboundLocalError on that path.
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # A job order read from stdin has no file; use the base directory instead.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides.extend(resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if not job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
def test_rdf_datetime() -> None:
    """Affirm that datetime objects can be serialized in makerdf()."""
    ldr = Loader({})
    ctx: ContextType = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }
    ldr.add_context(ctx)

    document = cmap({"id": "foo", "bar": {"id": "baz"}})
    resolved = ldr.resolve_all(document, "http://example.com")[0]
    ra: CommentedMap = cast(CommentedMap, resolved)
    ra["s:dateCreated"] = datetime.datetime(2020, 10, 8)

    # Serialize once from the bare mapping ...
    map_graph = makerdf(None, ra, ctx)
    map_graph.serialize(destination=stdout(), format="n3")

    # ... and once from the mapping wrapped in a sequence.
    seq_graph = makerdf(None, CommentedSeq([ra]), ctx)
    seq_graph.serialize(destination=stdout(), format="n3")
def test_fragment() -> None:
    """Resolve a reference ending in ``#foo2`` and check the mapping returned."""
    ldr = Loader({"id": "@id"})
    frag_ref = get_data("tests/frag.yml#foo2")
    assert frag_ref

    resolved, _ = ldr.resolve_ref(frag_ref)
    assert isinstance(resolved, CommentedMap)
    # The id is taken from the result itself, so only "bar" is pinned here.
    assert {"id": resolved["id"], "bar": "b2"} == resolved
def resolve_tool_uri(argsworkflow,  # type: Text
                     resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
                     fetcher_constructor=None,  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
                     document_loader=None  # type: Loader
                     ):
    # type: (...) -> Tuple[Text, Text]
    """Turn a tool reference into ``(uri, fileuri)``.

    The reference is used as-is when it has an http/https/file scheme, is
    converted with ``file_uri`` when it names an existing local path, and is
    otherwise handed to ``resolver`` (if given).  ``fileuri`` is ``uri`` with
    any fragment removed.

    Raises ``ValidationException`` when no URI could be determined.
    """
    uri = None  # type: Text
    scheme = urllib.parse.urlsplit(argsworkflow).scheme

    # In case of a Windows path, urlsplit misjudges the drive letter as a
    # scheme, so only the known URL schemes are accepted here.
    if scheme in (u'http', u'https', u'file'):
        uri = argsworkflow
    else:
        local_path = os.path.abspath(argsworkflow)
        if os.path.exists(local_path):
            uri = file_uri(str(local_path))
        elif resolver:
            if document_loader is None:
                document_loader = Loader(jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore
            uri = resolver(document_loader, argsworkflow)

    if uri is None:
        raise ValidationException("Not found: '%s'" % argsworkflow)

    if argsworkflow != uri:
        _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

    fileuri, _fragment = urllib.parse.urldefrag(uri)
    return uri, fileuri
def test_scoped_id() -> None:
    """Check identifier scoping for nested ``id`` and ``location`` fields.

    Each case resolves a small document against ``http://example.com``,
    compares the expanded result and prints the RDF graph built from it.
    """
    ldr = Loader({})
    ctx = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }  # type: ContextType
    ldr.add_context(ctx)

    # (input document, extra resolve_all keyword arguments, expected result)
    cases = [
        (
            {"id": "foo", "bar": {"id": "baz"}},
            {},
            {
                "id": "http://example.com/#foo",
                "bar": {"id": "http://example.com/#foo/baz"},
            },
        ),
        (
            {"location": "foo", "bar": {"location": "baz"}},
            {"checklinks": False},
            {
                "location": "http://example.com/foo",
                "bar": {"location": "http://example.com/baz"},
            },
        ),
        (
            {"id": "foo", "bar": {"location": "baz"}},
            {"checklinks": False},
            {
                "id": "http://example.com/#foo",
                "bar": {"location": "http://example.com/baz"},
            },
        ),
        (
            {"location": "foo", "bar": {"id": "baz"}},
            {"checklinks": False},
            {
                "location": "http://example.com/foo",
                "bar": {"id": "http://example.com/#baz"},
            },
        ),
    ]

    for document, resolve_kwargs, expected in cases:
        ra, _ = ldr.resolve_all(cmap(document), "http://example.com", **resolve_kwargs)
        assert expected == ra
        g = makerdf(None, ra, ctx)
        print(g.serialize(format="n3"))
def test_cache() -> None:
    """Seed the loader cache and resolve the cached document by relative name."""
    loader = Loader({})
    foo_uri = file_uri(os.path.join(os.getcwd(), "foo.txt"))

    # Store YAML text under the file URI that "foo.txt" maps to in the cwd.
    loader.cache.update({foo_uri: "hello: foo"})
    print(loader.cache)

    resolved = loader.resolve_ref("foo.txt")[0]
    assert {"hello": "foo"} == resolved
    assert loader.check_exists(foo_uri)
def test_mixin() -> None:
    """Check ``$mixin`` expansion through ``resolve_ref`` and ``resolve_all``."""
    base_url = file_uri(os.path.join(os.getcwd(), "tests"))
    ldr = Loader({})
    path = get_data("tests/mixin.yml")
    assert path

    # Fields given next to $mixin ("one": "five") are expected to win.
    single = ldr.resolve_ref(cmap({"$mixin": path, "one": "five"}), base_url=base_url)
    assert {"id": "four", "one": "five"} == single[0]

    # With an id-aware context, each mixed-in id is scoped under its parent.
    ldr = Loader({"id": "@id"})
    names = ("a", "b")
    documents = [{"id": name, "m": {"$mixin": path}} for name in names]
    expected = [
        {
            "id": base_url + "#" + name,
            "m": {"id": base_url + "#" + name + "/four", "one": "two"},
        }
        for name in names
    ]
    several = ldr.resolve_all(cmap(documents), base_url=base_url)
    assert expected == several[0]
def default_loader(fetcher_constructor=None, enable_dev=False, doc_cache=True):
    # type: (Optional[FetcherConstructorType], bool, bool) -> Loader
    """Build a ``Loader`` over ``jobloaderctx``.

    Attachments are allowed for every document exactly when ``enable_dev``
    is true; ``fetcher_constructor`` and ``doc_cache`` are passed through.
    """
    def _allow_attachments(r):
        # type: (object) -> bool
        # The document is ignored; the decision depends only on enable_dev.
        return enable_dev

    return Loader(
        jobloaderctx,
        fetcher_constructor=fetcher_constructor,
        allow_attachments=_allow_attachments,
        doc_cache=doc_cache,
    )
def test_Loader_initialisation_for_TMP_env_var(tmp_dir_fixture):
    """A ``Loader`` gets a ``Session`` when HOME is unset and TMP is set.

    The process environment is restored afterwards so that removing HOME and
    overriding TMP does not leak into other tests.
    """
    saved_environ = dict(os.environ)
    try:
        # Ensure HOME is missing.
        os.environ.pop("HOME", None)

        # Ensure TMP is present.
        os.environ["TMP"] = tmp_dir_fixture

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
    """A ``Loader`` gets a ``Session`` when neither HOME nor TMP is set.

    The process environment is restored afterwards so that removing the
    variables does not leak into other tests.
    """
    saved_environ = dict(os.environ)
    try:
        # Ensure both HOME and TMP are missing.
        os.environ.pop("HOME", None)
        os.environ.pop("TMP", None)

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
def wes_collect_attachments(self, run_id):
    """Save the request's attachments and collect its form parameters.

    Files posted as ``workflow_attachment`` are written under a new temporary
    directory, with each path component passed through ``secure_filename``.
    ``workflow_params`` is written to ``job.json`` there and loaded back with
    a ``Loader``; other non-empty form values are copied as-is.

    Returns ``(tempdir, body)``, where ``body["workflow_url"]`` has been
    rewritten to a path inside ``tempdir``.

    Raises ``ValueError`` when an attachment or form parameter cannot be
    processed, or when ``workflow_params`` or ``workflow_url`` is missing.
    """
    tmp_root = get_dir(
        path.abspath(
            conf_get("cwl", "tmp_folder", path.join(AIRFLOW_HOME, "cwl_tmp_folder"))
        )
    )
    tempdir = tempfile.mkdtemp(dir=tmp_root, prefix="run_id_" + run_id + "_")
    logging.debug(f"Save all attached files to {tempdir}")

    for k, ls in iterlists(connexion.request.files):
        logging.debug(f"Process attachment parameter {k}")
        if k != "workflow_attachment":
            continue
        for v in ls:
            try:
                logging.debug(f"Process attached file {v}")
                # Drop empty, "." and ".." components, sanitise the rest.
                safe_parts = [
                    secure_filename(part)
                    for part in v.filename.split("/")
                    if part not in ("", ".", "..")
                ]
                dest = path.join(tempdir, *safe_parts)
                dest_dir = path.dirname(dest)
                if not path.isdir(dest_dir):
                    get_dir(dest_dir)
                logging.debug(f"Save {v.filename} to {dest}")
                v.save(dest)
            except Exception as err:
                raise ValueError(
                    f"Failed to process attached file {v}, {err}")

    body = {}
    for k, ls in iterlists(connexion.request.form):
        logging.debug(f"Process form parameter {k}")
        for v in ls:
            try:
                if not v:
                    continue
                if k != "workflow_params":
                    body[k] = v
                    continue
                # Round-trip the job through a file so the loader can resolve it.
                job_file = path.join(tempdir, "job.json")
                with open(job_file, "w") as f:
                    json.dump(json.loads(v), f, indent=4)
                logging.debug(f"Save job file to {job_file}")
                loader = Loader(load.jobloaderctx.copy())
                job_order_object, _ = loader.resolve_ref(
                    job_file, checklinks=False)
                body[k] = job_order_object
            except Exception as err:
                raise ValueError(
                    f"Failed to process form parameter {k}, {v}, {err}")

    if "workflow_params" not in body or "workflow_url" not in body:
        raise ValueError(
            "Missing 'workflow_params' or 'workflow_url' in submission")

    body["workflow_url"] = path.join(tempdir, secure_filename(body["workflow_url"]))
    return tempdir, body
def resolve_overrides(ov, ov_uri, baseurl):
    # type: (CommentedMap, Text, Text) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return its ``"overrides"`` entry.

    ``ov`` is first resolved against ``baseurl`` with a loader built from
    ``overrides_ctx``, then resolved again against ``ov_uri`` with the v1.0
    schema loader.

    Raises ``Exception`` when the first resolution does not yield a
    ``CommentedMap``.
    """
    resolved = Loader(overrides_ctx).resolve_all(ov, baseurl)[0]
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))

    schema_loader = get_schema("v1.0")[0]
    schema_loader.resolve_all(resolved, ov_uri)
    return resolved["overrides"]
def test_Loader_initialisation_for_HOME_env_var(tmp_dir_fixture):
    """A ``Loader`` gets a ``Session`` when HOME points at a directory.

    The process environment is restored afterwards so that the overridden
    HOME does not leak into other tests.
    """
    import os
    from schema_salad.ref_resolver import Loader
    from requests import Session

    saved_environ = dict(os.environ)
    try:
        # Ensure HOME is set.
        os.environ["HOME"] = tmp_dir_fixture

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
def _schema_salad_validate(self, schema_path, document_path):
    '''
    Validate the document at ``document_path`` against the schema at
    ``schema_path``.

    Adapted from schema_salad main().

    :param schema_path: Location of the schema; converted with ``file_uri``
        unless it already has an http, https or file scheme.
    :param document_path: Location of the document to load and validate.
    :return: None
    '''
    strict = True
    strict_foreign_properties = False

    metaschema_names, _metaschema_doc, metaschema_loader = get_metaschema()

    schema_uri = str(schema_path)
    scheme = urlparse(schema_uri)[0]
    if scheme not in ('http', 'https', 'file'):
        schema_uri = file_uri(schema_uri)

    raw_schema = metaschema_loader.fetch(schema_uri)
    schema_doc, schema_metadata = metaschema_loader.resolve_all(
        raw_schema, schema_uri)

    # Validate schema against metaschema
    validate_doc(metaschema_names, schema_doc, metaschema_loader, True)

    # Get the json-ld context and RDFS representation from the schema
    metactx = collect_namespaces(schema_metadata)
    if "$base" in schema_metadata:
        metactx["@base"] = schema_metadata["$base"]
    schema_ctx, _rdfs = salad_to_jsonld_context(schema_doc, metactx)

    # Create the loader that will be used to load the target document.
    document_loader = Loader(schema_ctx, skip_schemas=False)

    # Make the Avro validation that will be used to validate the target
    # document
    avsc_names = make_avro_schema_from_avro(make_avro(schema_doc, document_loader))

    # Load target document and resolve refs
    document, _doc_metadata = document_loader.resolve_ref(
        str(document_path),
        strict_foreign_properties=strict_foreign_properties,
        checklinks=False  # This is what's getting us around file link checking.
    )

    validate_doc(avsc_names, document, document_loader,
                 strict=strict,
                 strict_foreign_properties=strict_foreign_properties)
def default_loader(
    fetcher_constructor: Optional[FetcherCallableType] = None,
    enable_dev: bool = False,
    doc_cache: bool = True,
) -> Loader:
    """Build a ``Loader`` over ``jobloaderctx``.

    Attachments are allowed for every document exactly when ``enable_dev``
    is true; ``fetcher_constructor`` and ``doc_cache`` are passed through.
    """

    def _allow_attachments(r: object) -> bool:
        # The document is ignored; the decision depends only on enable_dev.
        return enable_dev

    return Loader(
        jobloaderctx,
        fetcher_constructor=fetcher_constructor,
        allow_attachments=_allow_attachments,
        doc_cache=doc_cache,
    )
def test_fetch_inject_id():
    """Check which identifiers end up in the loader index for three fixtures.

    Each fixture is loaded through its lower-cased file URI with a fresh
    loader, and the index keys are compared lower-cased and sorted.
    """
    def load_fixture(relative_path):
        loader = Loader({"id": "@id"})
        uri = file_uri(get_data(relative_path)).lower()
        document, _ = loader.resolve_ref(uri)
        return loader, uri, document

    def indexed_ids(loader):
        return sorted(key.lower() for key in loader.idx.keys())

    l1, furi1, r1 = load_fixture("schema_salad/tests/inject-id1.yml")
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [furi1, furi1 + "#foo"] == indexed_ids(l1)

    l2, furi2, r2 = load_fixture("schema_salad/tests/inject-id2.yml")
    assert {"id": furi2, "bar": "baz"} == r2
    assert [furi2] == indexed_ids(l2)

    l3, furi3, r3 = load_fixture("schema_salad/tests/inject-id3.yml")
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [furi3, "http://example.com"] == indexed_ids(l3)
def resolve_overrides(ov, ov_uri, baseurl):
    # type: (CommentedMap, Text, Text) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return its cwltool overrides list.

    ``ov`` is first resolved against ``baseurl`` with a loader built from
    ``overrides_ctx``, then resolved again against ``ov_uri`` with the v1.0
    schema loader.

    Raises ``Exception`` when the first resolution does not yield a
    ``CommentedMap``.
    """
    resolved = Loader(overrides_ctx).resolve_all(ov, baseurl)[0]
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))

    schema_loader = get_schema("v1.0")[0]
    schema_loader.resolve_all(resolved, ov_uri)
    return resolved["http://commonwl.org/cwltool#overrides"]
def resolve_overrides(
    ov: IdxResultType,
    ov_uri: str,
    baseurl: str,
) -> List[CWLObjectType]:
    """Resolve an overrides document and return its cwltool overrides list.

    ``ov`` is first resolved against ``baseurl`` with a loader built from
    ``overrides_ctx``, then resolved again against ``ov_uri`` with the v1.0
    schema loader.

    Raises ``Exception`` when the first resolution does not yield a
    ``CommentedMap``.
    """
    resolved = Loader(overrides_ctx).resolve_all(ov, baseurl)[0]
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))

    schema_loader = get_schema("v1.0")[0]
    schema_loader.resolve_all(resolved, ov_uri)

    overrides = resolved["http://commonwl.org/cwltool#overrides"]
    return cast(List[CWLObjectType], overrides)
def test_fetcher() -> None:
    """Check that a loader built with ``testFetcher`` uses it for lookups."""
    loader = Loader({}, fetcher_constructor=testFetcher)

    plain = loader.resolve_ref("foo.txt")[0]
    assert {"hello": "foo"} == plain

    keep_scoped = loader.resolve_ref("foo.txt", base_url="keep:abc+123")[0]
    assert {"hello": "keepfoo"} == keep_scoped

    assert loader.check_exists("foo.txt")

    # "bar.txt" is expected to be unknown to the fetcher.
    with pytest.raises(RuntimeError):
        loader.resolve_ref("bar.txt")
    assert not loader.check_exists("bar.txt")
def test_blank_node_id() -> None:
    """Check that a blank-node id is left untouched by ``resolve_all``."""
    # Blank nodes (also called anonymous ids) are URIs starting with "_:".
    # They are randomly generated placeholders, mainly used internally where
    # an id is needed but was not given.  They must be passed through as-is
    # and not be treated as relative paths.
    ldr = Loader({})
    ctx = {"id": "@id"}  # type: ContextType
    ldr.add_context(ctx)

    blank_doc = cmap({"id": "_:foo"})
    resolved, _ = ldr.resolve_all(blank_doc, "http://example.com")
    assert {"id": "_:foo"} == resolved
def expand_cwl(cwl, uri, g):
    """Load a CWL document, validate it and add its RDF to graph ``g``.

    The outcome is reported on stderr: an "imported ok" line on success, or
    the exception text on failure.  Exceptions are not propagated and
    nothing is returned.
    """
    try:
        loader_ctx = {
            "cwl": "https://w3id.org/cwl/cwl#",
            "id": "@id"
        }
        document_loader = Loader(loader_ctx)

        # NOTE(review): yaml.load is unsafe on untrusted input with some YAML
        # libraries/loaders -- confirm where ``cwl`` comes from.
        cwl = yaml.load(cwl)

        validated = validate_document(document_loader, cwl, uri, strict=False)
        # ``uri`` is rebound here, so the messages below use the validated URI.
        document_loader, _avsc_names, processobj, _metadata, uri = validated

        jsonld_context.makerdf(uri, processobj, document_loader.ctx, graph=g)
        sys.stderr.write("\n%s: imported ok\n" % (uri))
    except Exception as e:
        sys.stderr.write("\n%s: %s\n" % (uri, e))
def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
    """A ``Loader`` gets a ``Session`` when neither HOME nor TMP is set.

    The process environment is restored afterwards so that removing the
    variables does not leak into other tests.
    """
    import os
    from schema_salad.ref_resolver import Loader
    from requests import Session

    saved_environ = dict(os.environ)
    try:
        # Ensure both HOME and TMP are missing.
        os.environ.pop("HOME", None)
        os.environ.pop("TMP", None)

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
def test_attachments():
    """Exercise ``Loader(allow_attachments=...)`` against ``multidoc.yml``.

    Only the predicate testing ``foo == "bar"`` is expected to yield the
    individual attachment strings; with no predicate, or with one testing
    ``foo == "baz"``, ``baz`` and ``quux`` are expected to hold the raw text
    of the whole file.
    """
    fixture = "schema_salad/tests/multidoc.yml"
    furi = file_uri(get_data(fixture))

    def foo_is_bar(x):
        return x["foo"] == "bar"

    def foo_is_baz(x):
        return x["foo"] == "baz"

    # No predicate supplied.
    plain_result, _ = Loader({}).resolve_ref(furi)
    with open(get_data(fixture), "rt") as handle:
        whole_text = handle.read()
    assert {"foo": "bar", "baz": whole_text, "quux": whole_text} == plain_result

    # Predicate testing foo == "bar".
    split_result, _ = Loader({}, allow_attachments=foo_is_bar).resolve_ref(furi)
    assert {
        "foo": "bar",
        "baz": "This is the {first attachment}.\n",
        "quux": "This is the [second attachment].",
    } == split_result

    # Predicate testing foo == "baz".
    unsplit_result, _ = Loader({}, allow_attachments=foo_is_baz).resolve_ref(furi)
    with open(get_data(fixture), "rt") as handle:
        whole_text = handle.read()
    assert {"foo": "bar", "baz": whole_text, "quux": whole_text} == unsplit_result
def test_fetch_inject_id() -> None:
    """Check which identifiers end up in the loader index for three fixtures.

    Index keys are compared after ``lower``, which lower-cases them only when
    ``is_fs_case_sensitive`` reports False for the fixture directory.
    """
    path = get_data("schema_salad/tests/inject-id1.yml")
    assert path
    case_sensitive = is_fs_case_sensitive(os.path.dirname(path))

    def lower(item: str) -> str:
        return item if case_sensitive else item.lower()

    # Fixture 1: the document and its "#foo" id are both indexed.
    l1 = Loader({"id": "@id"})
    furi1 = file_uri(path)
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [lower(furi1), lower(furi1 + "#foo")] == sorted(
        lower(k) for k in l1.idx.keys()
    )

    # Fixture 2: the document id equals the file URI, giving one index entry.
    l2 = Loader({"id": "@id"})
    path2 = get_data("schema_salad/tests/inject-id2.yml")
    assert path2
    furi2 = file_uri(path2)
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    assert [lower(furi2)] == sorted(lower(k) for k in l2.idx.keys())

    # Fixture 3: the id is an absolute URI unrelated to the file URI.
    l3 = Loader({"id": "@id"})
    path3 = get_data("schema_salad/tests/inject-id3.yml")
    assert path3
    furi3 = file_uri(path3)
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [lower(furi3), "http://example.com"] == sorted(
        lower(k) for k in l3.idx.keys()
    )