def load_job_order(args,                 # type: argparse.Namespace
                   stdin,                # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides,            # type: List[Dict[Text, Any]]
                   tool_file_uri         # type: Text
                   ):  # type: (...) -> Tuple[Dict[Text, Any], Text, Loader]
    """Load the job order (input object) from a file argument or stdin.

    Returns (job_order_object, input_basedir, loader).  Any
    cwltool#overrides entries found in the document are appended to
    *overrides* and removed from the returned object.
    """
    job_order_object = None
    # BUG FIX: job_order_file was left unbound by the stdin branch, so a
    # falsy stdin document made `elif job_order_file:` raise
    # UnboundLocalError.  Initialise it up front.
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object,
                                                 file_uri(os.getcwd()) + "/")

    # BUG FIX: use `is not None` so an empty-but-present document (e.g. {})
    # still selects the stdin path and input_basedir is always bound.
    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # BUG FIX: when the document came from stdin, job_order_file is None
        # and file_uri(None) raised TypeError; fall back to the basedir.
        overrides.extend(
            resolve_overrides(job_order_object,
                              file_uri(job_order_file or input_basedir),
                              tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
def resolve_overrides(ov, ov_uri, baseurl):
    # type: (CommentedMap, Text, Text) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return its list of override entries."""
    resolved, _ = Loader(overrides_ctx).resolve_all(ov, baseurl)
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))
    # Re-resolve under the CWL v1.0 schema context so identifiers expand.
    get_schema("v1.0")[0].resolve_all(resolved, ov_uri)
    # NOTE(review): sibling implementations read the fully-qualified
    # "http://commonwl.org/cwltool#overrides" key — confirm "overrides"
    # is correct under this context.
    return resolved["overrides"]
def create_loader(ctx):
    """Build a Loader whose url_fields lists every context key (except "id")
    declared as a URI reference, either directly ("@id") or via a dict entry
    with "@type": "@id".
    """
    loader = Loader()
    url_fields = []
    for c in ctx:
        # BUG FIX: parenthesize the `or`.  Previously `and` bound tighter,
        # so the `c != "id"` guard did not apply to the dict-valued
        # @type == "@id" case and "id" itself could be appended.
        if c != "id" and (ctx[c] == "@id"
                          or (isinstance(ctx[c], dict)
                              and ctx[c].get("@type") == "@id")):
            url_fields.append(c)
    loader.url_fields = url_fields
    loader.idx["cwl:JsonPointer"] = {}
    return loader
def resolve_overrides(ov,      # type: CommentedMap
                      ov_uri,  # type: Text
                      baseurl  # type: Text
                      ):  # type: (...) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return the cwltool#overrides list.

    *ov* is resolved against *baseurl*, then re-resolved under the CWL v1.0
    schema context so identifiers expand to absolute URIs before the
    overrides list is extracted.
    """
    ovloader = Loader(overrides_ctx)
    ret, _ = ovloader.resolve_all(ov, baseurl)
    if not isinstance(ret, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(ret))
    # Second resolution pass under the CWL v1.0 schema context.
    cwl_docloader = get_schema("v1.0")[0]
    cwl_docloader.resolve_all(ret, ov_uri)
    return ret["http://commonwl.org/cwltool#overrides"]
def resolve_overrides(
        ov,      # type: CommentedMap
        ov_uri,  # type: Text
        baseurl  # type: Text
):  # type: (...) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return its override entries.

    *ov* is resolved against *baseurl*, then re-resolved under the CWL v1.0
    schema context before the "overrides" list is extracted.
    """
    ovloader = Loader(overrides_ctx)
    ret, _ = ovloader.resolve_all(ov, baseurl)
    if not isinstance(ret, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(ret))
    cwl_docloader = get_schema("v1.0")[0]
    cwl_docloader.resolve_all(ret, ov_uri)
    # NOTE(review): sibling implementations read the fully-qualified
    # "http://commonwl.org/cwltool#overrides" key — confirm the short
    # "overrides" key is correct under this context.
    return ret["overrides"]
def resolve_overrides(
    ov: IdxResultType,
    ov_uri: str,
    baseurl: str,
) -> List[CWLObjectType]:
    """Resolve an overrides document and return its override entries."""
    resolved, _ = Loader(overrides_ctx).resolve_all(ov, baseurl)
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))
    # Expand identifiers under the CWL v1.0 schema context.
    get_schema("v1.0")[0].resolve_all(resolved, ov_uri)
    return cast(
        List[CWLObjectType],
        resolved["http://commonwl.org/cwltool#overrides"],
    )
def test_blank_node_id() -> None:
    """Blank-node ids ("_:" URIs) must pass through resolution unchanged."""
    # Blank nodes (anonymous ids) are randomly generated placeholders used
    # internally when an id is needed but none was given; they are not
    # relative paths, so they must not be resolved against the base URI.
    loader = Loader({})
    context = {"id": "@id"}  # type: ContextType
    loader.add_context(context)
    resolved, _ = loader.resolve_all(cmap({"id": "_:foo"}), "http://example.com")
    assert resolved == {"id": "_:foo"}
def resolve_overrides(
    ov,       # type: CommentedMap
    ov_uri,   # type: str
    baseurl,  # type: str
):  # type: (...) -> List[Dict[str, Any]]
    """Resolve an overrides document and return its override entries."""
    resolved, _ = Loader(overrides_ctx).resolve_all(ov, baseurl)
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))
    # Expand identifiers under the CWL v1.0 schema context.
    get_schema("v1.0")[0].resolve_all(resolved, ov_uri)
    return cast(
        List[Dict[str, Any]],
        resolved["http://commonwl.org/cwltool#overrides"],
    )
def load_job_order(
        args,                 # type: argparse.Namespace
        stdin,                # type: IO[Any]
        fetcher_constructor,  # Fetcher
        overrides_list,       # type: List[Dict[Text, Any]]
        tool_file_uri         # type: Text
):  # type: (...) -> Tuple[Optional[MutableMapping[Text, Any]], Text, Loader]
    """Load the job order (input object) from a file argument or stdin.

    Returns (job_order_object, input_basedir, loader); job_order_object is
    None when no input document was provided.  Any cwltool#overrides found
    in the document are appended to *overrides_list* and removed from the
    returned object.  Exits the process if the document is not a mapping.
    """
    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object,
                                                 file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object is not None and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # When the document came from stdin there is no file; use the basedir.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides_list.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    if job_order_object is not None and not isinstance(job_order_object, MutableMapping):
        # BUG FIX: corrected misspelled user-facing message ("dictionay").
        _logger.error(
            'CWL input object at %s is not formatted correctly, it should be a '
            'JSON/YAML dictionary, not %s.\n'
            'Raw input object:\n%s',
            job_order_file or "stdin", type(job_order_object), job_order_object)
        sys.exit(1)
    return (job_order_object, input_basedir, loader)
def test_scoped_id() -> None:
    """Scoped identifiers: "id" children resolve relative to their parent's id,
    while plain @id link fields ("location") resolve against the base URI only.
    """
    ldr = Loader({})
    ctx = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }  # type: ContextType
    ldr.add_context(ctx)

    # "id" fields get fragment-scoped URIs: parent "#foo", child "#foo/baz".
    ra, _ = ldr.resolve_all(
        cmap({"id": "foo", "bar": {"id": "baz"}}), "http://example.com"
    )
    assert {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    # "location" fields resolve against the base URI without fragment scoping.
    ra, _ = ldr.resolve_all(
        cmap({"location": "foo", "bar": {"location": "baz"}}),
        "http://example.com",
        checklinks=False,
    )
    assert {
        "location": "http://example.com/foo",
        "bar": {"location": "http://example.com/baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    # Mixed: scoped "id" at the top, unscoped "location" in the child.
    ra, _ = ldr.resolve_all(
        cmap({"id": "foo", "bar": {"location": "baz"}}),
        "http://example.com",
        checklinks=False,
    )
    assert {
        "id": "http://example.com/#foo",
        "bar": {"location": "http://example.com/baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    # Mixed: unscoped "location" at the top, scoped "id" in the child.
    ra, _ = ldr.resolve_all(
        cmap({"location": "foo", "bar": {"id": "baz"}}),
        "http://example.com",
        checklinks=False,
    )
    assert {
        "location": "http://example.com/foo",
        "bar": {"id": "http://example.com/#baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))
def test_subscoped_id() -> None:
    """A field declaring a `subscope` inserts an extra path segment into the
    ids of its children ("#foo/bar/baz" rather than "#foo/baz")."""
    loader = Loader({})
    context = {
        "id": "@id",
        "bar": {"subscope": "bar"},
    }  # type: ContextType
    loader.add_context(context)
    resolved, _ = loader.resolve_all(
        cmap({"id": "foo", "bar": {"id": "baz"}}), "http://example.com"
    )
    expected = {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/bar/baz"},
    }
    assert resolved == expected
def resolve_tool_uri(
        argsworkflow,              # type: Text
        resolver=None,             # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
        fetcher_constructor=None,  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
        document_loader=None       # type: Loader
):  # type: (...) -> Tuple[Text, Text]
    """Resolve a workflow argument to an absolute (uri, fileuri) pair.

    Accepts an http/https/file URI, an existing local path, or anything the
    optional *resolver* callback can handle; raises ValidationException
    otherwise.
    """
    resolved = None  # type: Text
    parts = urllib.parse.urlsplit(argsworkflow)
    # urlsplit misjudges Windows drive letters as URL schemes, so only the
    # schemes we actually support are accepted here.
    if parts.scheme and parts.scheme in [u'http', u'https', u'file']:
        resolved = argsworkflow
    elif os.path.exists(os.path.abspath(argsworkflow)):
        resolved = file_uri(str(os.path.abspath(argsworkflow)))
    elif resolver:
        if document_loader is None:
            document_loader = Loader(
                jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore
        resolved = resolver(document_loader, argsworkflow)

    if resolved is None:
        raise ValidationException("Not found: '%s'" % argsworkflow)
    if argsworkflow != resolved:
        _logger.info("Resolved '%s' to '%s'", argsworkflow, resolved)
    return resolved, urllib.parse.urldefrag(resolved)[0]
def test_sourceline() -> None:
    """SourceLine must re-raise inner exceptions as the requested type with a
    file:line:column location prefix taken from the parsed document."""
    ldr = Loader({"id": "@id"})
    path = get_data("tests/frag.yml")
    assert path
    b, _ = ldr.resolve_ref(path)

    class TestExp(Exception):
        pass

    try:
        # The generic Exception raised inside the context manager is expected
        # to surface as TestExp carrying the source location of key 1 of `b`.
        with SourceLine(b, 1, TestExp, False):
            raise Exception("Whoops")
    except TestExp as e:
        assert str(e).endswith("frag.yml:3:3: Whoops"), e
    except Exception as exc:
        # Any other exception type means SourceLine failed to translate it.
        assert False, exc
def default_loader(fetcher_constructor):
    # type: (Optional[FetcherConstructorType]) -> Loader
    """Return the Loader cached for *fetcher_constructor*, creating it on
    first use (memoized in the module-level `loaders` mapping)."""
    try:
        return loaders[fetcher_constructor]
    except KeyError:
        new_loader = Loader(jobloaderctx, fetcher_constructor=fetcher_constructor)
        loaders[fetcher_constructor] = new_loader
        return new_loader
def default_loader(fetcher_constructor=None, enable_dev=False, doc_cache=True):
    # type: (Optional[FetcherConstructorType], bool, bool) -> Loader
    """Build a fresh Loader over the job-order context.

    Attachments are only allowed when *enable_dev* is set; *doc_cache*
    toggles the loader's document cache.
    """
    def _allow_attachments(record):
        # Attachment parsing is gated solely on the dev flag.
        return enable_dev

    return Loader(
        jobloaderctx,
        fetcher_constructor=fetcher_constructor,
        allow_attachments=_allow_attachments,
        doc_cache=doc_cache,
    )
def load_job_order(args,                 # type: argparse.Namespace
                   stdin,                # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides_list,       # type: List[Dict[Text, Any]]
                   tool_file_uri         # type: Text
                   ):  # type: (...) -> Tuple[Optional[MutableMapping[Text, Any]], Text, Loader]
    """Load the job order (input object) from a file argument or stdin.

    Returns (job_order_object, input_basedir, loader); job_order_object is
    None when no input document was provided.  Any cwltool#overrides found
    in the document are appended to *overrides_list* and removed from the
    returned object.  Exits the process if the document is not a mapping.
    """
    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object,
                                                 file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object is not None and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # When the document came from stdin there is no file; use the basedir.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides_list.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    if job_order_object is not None and not isinstance(job_order_object, MutableMapping):
        # BUG FIX: corrected misspelled user-facing message ("dictionay").
        _logger.error(
            'CWL input object at %s is not formatted correctly, it should be a '
            'JSON/YAML dictionary, not %s.\n'
            'Raw input object:\n%s',
            job_order_file or "stdin", type(job_order_object), job_order_object)
        sys.exit(1)
    return (job_order_object, input_basedir, loader)
def test_Loader_initialisation_for_TMP_env_var(tmp_dir_fixture):
    """Loader must build a requests Session when only TMP (not HOME) is set."""
    # Ensure HOME is missing and TMP is present.
    os.environ.pop("HOME", None)
    os.environ["TMP"] = tmp_dir_fixture
    assert isinstance(Loader(ctx={}).session, Session)
def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
    """Loader must still build a requests Session with neither HOME nor TMP set."""
    # Remove both variables if present.
    for var in ("HOME", "TMP"):
        os.environ.pop(var, None)
    assert isinstance(Loader(ctx={}).session, Session)
def test_fetcher() -> None:
    """Exercise a custom fetcher_constructor: resolution, base_url handling,
    and existence checks for both present and missing resources."""
    loader = Loader({}, fetcher_constructor=testFetcher)
    assert loader.resolve_ref("foo.txt")[0] == {"hello": "foo"}
    assert loader.resolve_ref("foo.txt", base_url="keep:abc+123")[0] == {
        "hello": "keepfoo"
    }
    assert loader.check_exists("foo.txt")

    # Unknown resources raise from the fetcher and report as absent.
    with pytest.raises(RuntimeError):
        loader.resolve_ref("bar.txt")
    assert not loader.check_exists("bar.txt")
def test_Loader_initialisation_for_HOME_env_var(tmp_dir_fixture):
    """Loader must build a requests Session when HOME is set."""
    import os
    from schema_salad.ref_resolver import Loader
    from requests import Session

    # Point HOME at the fixture directory.
    os.environ["HOME"] = tmp_dir_fixture
    assert isinstance(Loader(ctx={}).session, Session)
def load_job_order(
        args,                 # type: argparse.Namespace
        stdin,                # type: IO[Any]
        fetcher_constructor,  # Fetcher
        overrides,            # type: List[Dict[Text, Any]]
        tool_file_uri         # type: Text
):  # type: (...) -> Tuple[Dict[Text, Any], Text, Loader]
    """Load the job order (input object) from a file argument or stdin.

    Returns (job_order_object, input_basedir, loader).  Any
    cwltool#overrides entries found in the document are appended to
    *overrides* and removed from the returned object.
    """
    job_order_object = None
    # BUG FIX: job_order_file was left unbound by the stdin branch, so a
    # falsy stdin document made `elif job_order_file:` raise
    # UnboundLocalError.  Initialise it up front.
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object,
                                                 file_uri(os.getcwd()) + "/")

    # BUG FIX: use `is not None` so an empty-but-present document (e.g. {})
    # still selects the stdin path and input_basedir is always bound.
    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir else os.path.abspath(
            os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # BUG FIX: when the document came from stdin, job_order_file is None
        # and file_uri(None) raised TypeError; fall back to the basedir.
        overrides.extend(
            resolve_overrides(job_order_object,
                              file_uri(job_order_file or input_basedir),
                              tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
def default_loader(
    fetcher_constructor: Optional[FetcherCallableType] = None,
    enable_dev: bool = False,
    doc_cache: bool = True,
) -> Loader:
    """Build a fresh Loader over the job-order context.

    Attachments are only allowed when *enable_dev* is set; *doc_cache*
    toggles the loader's document cache.
    """
    def _allow_attachments(record):
        # Attachment parsing is gated solely on the dev flag.
        return enable_dev

    return Loader(
        jobloaderctx,
        fetcher_constructor=fetcher_constructor,
        allow_attachments=_allow_attachments,
        doc_cache=doc_cache,
    )
def fetch_document(
        argsworkflow,             # type: Union[Text, dict[Text, Any]]
        resolver=None,            # type: Callable[[Loader, Union[Text, dict[Text, Any]]], Text]
        fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
):  # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    *argsworkflow* may be a URI string, a local path, anything the optional
    *resolver* can handle, or an already-loaded dict.  Returns
    (document_loader, workflowobj, uri).
    """
    document_loader = Loader({
        "cwl": "https://w3id.org/cwl/cwl#",
        "id": "@id"
    }, fetcher_constructor=fetcher_constructor)

    uri = None  # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        split = urllib.parse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            # Bare local path: convert to a file:// URI.
            uri = file_uri(str(os.path.abspath(argsworkflow)))
        elif resolver:
            uri = resolver(document_loader, argsworkflow)
        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)
        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)
        fileuri = urllib.parse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        # In-memory document: synthesize a unique fragment id from the
        # object's identity.
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def test_schemas() -> None:
    """$namespaces prefixes expand to full URIs in both keys and values."""
    loader = Loader({})
    path = get_data("tests/EDAM.owl")
    assert path

    document = cmap(
        {
            "$schemas": [file_uri(path)],
            "$namespaces": {"edam": "http://edamontology.org/"},
            "edam:has_format": "edam:format_1915",
        }
    )
    resolved, _ = loader.resolve_all(document, "")

    expected = {
        "$schemas": [file_uri(path)],
        "$namespaces": {"edam": "http://edamontology.org/"},
        "http://edamontology.org/has_format": "http://edamontology.org/format_1915",
    }
    assert resolved == expected
def fetch_document(argsworkflow):
    # type: (Union[Text, Text, dict[Text, Any]]) -> Tuple[Loader, Dict[Text, Any], Text]
    """Retrieve a CWL document.

    *argsworkflow* may be a URI string, a local path, or an already-loaded
    dict.  Returns (document_loader, workflowobj, uri).

    NOTE(review): Python 2 era code — uses `basestring` and `urlparse`.
    """
    document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"})

    uri = None  # type: Text
    workflowobj = None  # type: Dict[Text, Any]
    if isinstance(argsworkflow, basestring):
        split = urlparse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        else:
            # Bare path: assume a local file.
            uri = "file://" + os.path.abspath(argsworkflow)
        fileuri = urlparse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        workflowobj = argsworkflow
        # In-memory document: synthesize a unique fragment id from identity.
        uri = "#" + Text(id(argsworkflow))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def cwl_dispatch(self, json):
    """Resolve the incoming *json* payload into a CWL job order and build the
    initial cwl_context ({"outdir": ..., "promises": <job order>}) for
    downstream tasks.

    Returns None on any failure; errors are logged rather than raised.
    """
    try:
        cwlwf, it_is_workflow = load_cwl(
            self.dag.default_args["cwl_workflow"], self.dag.default_args)
        cwl_context = {
            "outdir": mkdtemp(
                dir=get_folder(os.path.abspath(self.tmp_folder)),
                prefix="dag_tmp_")
        }

        _jobloaderctx = jobloaderctx.copy()
        _jobloaderctx.update(cwlwf.metadata.get("$namespaces", {}))
        loader = Loader(_jobloaderctx)

        try:
            # Round-trip the payload through JSON text so the YAML loader
            # produces the comment-preserving mapping type the resolver wants.
            job_order_object = yaml.round_trip_load(
                io.StringIO(initial_value=dumps(json)))
            job_order_object, _ = loader.resolve_all(
                job_order_object, file_uri(os.getcwd()) + "/", checklinks=False)
        except Exception as e:
            # Best-effort: resolution failures are logged and the unresolved
            # object (possibly unbound) falls through to init_job_order.
            _logger.error("Job Loader: {}".format(str(e)))

        job_order_object = init_job_order(job_order_object, None, cwlwf, loader, sys.stdout)
        cwl_context['promises'] = job_order_object

        logging.info('{0}: Final job: \n {1}'.format(
            self.task_id, dumps(cwl_context, indent=4)))
        return cwl_context
    except Exception as e:
        # Deliberate best-effort: dispatch failures are logged, not raised.
        _logger.info('Dispatch Exception {0}: \n {1} {2}'.format(
            self.task_id, type(e), e))
        pass
    return None
def test_idmap() -> None:
    """mapSubject/mapPredicate turn a mapping of inputs into a list of
    objects, with the key becoming the subject field ("id") and the value
    the predicate field ("a")."""
    ldr = Loader({})
    ldr.add_context({
        "inputs": {
            "@id": "http://example.com/inputs",
            "mapSubject": "id",
            "mapPredicate": "a",
        },
        "outputs": {
            "@type": "@id",
            "identity": True
        },
        "id": "@id",
    })
    ra, _ = ldr.resolve_all(
        cmap({
            "id": "stuff",
            "inputs": {
                "zip": 1,
                "zing": 2
            },
            "outputs": ["out"],
            "other": {
                "n": 9
            },
        }),
        "http://example2.com/",
    )
    assert isinstance(ra, CommentedMap)
    assert "http://example2.com/#stuff" == ra["id"]
    # Each inputs key became an object: key -> scoped "id", value -> "a".
    for item in ra["inputs"]:
        if item["a"] == 2:
            assert "http://example2.com/#stuff/zing" == item["id"]
        else:
            assert "http://example2.com/#stuff/zip" == item["id"]
    assert ["http://example2.com/#stuff/out"] == ra["outputs"]
    # "other" has no context entry, so it passes through untouched.
    assert {"n": 9} == ra["other"]
def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
    """Loader must still build a requests Session with neither HOME nor TMP set."""
    import os
    from schema_salad.ref_resolver import Loader
    from requests import Session

    # Remove both variables if present.
    for var in ("HOME", "TMP"):
        os.environ.pop(var, None)
    assert isinstance(Loader(ctx={}).session, Session)
def fetch_document(
        argsworkflow,             # type: Union[Text, Dict[Text, Any]]
        resolver=None,            # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
        fetcher_constructor=None  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
):  # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    *argsworkflow* may be a URI string, a local path, anything the optional
    *resolver* can handle, or an already-loaded dict.  Returns
    (document_loader, workflowobj, uri).
    """
    document_loader = Loader(
        jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    uri = None  # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        split = urllib.parse.urlsplit(argsworkflow)
        # In case of Windows path, urlsplit misjudge Drive letters as scheme,
        # here we are skipping that by whitelisting supported schemes.
        if split.scheme and split.scheme in [u'http', u'https', u'file']:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            # Bare local path: convert to a file:// URI.
            uri = file_uri(str(os.path.abspath(argsworkflow)))
        elif resolver:
            uri = resolver(document_loader, argsworkflow)
        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)
        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)
        fileuri = urllib.parse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        # In-memory document: synthesize a unique fragment id from identity.
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def expand_cwl(cwl, uri, g):
    """Validate CWL text *cwl* and merge its RDF representation into graph *g*.

    Errors are reported to stderr rather than raised, so a batch of
    documents can be processed best-effort.
    """
    try:
        document_loader = Loader({
            "cwl": "https://w3id.org/cwl/cwl#",
            "id": "@id"
        })
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input — confirm the source of `cwl` is trusted.
        cwl = yaml.load(cwl)
        document_loader, avsc_names, processobj, metadata, uri = validate_document(
            document_loader, cwl, uri, strict=False)
        jsonld_context.makerdf(uri, processobj, document_loader.ctx, graph=g)
        sys.stderr.write("\n%s: imported ok\n" % (uri))
    except Exception as e:
        # Best-effort: report the failure and continue with other documents.
        sys.stderr.write("\n%s: %s\n" % (uri, e))
def test_secondaryFile_dsl_ref() -> None:
    """The secondaryFiles DSL expands ".ext" to required=None and ".ext?" to
    required=False, for scalar and list values alike."""
    loader = Loader({})
    loader.add_context({"secondaryFiles": {"secondaryFilesDSL": True}})

    resolved, _ = loader.resolve_all(cmap({"secondaryFiles": ".foo"}), "")
    assert resolved == {"secondaryFiles": {"pattern": ".foo", "required": None}}

    resolved, _ = loader.resolve_all(cmap({"secondaryFiles": ".foo?"}), "")
    assert resolved == {"secondaryFiles": {"pattern": ".foo", "required": False}}

    resolved, _ = loader.resolve_all(cmap({"secondaryFiles": [".foo"]}), "")
    assert resolved == {"secondaryFiles": [{"pattern": ".foo", "required": None}]}

    resolved, _ = loader.resolve_all(cmap({"secondaryFiles": [".foo?"]}), "")
    assert resolved == {"secondaryFiles": [{"pattern": ".foo", "required": False}]}
def test_fetch_inject_id():
    """resolve_ref must inject document ids into the loader index.

    Three fixtures cover: a relative fragment id, an id equal to the file
    URI, and an absolute external id.
    """
    # Keys are lowercased before comparison, presumably to tolerate
    # case-insensitive filesystems — confirm.
    l1 = Loader({"id": "@id"})
    furi1 = file_uri(get_data("schema_salad/tests/inject-id1.yml")).lower()
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    # Both the file URI and the injected fragment id are indexed.
    assert [furi1, furi1 + "#foo"] == sorted(list(k.lower() for k in l1.idx.keys()))

    l2 = Loader({"id": "@id"})
    furi2 = file_uri(get_data("schema_salad/tests/inject-id2.yml")).lower()
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    # Id equals the file URI: no extra index entry.
    assert [furi2] == sorted(list(k.lower() for k in l2.idx.keys()))

    l3 = Loader({"id": "@id"})
    furi3 = file_uri(get_data("schema_salad/tests/inject-id3.yml")).lower()
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    # Absolute external id indexed alongside the file URI.
    assert [furi3, "http://example.com"
            ] == sorted(list(k.lower() for k in l3.idx.keys()))
def fetch_document(argsworkflow,             # type: Union[Text, dict[Text, Any]]
                   resolver=None,            # type: Callable[[Loader, Union[Text, dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                   ):  # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    *argsworkflow* may be a URI string, a local path, anything the optional
    *resolver* can handle, or an already-loaded dict.  Returns
    (document_loader, workflowobj, uri).

    NOTE(review): Python 2 era code — uses `basestring` and `urlparse`.
    """
    document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"},
                             fetcher_constructor=fetcher_constructor)

    uri = None  # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, basestring):
        split = urlparse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            # Bare local path: convert to a file:// URI.
            uri = "file://" + os.path.abspath(argsworkflow)
        elif resolver:
            uri = resolver(document_loader, argsworkflow)
        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)
        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)
        fileuri = urlparse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        # In-memory document: synthesize a unique fragment id from identity.
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def test_attachments():
    """allow_attachments gates parsing of extra YAML-stream documents.

    multidoc.yml carries a lead document plus two attachments; when
    attachments are not allowed, the raw file text is substituted for the
    attachment fields instead.
    """
    furi = file_uri(get_data("schema_salad/tests/multidoc.yml"))

    # Default loader (no allow_attachments): "baz" and "quux" receive the
    # entire raw file content.
    l1 = Loader({})
    r1, _ = l1.resolve_ref(furi)
    with open(get_data("schema_salad/tests/multidoc.yml"), "rt") as f:
        content = f.read()
    assert {"foo": "bar", "baz": content, "quux": content} == r1

    # Predicate matches the lead document -> attachments are parsed out.
    l2 = Loader({}, allow_attachments=lambda x: x["foo"] == "bar")
    r2, _ = l2.resolve_ref(furi)
    assert {
        "foo": "bar",
        "baz": "This is the {first attachment}.\n",
        "quux": "This is the [second attachment].",
    } == r2

    # Predicate does not match -> behaves like the default loader.
    l3 = Loader({}, allow_attachments=lambda x: x["foo"] == "baz")
    r3, _ = l3.resolve_ref(furi)
    with open(get_data("schema_salad/tests/multidoc.yml"), "rt") as f:
        content = f.read()
    assert {"foo": "bar", "baz": content, "quux": content} == r3
def execute(self, context):
    """Airflow operator entry point: assemble the job order for this DAG run.

    Builds the initialized job order from the DAG's job data, re-keys it by
    the workflow's input ids, attaches per-run tmp/output folders, and
    returns {"outputs": <job order>}.
    """
    initialized_job_order_object = init_job_order(
        self.dag.default_args["job_data"]["content"],
        Namespace(),
        self.dag.cwlwf,
        Loader(jobloaderctx.copy()),
        sys.stdout)
    updated_job_order_object = {}
    # Re-key the initialized inputs by the tool's own input short names.
    for index, inp in enumerate(self.dag.cwlwf.tool["inputs"]):
        inp_id = shortname(inp["id"])
        if inp_id.split("/")[-1] in initialized_job_order_object:
            updated_job_order_object[inp_id] = initialized_job_order_object[inp_id.split("/")[-1]]
    # Per-run scratch directory and output location.
    updated_job_order_object["tmp_folder"] = tempfile.mkdtemp(
        dir=self.dag.default_args["job_data"]["content"].get("tmp_folder", DEFAULT_TMP_FOLDER),
        prefix="dag_tmp_")
    updated_job_order_object["output_folder"] = self.dag.default_args["job_data"]["content"]["output_folder"]
    logging.info("Dispatch job\n{}".format(json.dumps(updated_job_order_object, indent=4)))
    return {"outputs": updated_job_order_object}
def test_fetch_inject_id() -> None:
    """resolve_ref must inject document ids into the loader index.

    Three fixtures cover: a relative fragment id, an id equal to the file
    URI, and an absolute external id.  Index keys are normalised only on
    case-insensitive filesystems.
    """
    path = get_data("schema_salad/tests/inject-id1.yml")
    assert path

    # Lowercase index keys only when the filesystem is case-insensitive,
    # where the loader may record a differently-cased form of the URI.
    if is_fs_case_sensitive(os.path.dirname(path)):

        def lower(item: str) -> str:
            return item

    else:

        def lower(item: str) -> str:
            return item.lower()

    # Case 1: relative "id" expands to a fragment and is indexed.
    l1 = Loader({"id": "@id"})
    furi1 = file_uri(path)
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [lower(furi1), lower(furi1 + "#foo")] == sorted(
        list(lower(k) for k in l1.idx.keys())
    )

    # Case 2: id equals the file URI, so there is no extra index entry.
    l2 = Loader({"id": "@id"})
    path2 = get_data("schema_salad/tests/inject-id2.yml")
    assert path2
    furi2 = file_uri(path2)
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    assert [lower(furi2)] == sorted(list(lower(k) for k in l2.idx.keys()))

    # Case 3: absolute external id is indexed alongside the file URI.
    l3 = Loader({"id": "@id"})
    path3 = get_data("schema_salad/tests/inject-id3.yml")
    assert path3
    furi3 = file_uri(path3)
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [lower(furi3), "http://example.com"] == sorted(
        list(lower(k) for k in l3.idx.keys())
    )
def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False,
                   stdout=sys.stdout, make_fs_access=None):
    # type: (argparse.Namespace, Process, IO[Any], bool, bool, IO[Any], Type[StdFsAccess]) -> Union[int, Tuple[Dict[Text, Any], Text]]
    """Assemble the job order (input object) for tool *t*.

    Input may come from a file argument, stdin ("-"), or a tool-specific
    command-line parser generated from the tool's inputs.  Returns
    (job_order_object, input_basedir) on success, or an int exit code
    (0 for help/deps output, 1 on error).

    NOTE(review): Python 2 era code — contains a `print` statement.
    """
    job_order_object = None

    if args.conformance_test:
        loader = Loader({})
    else:
        # Context so path-like fields resolve as URIs; extended with the
        # tool's declared namespaces.
        jobloaderctx = {
            u"path": {u"@type": u"@id"},
            u"location": {u"@type": u"@id"},
            u"format": {u"@type": u"@id"},
            u"id": u"@id"}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)
            return 1
        toolparser = None
    else:
        # No input document: build an argparse parser from the tool's inputs
        # and read the job order from the remaining command-line arguments.
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[Text, Text]
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    job_order_object = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    return 1
            else:
                job_order_object = {"id": args.workflow}
            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})
            _logger.debug(u"Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    # Fill in defaults declared by the tool for any missing inputs.
    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        if toolparser:
            print u"\nOptions for %s " % args.workflow
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required, use --help for details")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=u"file://%s/" % input_basedir)
        return 0

    def pathToLoc(p):
        # Normalize legacy "path" entries into "location".
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    adjustDirObjs(job_order_object, pathToLoc)
    adjustFileObjs(job_order_object, pathToLoc)
    normalizeFilesDirs(job_order_object)
    # Populate directory listings using the configured filesystem access.
    adjustDirObjs(job_order_object, cast(Callable[..., Any],
                  functools.partial(getListing, make_fs_access(input_basedir))))

    # Strip bookkeeping keys that are not part of the tool's inputs.
    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    return (job_order_object, input_basedir)
def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False,
                   stdout=sys.stdout, make_fs_access=None, fetcher_constructor=None):
    # type: (argparse.Namespace, Process, IO[Any], bool, bool, IO[Any], Callable[[Text], StdFsAccess], Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]) -> Union[int, Tuple[Dict[Text, Any], Text]]
    """Assemble the job order (input object) for tool *t*.

    Input may come from a file argument, stdin ("-"), or a tool-specific
    command-line parser generated from the tool's inputs (including record
    types).  Returns (job_order_object, input_basedir) on success, or an
    int exit code (0 for help/deps output, 1 on error).

    NOTE(review): Python 2 era code — uses dict.iteritems().
    """
    job_order_object = None

    _jobloaderctx = jobloaderctx.copy()
    _jobloaderctx.update(t.metadata.get("$namespaces", {}))
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)  # type: ignore
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)
            return 1
        toolparser = None
    else:
        # No input document: build an argparse parser from the tool's inputs
        # and read the job order from the remaining command-line arguments.
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[Text, Text]
        records = []  # type: List[Text]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap, records)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            # Collapse flattened record fields ("rec.field") back into
            # nested record objects keyed by the record name.
            for record_name in records:
                record = {}
                record_items = {
                    k: v for k, v in cmd_line.iteritems() if k.startswith(record_name)}
                for key, value in record_items.iteritems():
                    record[key[len(record_name) + 1:]] = value
                    del cmd_line[key]
                cmd_line[str(record_name)] = record
            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(
                        os.path.dirname(cmd_line["job_order"]))
                    job_order_object = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    return 1
            else:
                job_order_object = {"id": args.workflow}
            del cmd_line["job_order"]
            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Parsed job order from command line: %s",
                              json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    # Fill in defaults declared by the tool for any missing inputs.
    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        if toolparser:
            print(u"\nOptions for {} ".format(args.workflow))
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required, use --help for details")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=file_uri(input_basedir + "/"))
        return 0

    def pathToLoc(p):
        # Normalize legacy "path" entries into "location".
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    visit_class(job_order_object, ("File", "Directory"), pathToLoc)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    # Strip bookkeeping keys that are not part of the tool's inputs.
    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    return (job_order_object, input_basedir)
def validate_document(document_loader,  # type: Loader
                      workflowobj,  # type: CommentedMap
                      uri,  # type: Text
                      enable_dev=False,  # type: bool
                      strict=True,  # type: bool
                      preprocess_only=False,  # type: bool
                      fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    Resolves ``workflowobj`` against the schema for its declared
    ``cwlVersion`` (defaulting to draft-2 when absent), optionally updates
    it to the latest CWL version, and returns
    ``(document_loader, avsc_names, processobj, metadata, uri)``.
    """
    # A bare list is treated as a "$graph" document of several processes.
    if isinstance(workflowobj, list):
        workflowobj = {
            "$graph": workflowobj
        }
    if not isinstance(workflowobj, dict):
        raise ValueError("workflowjobj must be a dict")
    jobobj = None
    if "cwl:tool" in workflowobj:
        # This is really a job-order document naming the process it runs
        # via "cwl:tool": split it into job object + tool reference, then
        # fetch the actual process document.
        jobobj, _ = document_loader.resolve_all(workflowobj, uri)
        uri = urlparse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]
    fileuri = urlparse.urldefrag(uri)[0]
    if "cwlVersion" in workflowobj:
        if not isinstance(workflowobj["cwlVersion"], (str, Text)):
            raise Exception("'cwlVersion' must be a string, got %s" % type(workflowobj["cwlVersion"]))
        # Normalize "cwl:v1.0" / full-URI forms to the bare version string.
        workflowobj["cwlVersion"] = re.sub(
            r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
            workflowobj["cwlVersion"])
    else:
        _logger.warn("No cwlVersion found, treating this file as draft-2.")
        workflowobj["cwlVersion"] = "draft-2"
    if workflowobj["cwlVersion"] == "draft-2":
        # Partially upgrade draft-2 so it can be resolved with the current
        # loader machinery (step updates deferred to update.update below).
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]
    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]
    if isinstance(avsc_names, Exception):
        raise avsc_names
    processobj = None  # type: Union[CommentedMap, CommentedSeq, unicode]
    # Rebuild the loader with the version-specific schema context while
    # keeping the caller's already-populated document index.
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor)
    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")
    if not metadata:
        # Draft-2 documents carry no separate metadata; synthesize it from
        # the process object itself.
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        metadata = cast(CommentedMap, cmap({"$namespaces": processobj.get("$namespaces", {}),
                                            "$schemas": processobj.get("$schemas", []),
                                            "cwlVersion": processobj["cwlVersion"]},
                                           fn=fileuri))
    _convert_stdstreams_to_files(workflowobj)
    if preprocess_only:
        return document_loader, avsc_names, processobj, metadata, uri
    schema.validate_doc(avsc_names, processobj, document_loader, strict)
    if metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, metadata)))
    # NOTE(review): `if jobobj:` skips recording an empty-but-present job
    # object; later revisions of this function test `jobobj is not None` —
    # confirm which behavior is intended here.
    if jobobj:
        metadata[u"cwl:defaults"] = jobobj
    return document_loader, avsc_names, processobj, metadata, uri
def validate_document(document_loader,  # type: Loader
                      workflowobj,  # type: CommentedMap
                      uri,  # type: Text
                      overrides,  # type: List[Dict]
                      metadata,  # type: Dict[Text, Any]
                      enable_dev=False,  # type: bool
                      strict=True,  # type: bool
                      preprocess_only=False,  # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,  # type: bool
                      do_validate=True  # type: bool
                      ):
    # type: (...) -> Tuple[Loader, schema.Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    Resolves ``workflowobj`` against the schema for its declared
    ``cwlVersion``, collects any cwltool overrides from an embedded job
    order, optionally updates the document to the latest CWL version, and
    returns ``(document_loader, avsc_names, processobj, new_metadata, uri)``.
    ``overrides`` is mutated in place (extended) as a side effect.
    """
    # A bare sequence is treated as a "$graph" document of processes.
    if isinstance(workflowobj, MutableSequence):
        workflowobj = cmap({
            "$graph": workflowobj
        }, fn=uri)
    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))
    jobobj = None
    if "cwl:tool" in workflowobj:
        # This is really a job-order document naming the process it runs
        # via "cwl:tool": split it into job object + tool reference, then
        # fetch the actual process document.
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri, checklinks=do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        # Overrides embedded in the job order are hoisted into `overrides`.
        if isinstance(jobobj, CommentedMap) and "http://commonwl.org/cwltool#overrides" in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj["http://commonwl.org/cwltool#overrides"]
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]
    fileuri = urllib.parse.urldefrag(uri)[0]
    if "cwlVersion" not in workflowobj:
        # Fall back to the version recorded in the packed/parent metadata.
        if 'cwlVersion' in metadata:
            workflowobj['cwlVersion'] = metadata['cwlVersion']
        else:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>.\n"
                "Note: if this is a CWL draft-2 (pre v1.0) document then it "
                "will need to be upgraded first.")
    if not isinstance(workflowobj["cwlVersion"], string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(workflowobj["cwlVersion"])))
    # strip out version
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in list(ALLUPDATES):
        # print out all the Supported Versions of cwlVersion
        versions = []
        for version in list(ALLUPDATES):
            if "dev" in version:
                version += " (with --enable-dev flag only)"
            versions.append(version)
        versions.sort()
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(versions)))
    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]
    if isinstance(avsc_names, Exception):
        raise avsc_names
    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    # Rebuild the loader with the version-specific schema context while
    # keeping the caller's already-populated document index.
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor,
                             skip_schemas=skip_schemas)
    _add_blank_ids(workflowobj)
    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")
    if not new_metadata and isinstance(processobj, CommentedMap):
        # No separate metadata came back; synthesize it from the process.
        new_metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]}, fn=fileuri))
    _convert_stdstreams_to_files(workflowobj)
    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri
    if do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader, strict)
    if new_metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)))
    if jobobj is not None:
        new_metadata[u"cwl:defaults"] = jobobj
    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides
    return document_loader, avsc_names, processobj, new_metadata, uri
def load_job_order(args, t, parser):
    """Build the job-order (input) object for process ``t``.

    Inputs may come from a job-order file named on the command line, from
    stdin (``-``), or from per-input command-line options generated from
    the tool's input schema.  Returns ``(job_order_object, input_basedir)``
    on success, or an integer exit code (0 for help output, 1 on error).
    """
    job_order_object = None
    # Defined up-front so the later `if toolparser:` check never hits an
    # unbound local (the stdin branch previously left it undefined).
    toolparser = None
    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        # NOTE(review): `stdin` is a free name here (presumably the module
        # global / sys.stdin) — confirm against the enclosing module.
        # NOTE(review): yaml.load on externally-supplied input can
        # construct arbitrary objects; consider yaml.safe_load.
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(e, exc_info=(e if args.debug else False))
            return 1
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir \
                        else os.path.abspath(
                            os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns (document, metadata); the original
                    # dropped the unpacking and bound a tuple here, which
                    # broke the .update() call below.
                    job_order_object, _ = loader.resolve_ref(
                        cmd_line["job_order"])
                except Exception as e:
                    _logger.error(e, exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {}
            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})
            _logger.debug("Parsed job order from command line: %s",
                          job_order_object)
        else:
            job_order_object = None

    # Fill in schema-declared defaults for any inputs not supplied.
    for inp in t.tool["inputs"]:
        if "default" in inp and (
                not job_order_object
                or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        parser.print_help()
        if toolparser:
            # print() call: the original py2 print statement is a syntax
            # error under Python 3.
            print("\nOptions for %s " % args.workflow)
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    return (job_order_object, input_basedir)
def load_overrides(ov, base_url):
    # type: (Text, Text) -> List[Dict[Text, Any]]
    """Fetch the document at ``ov`` and resolve it as a list of overrides."""
    fetched_doc = Loader(overrides_ctx).fetch(ov)
    return resolve_overrides(fetched_doc, ov, base_url)
def resolve_and_validate_document(loadingContext,
                                  workflowobj,
                                  uri,
                                  preprocess_only=False,  # type: bool
                                  skip_schemas=None,  # type: bool
                                  ):
    # type: (...) -> Tuple[LoadingContext, Text]
    """Validate a CWL document.

    Resolves ``workflowobj`` against the schema for its declared
    ``cwlVersion``, collects overrides/requirements from an embedded job
    order, optionally validates and updates the document, and returns a
    ``(loadingContext, uri)`` pair.  The caller's ``loadingContext`` is not
    mutated: a copy is returned with loader, avsc_names, metadata and
    jobdefaults filled in.
    """
    loadingContext = loadingContext.copy()

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        # This is really a job-order document naming the process it runs
        # via "cwl:tool": split it into job object + tool reference, then
        # fetch the actual process document.
        jobobj, _ = loadingContext.loader.resolve_all(
            workflowobj, uri, checklinks=loadingContext.do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        workflowobj = fetch_document(uri, loadingContext)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]

    cwlVersion = workflowobj.get("cwlVersion")
    if not cwlVersion:
        # A fragment reference may omit cwlVersion; look at the whole file.
        fileobj = fetch_document(fileuri, loadingContext)[1]
        cwlVersion = fileobj.get("cwlVersion")
        if not cwlVersion:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>.\n"
                "Note: if this is a CWL draft-2 (pre v1.0) document then it "
                "will need to be upgraded first.")

    if not isinstance(cwlVersion, string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(cwlVersion)))
    # strip out version
    cwlVersion = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        cwlVersion)
    if cwlVersion not in list(ALLUPDATES):
        # print out all the Supported Versions of cwlVersion
        versions = []
        for version in list(ALLUPDATES):
            if "dev" in version:
                version += " (with --enable-dev flag only)"
            versions.append(version)
        versions.sort()
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(versions)))

    # Overrides embedded in the job order are hoisted onto the context.
    if isinstance(jobobj, CommentedMap) and "http://commonwl.org/cwltool#overrides" in jobobj:
        loadingContext.overrides_list.extend(resolve_overrides(jobobj, uri, uri))
        del jobobj["http://commonwl.org/cwltool#overrides"]

    # `cwl:requirements` in a job order is a v1.1-dev feature; translate
    # it into an override targeting this process.
    if isinstance(jobobj, CommentedMap) and "https://w3id.org/cwl/cwl#requirements" in jobobj:
        if cwlVersion not in ("v1.1.0-dev1",):
            raise ValidationException(
                "`cwl:requirements` in the input object is not part of CWL "
                "v1.0. You can adjust to use `cwltool:overrides` instead; or you "
                "can set the cwlVersion to v1.1.0-dev1 or greater and re-run with "
                "--enable-dev.")
        loadingContext.overrides_list.append(
            {"overrideTarget": uri,
             "requirements": jobobj["https://w3id.org/cwl/cwl#requirements"]})
        del jobobj["https://w3id.org/cwl/cwl#requirements"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(cwlVersion)[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    # Rebuild the loader with the version-specific schema context while
    # keeping the already-populated document index.
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=loadingContext.loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=loadingContext.fetcher_constructor,
                             skip_schemas=skip_schemas)

    if cwlVersion == "v1.0":
        _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=loadingContext.do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a CommentedMap or CommentedSeq.")
    if not isinstance(metadata, CommentedMap):
        raise ValidationException("metadata must be a CommentedMap, was %s" % type(metadata))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return loadingContext, uri

    if loadingContext.do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader,
                            loadingContext.strict)

    if loadingContext.do_update:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri,
            loadingContext.enable_dev, metadata)))

    # Re-index the (possibly updated) process object(s) so later
    # resolve_ref calls find the updated versions.
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = processobj
    elif isinstance(processobj, MutableSequence):
        document_loader.idx[metadata["id"]] = metadata
        for po in processobj:
            document_loader.idx[po["id"]] = po

    if jobobj is not None:
        loadingContext.jobdefaults = jobobj

    loadingContext.loader = document_loader
    loadingContext.avsc_names = avsc_names
    loadingContext.metadata = metadata
    return loadingContext, uri
def load_job_order(args, t, parser, stdin, print_input_deps=False,
                   relative_deps=False, stdout=sys.stdout):
    # type: (argparse.Namespace, Process, argparse.ArgumentParser, IO[Any], bool, bool, IO[Any]) -> Union[int, Tuple[Dict[str, Any], str]]
    """Build the job-order (input) object for process ``t``.

    Inputs may come from a job-order file named on the command line, from
    stdin (``-``), or from per-input command-line options generated from
    the tool's input schema.  Returns ``(job_order_object, input_basedir)``
    on success, or an integer exit code (0 for help/deps output, 1 on
    error).
    """
    job_order_object = None
    # Defined up-front so the later `if toolparser:` check never hits an
    # unbound local (the stdin branch previously left it undefined).
    toolparser = None
    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {
            "path": {"@type": "@id"},
            "format": {"@type": "@id"},
            "id": "@id"}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        # NOTE(review): yaml.load on externally-supplied input can
        # construct arbitrary objects; consider yaml.safe_load.
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(str(e), exc_info=(e if args.debug else False))
            return 1
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[str, str]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir \
                        else os.path.abspath(
                            os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns (document, metadata); the original
                    # dropped the unpacking and bound a tuple here, which
                    # broke the .update() call below.
                    job_order_object, _ = loader.resolve_ref(
                        cmd_line["job_order"])
                except Exception as e:
                    _logger.error(str(e), exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {"id": args.workflow}
            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})
            _logger.debug(u"Parsed job order from command line: %s",
                          json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    # Fill in schema-declared defaults for any inputs not supplied.
    for inp in t.tool["inputs"]:
        if "default" in inp and (
                not job_order_object
                or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        parser.print_help()
        if toolparser:
            # print() call: the original py2 print statement is a syntax
            # error under Python 3.
            print(u"\nOptions for %s " % args.workflow)
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps,
                  basedir=u"file://%s/" % input_basedir)
        return 0

    # Strip bookkeeping keys that are not actual workflow inputs.
    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    return (job_order_object, input_basedir)
def main(args=None, executor=single_job_executor, makeTool=workflow.defaultMakeTool, parser=None):
    """Command-line entry point: parse arguments, load the tool and job
    order, run the executor, and write the workflow output to stdout.

    Returns an integer exit code (0 on success).
    """
    if args is None:
        args = sys.argv[1:]
    if parser is None:
        parser = arg_parser()
    args = parser.parse_args(args)
    if args.quiet:
        _logger.setLevel(logging.WARN)
    if args.debug:
        _logger.setLevel(logging.DEBUG)
    pkg = pkg_resources.require("cwltool")
    if pkg:
        if args.version:
            print "%s %s" % (sys.argv[0], pkg[0].version)
            return 0
        else:
            _logger.info("%s %s", sys.argv[0], pkg[0].version)
    if not args.workflow:
        parser.print_help()
        _logger.error("")
        _logger.error("CWL document required")
        return 1
    t = load_tool(args.workflow, args.update, args.strict, makeTool, args.debug)
    # NOTE(review): load_tool signals failure by returning an int here;
    # isinstance(t, int) would be the idiomatic check.
    if type(t) == int:
        return t
    # NOTE(review): `processobj` and `ctx` are not defined anywhere in this
    # function — both print_rdf/print_dot branches would raise NameError.
    # They look like leftovers from a version of load_tool that returned
    # them; confirm and repair against that version.
    if args.print_rdf:
        printrdf(args.workflow, processobj, ctx, args.rdf_serializer)
        return 0
    if args.print_dot:
        printdot(args.workflow, processobj, ctx, args.rdf_serializer)
        return 0
    if args.tmp_outdir_prefix != 'tmp':
        # Use user defined temp directory (if it exists)
        args.tmp_outdir_prefix = os.path.abspath(args.tmp_outdir_prefix)
        if not os.path.exists(args.tmp_outdir_prefix):
            _logger.error("Intermediate output directory prefix doesn't exist, reverting to default")
            return 1
    if args.tmpdir_prefix != 'tmp':
        # Use user defined prefix (if the folder exists)
        args.tmpdir_prefix = os.path.abspath(args.tmpdir_prefix)
        if not os.path.exists(args.tmpdir_prefix):
            _logger.error("Temporary directory prefix doesn't exist.")
            return 1
    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    else:
        job_order_file = None
    if args.conformance_test:
        loader = Loader({})
    else:
        loader = Loader({"id": "@id", "path": {"@type": "@id"}})
    if job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(e, exc_info=(e if args.debug else False))
            return 1
        toolparser = None
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    # NOTE(review): resolve_ref returns a (document,
                    # metadata) tuple; the unpacking is missing here
                    # (compare the job_order_file branch above), so
                    # job_order_object.update() below would fail.
                    job_order_object = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(e, exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {}
            job_order_object.update({namemap[k]: v for k,v in cmd_line.items()})
            _logger.debug("Parsed job order from command line: %s", job_order_object)
        else:
            job_order_object = None
    if not job_order_object:
        parser.print_help()
        if toolparser:
            print "\nOptions for %s " % args.workflow
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1
    try:
        out = executor(t, job_order_object, input_basedir, args,
                       conformance_test=args.conformance_test,
                       dry_run=args.dry_run,
                       outdir=args.outdir,
                       tmp_outdir_prefix=args.tmp_outdir_prefix,
                       use_container=args.use_container,
                       preserve_environment=args.preserve_environment,
                       pull_image=args.enable_pull,
                       rm_container=args.rm_container,
                       tmpdir_prefix=args.tmpdir_prefix,
                       rm_tmpdir=args.rm_tmpdir,
                       makeTool=makeTool,
                       move_outputs=args.move_outputs
                       )
        # This is the workflow output, it needs to be written
        sys.stdout.write(json.dumps(out, indent=4))
    except (validate.ValidationException) as e:
        _logger.error("Input object failed validation:\n%s", e, exc_info=(e if args.debug else False))
        return 1
    except workflow.WorkflowException as e:
        _logger.error("Workflow error:\n %s", e, exc_info=(e if args.debug else False))
        return 1
    return 0
def validate_document(document_loader,  # type: Loader
                      workflowobj,  # type: CommentedMap
                      uri,  # type: Text
                      enable_dev=False,  # type: bool
                      strict=True,  # type: bool
                      preprocess_only=False,  # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,  # type: bool
                      overrides=None,  # type: List[Dict]
                      metadata=None,  # type: Optional[Dict]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    Resolves ``workflowobj`` against the schema for its declared
    ``cwlVersion``, collects any cwltool overrides from an embedded job
    order, optionally updates the document to the latest CWL version, and
    returns ``(document_loader, avsc_names, processobj, new_metadata, uri)``.
    """
    if overrides is None:
        # Fix: the "cwl:tool" branch below calls overrides.extend(); with
        # the default of None that raised AttributeError.
        overrides = []
    # A bare list is treated as a "$graph" document of several processes.
    if isinstance(workflowobj, list):
        workflowobj = cmap({
            "$graph": workflowobj
        }, fn=uri)
    if not isinstance(workflowobj, dict):
        raise ValueError("workflowjobj must be a dict, got '%s': %s"
                         % (type(workflowobj), workflowobj))
    jobobj = None
    if "cwl:tool" in workflowobj:
        # This is really a job-order document naming the process it runs
        # via "cwl:tool": split it into job object + tool reference, then
        # fetch the actual process document.
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        # Overrides embedded in the job order are hoisted into `overrides`.
        if "http://commonwl.org/cwltool#overrides" in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj["http://commonwl.org/cwltool#overrides"]
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]
    fileuri = urllib.parse.urldefrag(uri)[0]
    if "cwlVersion" not in workflowobj:
        # Fall back to the version recorded in the packed/parent metadata.
        if metadata and 'cwlVersion' in metadata:
            workflowobj['cwlVersion'] = metadata['cwlVersion']
        else:
            # Fix: the two literals used to concatenate as "found.Use".
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>")
    if not isinstance(workflowobj["cwlVersion"], (str, Text)):
        raise Exception("'cwlVersion' must be a string, got %s"
                        % type(workflowobj["cwlVersion"]))
    # strip out version
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in list(ALLUPDATES):
        # print out all the Supported Versions of cwlVersion
        versions = list(ALLUPDATES)  # ALLUPDATES is a dict
        versions.sort()
        raise ValidationException(
            "'cwlVersion' not valid. Supported CWL versions are: "
            "\n{}".format("\n".join(versions)))
    if workflowobj["cwlVersion"] == "draft-2":
        # Partially upgrade draft-2 so it can be resolved with the current
        # loader machinery.
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]
    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]
    if isinstance(avsc_names, Exception):
        raise avsc_names
    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text]
    # Rebuild the loader with the version-specific schema context while
    # keeping the caller's already-populated document index.
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor,
                             skip_schemas=skip_schemas)
    _add_blank_ids(workflowobj)
    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")
    if not new_metadata:
        # Draft-2 documents carry no separate metadata; synthesize it.
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        new_metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]}, fn=fileuri))
    _convert_stdstreams_to_files(workflowobj)
    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri
    schema.validate_doc(avsc_names, processobj, document_loader, strict)
    if new_metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)))
    if jobobj:
        new_metadata[u"cwl:defaults"] = jobobj
    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides
    return document_loader, avsc_names, processobj, new_metadata, uri