def test_secondaryFile_dsl_ref(self):
    ldr = schema_salad.ref_resolver.Loader({})
    ldr.add_context({"secondaryFiles": {"secondaryFilesDSL": True}})

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": ".foo"}), "")
    self.assertEqual(
        {"secondaryFiles": {'pattern': '.foo', 'required': None}}, ra)

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": ".foo?"}), "")
    self.assertEqual(
        {"secondaryFiles": {'pattern': '.foo', 'required': False}}, ra)

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": [".foo"]}), "")
    self.assertEqual(
        {"secondaryFiles": [{'pattern': '.foo', 'required': None}]}, ra)

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": [".foo?"]}), "")
    self.assertEqual(
        {"secondaryFiles": [{'pattern': '.foo', 'required': False}]}, ra)
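# A minimal sketch of what cmap() does (assumed behavior, consistent with the
# casts to CommentedMap elsewhere in this corpus): it recursively wraps plain
# dicts/lists in ruamel.yaml CommentedMap/CommentedSeq so later error messages
# can carry line/column information.
from ruamel.yaml.comments import CommentedMap

from schema_salad.sourceline import cmap

doc = cmap({"type": "File?"})
assert isinstance(doc, CommentedMap)  # the plain dict became a CommentedMap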
def test_mixin(self):
    base_url = schema_salad.ref_resolver.file_uri(
        os.path.join(os.getcwd(), "tests"))
    ldr = schema_salad.ref_resolver.Loader({})
    ra = ldr.resolve_ref(cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"}),
                         base_url=base_url)
    self.assertEqual({'id': 'four', 'one': 'five'}, ra[0])

    ldr = schema_salad.ref_resolver.Loader({"id": "@id"})
    ra = ldr.resolve_all(cmap([{
        "id": "a",
        "m": {"$mixin": get_data("tests/mixin.yml")}
    }, {
        "id": "b",
        "m": {"$mixin": get_data("tests/mixin.yml")}
    }]), base_url=base_url)
    self.assertEqual([{
        'id': base_url + '#a',
        'm': {'id': base_url + u'#a/four', 'one': 'two'},
    }, {
        'id': base_url + '#b',
        'm': {'id': base_url + u'#b/four', 'one': 'two'}
    }], ra[0])
def test_typedsl_ref(self):
    ldr = schema_salad.ref_resolver.Loader({})
    ldr.add_context({
        "File": "http://example.com/File",
        "null": "http://example.com/null",
        "array": "http://example.com/array",
        "type": {
            "@type": "@vocab",
            "typeDSL": True
        }
    })

    ra, _ = ldr.resolve_all(cmap({"type": "File"}), "")
    self.assertEqual({'type': 'File'}, ra)

    ra, _ = ldr.resolve_all(cmap({"type": "File?"}), "")
    self.assertEqual({'type': ['null', 'File']}, ra)

    ra, _ = ldr.resolve_all(cmap({"type": "File[]"}), "")
    self.assertEqual({'type': {'items': 'File', 'type': 'array'}}, ra)

    ra, _ = ldr.resolve_all(cmap({"type": "File[]?"}), "")
    self.assertEqual(
        {'type': ['null', {'items': 'File', 'type': 'array'}]}, ra)
def test_scoped_id() -> None:
    ldr = Loader({})
    ctx = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }  # type: ContextType
    ldr.add_context(ctx)

    ra, _ = ldr.resolve_all(
        cmap({"id": "foo", "bar": {"id": "baz"}}), "http://example.com"
    )
    assert {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    ra, _ = ldr.resolve_all(
        cmap({"location": "foo", "bar": {"location": "baz"}}),
        "http://example.com",
        checklinks=False,
    )
    assert {
        "location": "http://example.com/foo",
        "bar": {"location": "http://example.com/baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    ra, _ = ldr.resolve_all(
        cmap({"id": "foo", "bar": {"location": "baz"}}),
        "http://example.com",
        checklinks=False,
    )
    assert {
        "id": "http://example.com/#foo",
        "bar": {"location": "http://example.com/baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    ra, _ = ldr.resolve_all(
        cmap({"location": "foo", "bar": {"id": "baz"}}),
        "http://example.com",
        checklinks=False,
    )
    assert {
        "location": "http://example.com/foo",
        "bar": {"id": "http://example.com/#baz"},
    } == ra
    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))
def test_secondaryFile_dsl_ref() -> None:
    ldr = Loader({})
    ldr.add_context({"secondaryFiles": {"secondaryFilesDSL": True}})

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": ".foo"}), "")
    assert {"secondaryFiles": {"pattern": ".foo", "required": None}} == ra

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": ".foo?"}), "")
    assert {"secondaryFiles": {"pattern": ".foo", "required": False}} == ra

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": [".foo"]}), "")
    assert {"secondaryFiles": [{"pattern": ".foo", "required": None}]} == ra

    ra, _ = ldr.resolve_all(cmap({"secondaryFiles": [".foo?"]}), "")
    assert {"secondaryFiles": [{"pattern": ".foo", "required": False}]} == ra
def get_overall_res_req(res_reqs):
    """Take the overall of a list of ResourceRequirement,
    i.e., the max of coresMin, coresMax, ramMin, ramMax, tmpdirMin, tmpdirMax
    and the sum of outdirMin, outdirMax."""
    all_res_req = {}
    exception_msgs = []
    for a in max_res_pars + sum_res_pars:
        all_res_req[a] = []
        for res_req in res_reqs:
            if a in res_req:
                if isinstance(res_req[a], int):  # integer check
                    all_res_req[a].append(res_req[a])
                else:
                    msg = SourceLine(res_req, a).makeError(
                        "Non-top-level ResourceRequirement in single container cannot have expressions")
                    exception_msgs.append(msg)
    if exception_msgs:
        raise WorkflowException("\n".join(exception_msgs))
    else:
        overall_res_req = {}
        for a in all_res_req:
            if all_res_req[a]:
                if a in max_res_pars:
                    overall_res_req[a] = max(all_res_req[a])
                elif a in sum_res_pars:
                    overall_res_req[a] = sum(all_res_req[a])
        if overall_res_req:
            overall_res_req["class"] = "ResourceRequirement"
        return cmap(overall_res_req)
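# A hedged usage sketch for get_overall_res_req(); the two tuples mirror the
# parameter groups named in its docstring and are assumptions here, not the
# module's actual definitions.
max_res_pars = ("coresMin", "coresMax", "ramMin", "ramMax", "tmpdirMin", "tmpdirMax")
sum_res_pars = ("outdirMin", "outdirMax")

example_reqs = [
    {"class": "ResourceRequirement", "coresMin": 2, "outdirMin": 1024},
    {"class": "ResourceRequirement", "coresMin": 4, "outdirMin": 512},
]
# get_overall_res_req(example_reqs) would take max(2, 4) == 4 for coresMin,
# sum(1024, 512) == 1536 for outdirMin, and tag the result with
# "class": "ResourceRequirement" before wrapping it with cmap().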
def fetch_document(
        argsworkflow,             # type: Union[Text, Dict[Text, Any]]
        resolver=None,            # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
        fetcher_constructor=None  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document."""
    document_loader = Loader(jobloaderctx,
                             fetcher_constructor=fetcher_constructor)  # type: ignore

    uri = None          # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(argsworkflow,
                                        resolver=resolver,
                                        document_loader=document_loader)
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def test_schemas(self):
    loader = schema_salad.ref_resolver.Loader({})

    ra, _ = loader.resolve_all(cmap({
        u"$schemas": [
            schema_salad.ref_resolver.file_uri(get_data("tests/EDAM.owl"))
        ],
        u"$namespaces": {u"edam": u"http://edamontology.org/"},
        u"edam:has_format": u"edam:format_1915"
    }), "")

    self.assertEqual({
        u"$schemas": [
            schema_salad.ref_resolver.file_uri(get_data("tests/EDAM.owl"))
        ],
        u"$namespaces": {u"edam": u"http://edamontology.org/"},
        u'http://edamontology.org/has_format': u'http://edamontology.org/format_1915'
    }, ra)
def test_idmap(self):
    ldr = schema_salad.ref_resolver.Loader({})
    ldr.add_context({
        "inputs": {
            "@id": "http://example.com/inputs",
            "mapSubject": "id",
            "mapPredicate": "a"
        },
        "outputs": {
            "@type": "@id",
            "identity": True,
        },
        "id": "@id"})

    ra, _ = ldr.resolve_all(cmap({
        "id": "stuff",
        "inputs": {
            "zip": 1,
            "zing": 2
        },
        "outputs": ["out"],
        "other": {
            'n': 9
        }
    }), "http://example2.com/")

    self.assertEqual("http://example2.com/#stuff", ra["id"])
    for item in ra["inputs"]:
        if item["a"] == 2:
            self.assertEqual('http://example2.com/#stuff/zing', item["id"])
        else:
            self.assertEqual('http://example2.com/#stuff/zip', item["id"])
    self.assertEqual(['http://example2.com/#stuff/out'], ra['outputs'])
    self.assertEqual({'n': 9}, ra['other'])
def test_commandLineTool_job_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that non-docker enabled CommandLineTool respects temp directory directives."""
    loading_context = LoadingContext({
        "metadata": {
            "cwlVersion": INTERNAL_VERSION,
            "http://commonwl.org/cwltool#original_cwlVersion": INTERNAL_VERSION,
        }
    })
    clt = CommandLineTool(
        cast(
            CommentedMap,
            cmap({
                "cwlVersion": INTERNAL_VERSION,
                "class": "CommandLineTool",
                "inputs": [],
                "outputs": [],
                "requirements": [],
            }),
        ),
        loading_context,
    )
    tmpdir_prefix = str(tmp_path / "1")
    tmp_outdir_prefix = str(tmp_path / "2")
    runtime_context = RuntimeContext({
        "tmpdir_prefix": tmpdir_prefix,
        "tmp_outdir_prefix": tmp_outdir_prefix,
    })
    job = next(clt.job({}, None, runtime_context))
    assert isinstance(job, JobBase)
    assert job.stagedir and job.stagedir.startswith(tmpdir_prefix)
    assert job.tmpdir and job.tmpdir.startswith(tmpdir_prefix)
    assert job.outdir and job.outdir.startswith(tmp_outdir_prefix)
def fetch_document(argsworkflow,        # type: Union[Text, Dict[Text, Any]]
                   loadingContext=None  # type: Optional[LoadingContext]
                   ):
    # type: (...) -> Tuple[LoadingContext, CommentedMap, Text]
    """Retrieve a CWL document."""
    if loadingContext is None:
        loadingContext = LoadingContext()
        loadingContext.loader = default_loader()
    else:
        loadingContext = loadingContext.copy()
        if loadingContext.loader is None:
            loadingContext.loader = default_loader(loadingContext.fetcher_constructor)

    uri = None          # type: Optional[Text]
    workflowobj = None  # type: Optional[CommentedMap]
    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(argsworkflow,
                                        resolver=loadingContext.resolver,
                                        document_loader=loadingContext.loader)
        workflowobj = loadingContext.loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = argsworkflow["id"] if argsworkflow.get("id") else "_:" + Text(uuid.uuid4())
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        loadingContext.loader.idx[uri] = workflowobj
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
    assert workflowobj is not None

    return loadingContext, workflowobj, uri
def test_schemas():
    loader = schema_salad.ref_resolver.Loader({})

    ra, _ = loader.resolve_all(
        cmap({
            "$schemas": [schema_salad.ref_resolver.file_uri(get_data("tests/EDAM.owl"))],
            "$namespaces": {"edam": "http://edamontology.org/"},
            "edam:has_format": "edam:format_1915",
        }),
        "",
    )

    assert {
        "$schemas": [schema_salad.ref_resolver.file_uri(get_data("tests/EDAM.owl"))],
        "$namespaces": {"edam": "http://edamontology.org/"},
        "http://edamontology.org/has_format": "http://edamontology.org/format_1915",
    } == ra
def test_rdf_datetime() -> None:
    """Affirm that datetime objects can be serialized in makerdf()."""
    ldr = Loader({})
    ctx: ContextType = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }
    ldr.add_context(ctx)

    ra: CommentedMap = cast(
        CommentedMap,
        ldr.resolve_all(
            cmap(
                {
                    "id": "foo",
                    "bar": {"id": "baz"},
                }
            ),
            "http://example.com",
        )[0],
    )
    ra["s:dateCreated"] = datetime.datetime(2020, 10, 8)

    g = makerdf(None, ra, ctx)
    g.serialize(destination=stdout(), format="n3")

    g2 = makerdf(None, CommentedSeq([ra]), ctx)
    g2.serialize(destination=stdout(), format="n3")
def fetch_document(
    argsworkflow: Union[str, Dict[str, Any]],
    loadingContext: Optional[LoadingContext] = None,
) -> Tuple[LoadingContext, CommentedMap, str]:
    """Retrieve a CWL document."""
    if loadingContext is None:
        loadingContext = LoadingContext()
        loadingContext.loader = default_loader()
    else:
        loadingContext = loadingContext.copy()
        if loadingContext.loader is None:
            loadingContext.loader = default_loader(
                loadingContext.fetcher_constructor,
                enable_dev=loadingContext.enable_dev,
                doc_cache=loadingContext.doc_cache,
            )

    if isinstance(argsworkflow, str):
        uri, fileuri = resolve_tool_uri(
            argsworkflow,
            resolver=loadingContext.resolver,
            document_loader=loadingContext.loader,
        )
        workflowobj = loadingContext.loader.fetch(fileuri)
        return loadingContext, workflowobj, uri
    if isinstance(argsworkflow, dict):
        uri = argsworkflow["id"] if argsworkflow.get("id") else "_:" + str(uuid.uuid4())
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        loadingContext.loader.idx[uri] = workflowobj
        return loadingContext, workflowobj, uri
    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
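# A hedged usage sketch of the dict branch above (the tool body is
# illustrative): an in-memory document without an "id" gets a blank-node
# URI ("_:" + uuid4) and is registered in the loader index before return.
from cwltool.load_tool import fetch_document

loading_context, workflowobj, uri = fetch_document(
    {"cwlVersion": "v1.2", "class": "CommandLineTool",
     "inputs": [], "outputs": [], "baseCommand": "true"})
assert uri.startswith("_:")
assert loading_context.loader.idx[uri] is workflowobj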
def test_idmap() -> None:
    ldr = Loader({})
    ldr.add_context(
        {
            "inputs": {
                "@id": "http://example.com/inputs",
                "mapSubject": "id",
                "mapPredicate": "a",
            },
            "outputs": {"@type": "@id", "identity": True},
            "id": "@id",
        }
    )

    ra, _ = ldr.resolve_all(
        cmap(
            {
                "id": "stuff",
                "inputs": {"zip": 1, "zing": 2},
                "outputs": ["out"],
                "other": {"n": 9},
            }
        ),
        "http://example2.com/",
    )
    assert isinstance(ra, CommentedMap)
    assert "http://example2.com/#stuff" == ra["id"]
    for item in ra["inputs"]:
        if item["a"] == 2:
            assert "http://example2.com/#stuff/zing" == item["id"]
        else:
            assert "http://example2.com/#stuff/zip" == item["id"]
    assert ["http://example2.com/#stuff/out"] == ra["outputs"]
    assert {"n": 9} == ra["other"]
def test_nullable_links() -> None:
    ldr = schema_salad.ref_resolver.Loader({})
    ctx = {"link": {"@type": "@id"}}  # type: ContextType
    ldr.add_context(ctx)

    ra, _ = ldr.resolve_all(cmap({"link": None}), "http://example.com",
                            checklinks=True)
    assert {"link": None} == ra
def test_run(self, keepdocker):
    for enable_reuse in (True, False):
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 0

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

        tool = cmap({
            "inputs": [],
            "outputs": [],
            "baseCommand": "ls",
            "arguments": [{"valueFrom": "$(runtime.outdir)"}]
        })
        make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                           collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
                                                 basedir="", make_fs_access=make_fs_access, loader=Loader({}))
        arvtool.formatgraph = None
        for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run_"+str(enable_reuse),
                             make_fs_access=make_fs_access, tmpdir="/tmp"):
            j.run(enable_reuse=enable_reuse)
        runner.api.container_requests().create.assert_called_with(
            body=JsonDiffMatcher({
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp'
                },
                'name': 'test_run_'+str(enable_reuse),
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': 1073741824
                },
                'use_existing': enable_reuse,
                'priority': 1,
                'mounts': {
                    '/tmp': {'kind': 'tmp',
                             "capacity": 1073741824},
                    '/var/spool/cwl': {'kind': 'tmp',
                                       "capacity": 1073741824}
                },
                'state': 'Committed',
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'output_ttl': 0,
                'container_image': 'arvados/jobs',
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
            }))
def test_jsonld_ctx() -> None:
    ldr, _, _, _ = schema_salad.schema.load_schema(
        cmap(
            {
                "$base": "Y",
                "name": "X",
                "$namespaces": {"foo": "http://example.com/foo#"},
                "$graph": [
                    {"name": "ExampleType", "type": "enum", "symbols": ["asym", "bsym"]}
                ],
            }
        )
    )

    ra, _ = ldr.resolve_all(cmap({"foo:bar": "asym"}), "X")

    assert ra == {"http://example.com/foo#bar": "asym"}
def test_mixin() -> None:
    base_url = file_uri(os.path.join(os.getcwd(), "tests"))
    ldr = Loader({})
    path = get_data("tests/mixin.yml")
    assert path
    ra = ldr.resolve_ref(cmap({"$mixin": path, "one": "five"}), base_url=base_url)
    assert {"id": "four", "one": "five"} == ra[0]

    ldr = Loader({"id": "@id"})
    ra = ldr.resolve_all(
        cmap([{"id": "a", "m": {"$mixin": path}}, {"id": "b", "m": {"$mixin": path}}]),
        base_url=base_url,
    )
    assert [
        {"id": base_url + "#a", "m": {"id": base_url + "#a/four", "one": "two"}},
        {"id": base_url + "#b", "m": {"id": base_url + "#b/four", "one": "two"}},
    ] == ra[0]
def _convert_stdstreams_to_files(
    workflowobj: Union[
        MutableMapping[str, Any], MutableSequence[Union[Dict[str, Any], str, int]], str
    ]
) -> None:
    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get("class") == "CommandLineTool":
            with SourceLine(
                workflowobj,
                "outputs",
                ValidationException,
                _logger.isEnabledFor(logging.DEBUG),
            ):
                outputs = workflowobj.get("outputs", [])
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not valid.')
                for out in workflowobj.get("outputs", []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid OutputParameter.".format(out)
                        )
                    for streamtype in ["stdout", "stderr"]:
                        if out.get("type") == streamtype:
                            if "outputBinding" in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype
                                )
                            if streamtype in workflowobj:
                                filename = workflowobj[streamtype]
                            else:
                                filename = str(
                                    hashlib.sha1(  # nosec
                                        json_dumps(workflowobj, sort_keys=True).encode("utf-8")
                                    ).hexdigest()
                                )
                                workflowobj[streamtype] = filename
                            out["type"] = "File"
                            out["outputBinding"] = cmap({"glob": filename})
            for inp in workflowobj.get("inputs", []):
                if inp.get("type") == "stdin":
                    if "inputBinding" in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if "stdin" in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        workflowobj["stdin"] = (
                            "$(inputs.%s.path)" % inp["id"].rpartition("#")[2]
                        )
                        inp["type"] = "File"
        else:
            for entry in workflowobj.values():
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
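# A minimal before/after sketch of the stdout shortcut rewritten above
# (assumes direct access to this helper; the tool body is illustrative).
from schema_salad.sourceline import cmap

tool = cmap({
    "class": "CommandLineTool",
    "inputs": [],
    "outputs": [{"id": "out", "type": "stdout"}],  # shortcut form
})
_convert_stdstreams_to_files(tool)
assert tool["outputs"][0]["type"] == "File"           # rewritten to a File
assert "glob" in tool["outputs"][0]["outputBinding"]  # captured via glob
assert "stdout" in tool  # generated sha1-based filename was filled in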
def test_mixin():
    base_url = schema_salad.ref_resolver.file_uri(
        os.path.join(os.getcwd(), "tests"))
    ldr = schema_salad.ref_resolver.Loader({})
    ra = ldr.resolve_ref(cmap({
        "$mixin": get_data("tests/mixin.yml"),
        "one": "five"
    }), base_url=base_url)
    assert {"id": "four", "one": "five"} == ra[0]

    ldr = schema_salad.ref_resolver.Loader({"id": "@id"})
    ra = ldr.resolve_all(
        cmap([
            {"id": "a", "m": {"$mixin": get_data("tests/mixin.yml")}},
            {"id": "b", "m": {"$mixin": get_data("tests/mixin.yml")}},
        ]),
        base_url=base_url,
    )
    assert [
        {"id": base_url + "#a", "m": {"id": base_url + "#a/four", "one": "two"}},
        {"id": base_url + "#b", "m": {"id": base_url + "#b/four", "one": "two"}},
    ] == ra[0]
def setSecondary(t, fileobj, discovered):
    if isinstance(fileobj, dict) and fileobj.get("class") == "File":
        if "secondaryFiles" not in fileobj:
            fileobj["secondaryFiles"] = cmap(
                [{"location": substitute(fileobj["location"], sf),
                  "class": "File"} for sf in t["secondaryFiles"]])
            if discovered is not None:
                discovered[fileobj["location"]] = fileobj["secondaryFiles"]
    elif isinstance(fileobj, list):
        for e in fileobj:
            setSecondary(t, e, discovered)
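# An illustrative sketch of setSecondary() (assumes cwltool's substitute()
# semantics: a plain ".bai" pattern is appended to the primary location).
t = {"secondaryFiles": [".bai"]}
fileobj = {"class": "File", "location": "keep:abc+99/x.bam"}
discovered = {}
setSecondary(t, fileobj, discovered)
# fileobj["secondaryFiles"] is now
#   [{"location": "keep:abc+99/x.bam.bai", "class": "File"}]
# and the same list is recorded under discovered["keep:abc+99/x.bam"].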
def test_import_list() -> None:
    import schema_salad.ref_resolver
    from schema_salad.sourceline import cmap

    basedir = schema_salad.ref_resolver.file_uri(os.path.dirname(__file__) + "/")
    loader = schema_salad.ref_resolver.Loader({})
    ra, _ = loader.resolve_all(cmap({"foo": {"$import": "list.json"}}), basedir)

    assert {"foo": ["bar", "baz"]} == ra
def test_jsonld_ctx(self):
    ldr, _, _, _ = schema_salad.schema.load_schema(cmap({
        "$base": "Y",
        "name": "X",
        "$namespaces": {
            "foo": "http://example.com/foo#"
        },
        "$graph": [{
            "name": "ExampleType",
            "type": "enum",
            "symbols": ["asym", "bsym"]}]
    }))

    ra, _ = ldr.resolve_all(cmap({"foo:bar": "asym"}), "X")

    self.assertEqual(ra, {'http://example.com/foo#bar': 'asym'})
def test_run(self, keepdocker):
    for enable_reuse in (True, False):
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

        tool = cmap({
            "inputs": [],
            "outputs": [],
            "baseCommand": "ls",
            "arguments": [{"valueFrom": "$(runtime.outdir)"}]
        })
        make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                           collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
                                                 basedir="", make_fs_access=make_fs_access, loader=Loader({}))
        arvtool.formatgraph = None
        for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run_"+str(enable_reuse),
                             make_fs_access=make_fs_access, tmpdir="/tmp"):
            j.run(enable_reuse=enable_reuse)
        runner.api.container_requests().create.assert_called_with(
            body=JsonDiffMatcher({
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp'
                },
                'name': 'test_run_'+str(enable_reuse),
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': 1073741824
                },
                'use_existing': enable_reuse,
                'priority': 1,
                'mounts': {
                    '/tmp': {'kind': 'tmp',
                             "capacity": 1073741824},
                    '/var/spool/cwl': {'kind': 'tmp',
                                       "capacity": 1073741824}
                },
                'state': 'Committed',
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'container_image': 'arvados/jobs',
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
            }))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("swagger")
    parser.add_argument("annotations")
    parser.add_argument("url")
    parser.add_argument("--print-rdf", action="store_true", default=False)
    parser.add_argument("--serve", action="store_true", default=False)
    parser.add_argument("--fuseki-path", type=str, default=".")
    args = parser.parse_args()

    warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)

    with open(args.annotations) as f2:
        annotations = yaml.load(f2)

    with open(args.swagger) as f:
        sld = swg2salad.swg2salad(yaml.load(f), annotations)

    sld["$base"] = "http://ga4gh.org/schemas/tool-registry-schemas"
    sld["name"] = "file://" + os.path.realpath(args.swagger)

    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        cmap(sld))

    txt = document_loader.fetch_text(
        urlparse.urljoin("file://" + os.getcwd() + "/", args.url))
    r = yaml.load(txt)

    validate_doc(avsc_names, r, document_loader, True)
    sys.stderr.write("API returned valid response\n")

    toolreg = Namespace("http://ga4gh.org/schemas/tool-registry-schemas#")
    td = Namespace(
        "http://ga4gh.org/schemas/tool-registry-schemas#ToolDescriptor/")

    if args.print_rdf or args.serve:
        g = jsonld_context.makerdf(args.url, r, document_loader.ctx)
        for s, _, o in g.triples((None, td["type"], Literal("CWL"))):
            for _, _, d in g.triples((s, toolreg["descriptor"], None)):
                expand_cwl(d, unicode(s), g)

    if args.print_rdf:
        print(g.serialize(format="turtle"))

    if args.serve:
        t = tempfile.NamedTemporaryFile(suffix=".ttl")
        g.serialize(t, format="turtle")
        t.flush()
        subprocess.check_call(
            ["./fuseki-server", "--file=" + t.name, "/tools"],
            cwd=args.fuseki_path)
def add_dep(fn, upstream, set_version, install_to):
    document_loader, workflowobj, uri = cwltool.load_tool.fetch_document(fn)
    namespaces = workflowobj.get("$namespaces", cmap({}))
    document_loader.idx = {}

    found = []

    def _add(wf):
        found.append(True)
        hints = wf.setdefault("hints", {})
        obj = cmap({"upstream": upstream})
        if set_version:
            obj["version"] = set_version
        if install_to:
            obj["installTo"] = install_to
        if isinstance(hints, list):
            for h in hints:
                if expand_ns(namespaces, h["class"]) == CWLDEP_DEPENDENCIES_URL:
                    for u in h["dependencies"]:
                        if u["upstream"] == upstream:
                            u.update(obj)
                            return
                    h["dependencies"].append(cmap(obj))
                    return
            hints.append(cmap({
                "class": "dep:Dependencies",
                "dependencies": [obj]
            }))
        elif isinstance(hints, dict):
            for h in hints:
                if expand_ns(namespaces, h) == CWLDEP_DEPENDENCIES_URL:
                    for u in hints[h]["dependencies"]:
                        if u["upstream"] == upstream:
                            u.update(obj)
                            return
                    hints[h]["dependencies"].append(cmap(obj))
                    return
            hints["dep:Dependencies"] = cmap({"dependencies": [obj]})

    visit_class(workflowobj, ("Workflow",), _add)

    if not found:
        logging.error("No Workflow found")

    namespaces["dep"] = CWLDEP_URL
    workflowobj["$namespaces"] = namespaces

    del workflowobj["id"]

    with open("_" + fn + "_", "w") as f:
        ruamel.yaml.round_trip_dump(workflowobj, f)

    os.rename("_" + fn + "_", fn)
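# Illustrative result of add_dep() on a workflow with no prior hints
# (YAML shown as a comment; the upstream URL and version are made up).
# Since wf.setdefault("hints", {}) yields a dict, the dict branch runs:
#
#   hints:
#     dep:Dependencies:
#       dependencies:
#         - upstream: http://example.com/dep.cwl
#           version: "1.0"
#
# A second call with the same upstream updates the existing entry in place
# instead of appending a duplicate.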
def _convert_stdstreams_to_files(workflowobj):
    # type: (Union[Dict[Text, Any], List[Dict[Text, Any]]]) -> None
    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get('class') == 'CommandLineTool':
            with SourceLine(workflowobj, "outputs", ValidationException,
                            _logger.isEnabledFor(logging.DEBUG)):
                outputs = workflowobj.get('outputs', [])
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not '
                                              'valid.')
                for out in workflowobj.get('outputs', []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid "
                            "OutputParameter.".format(out))
                    for streamtype in ['stdout', 'stderr']:
                        if out.get('type') == streamtype:
                            if 'outputBinding' in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype)
                            if streamtype in workflowobj:
                                filename = workflowobj[streamtype]
                            else:
                                filename = Text(
                                    hashlib.sha1(
                                        json_dumps(workflowobj, sort_keys=True).encode(
                                            'utf-8')).hexdigest())
                                workflowobj[streamtype] = filename
                            out['type'] = 'File'
                            out['outputBinding'] = cmap({'glob': filename})
            for inp in workflowobj.get('inputs', []):
                if inp.get('type') == 'stdin':
                    if 'inputBinding' in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if 'stdin' in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        workflowobj['stdin'] = \
                            "$(inputs.%s.path)" % \
                            inp['id'].rpartition('#')[2]
                        inp['type'] = 'File'
        else:
            for entry in itervalues(workflowobj):
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
def test_subscoped_id():
    ldr = schema_salad.ref_resolver.Loader({})
    ctx = {"id": "@id", "bar": {"subscope": "bar"}}
    ldr.add_context(ctx)

    ra, _ = ldr.resolve_all(
        cmap({"id": "foo", "bar": {"id": "baz"}}), "http://example.com"
    )
    assert {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/bar/baz"},
    } == ra
def test_blank_node_id() -> None:
    # Test that blank nodes are passed through and not considered
    # relative paths.  Blank nodes (also called anonymous ids) are
    # URIs starting with "_:".  They are randomly generated
    # placeholders mainly used internally where an id is needed but
    # was not given.
    ldr = Loader({})
    ctx = {"id": "@id"}  # type: ContextType
    ldr.add_context(ctx)

    ra, _ = ldr.resolve_all(cmap({"id": "_:foo"}), "http://example.com")
    assert {"id": "_:foo"} == ra
def test_run(self, list_images_in_arv):
    for enable_reuse in (True, False):
        runner = mock.MagicMock()
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False
        runner.num_retries = 0
        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

        list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        tool = cmap({
            "inputs": [],
            "outputs": [],
            "baseCommand": "ls",
            "arguments": [{"valueFrom": "$(runtime.outdir)"}]
        })
        make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                           collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names,
                                                 basedir="", make_fs_access=make_fs_access, loader=Loader({}))
        arvtool.formatgraph = None
        for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
            j.run(enable_reuse=enable_reuse)
        runner.api.jobs().create.assert_called_with(
            body=JsonDiffMatcher({
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'script_parameters': {
                    'tasks': [{
                        'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
                        'command': ['ls', '$(task.outdir)']
                    }],
                },
                'script_version': 'master',
                'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
                'repository': 'arvados',
                'script': 'crunchrunner',
                'runtime_constraints': {
                    'docker_image': 'arvados/jobs',
                    'min_cores_per_node': 1,
                    'min_ram_mb_per_node': 1024,
                    'min_scratch_mb_per_node': 2048  # tmpdirSize + outdirSize
                }
            }),
            find_or_create=enable_reuse,
            filters=[['repository', '=', 'arvados'],
                     ['script', '=', 'crunchrunner'],
                     ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                     ['docker_image_locator', 'in docker', 'arvados/jobs']]
        )
def test_subscoped_id(self):
    ldr = schema_salad.ref_resolver.Loader({})
    ctx = {
        "id": "@id",
        "bar": {
            "subscope": "bar",
        }
    }
    ldr.add_context(ctx)

    ra, _ = ldr.resolve_all(cmap({
        "id": "foo",
        "bar": {
            "id": "baz"
        }
    }), "http://example.com")
    self.assertEqual({
        'id': 'http://example.com/#foo',
        'bar': {'id': 'http://example.com/#foo/bar/baz'},
    }, ra)
def _convert_stdstreams_to_files(workflowobj):
    # type: (Union[Dict[Text, Any], List[Dict[Text, Any]]]) -> None
    if isinstance(workflowobj, dict):
        if workflowobj.get('class') == 'CommandLineTool':
            for out in workflowobj.get('outputs', []):
                for streamtype in ['stdout', 'stderr']:
                    if out.get('type') == streamtype:
                        if 'outputBinding' in out:
                            raise ValidationException(
                                "Not allowed to specify outputBinding when"
                                " using %s shortcut." % streamtype)
                        if streamtype in workflowobj:
                            filename = workflowobj[streamtype]
                        else:
                            filename = Text(hashlib.sha1(
                                json.dumps(workflowobj,
                                           sort_keys=True).encode('utf-8')).hexdigest())
                            workflowobj[streamtype] = filename
                        out['type'] = 'File'
                        out['outputBinding'] = cmap({'glob': filename})
            for inp in workflowobj.get('inputs', []):
                if inp.get('type') == 'stdin':
                    if 'inputBinding' in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if 'stdin' in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        workflowobj['stdin'] = \
                            "$(inputs.%s.path)" % \
                            inp['id'].rpartition('#')[2]
                        inp['type'] = 'File'
        else:
            for entry in itervalues(workflowobj):
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, list):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
def fetch_document(argsworkflow,             # type: Union[Text, Dict[Text, Any]]
                   resolver=None,            # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: FetcherConstructorType
                   ):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document."""
    document_loader = default_loader(fetcher_constructor)  # type: ignore

    uri = None          # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(argsworkflow,
                                        resolver=resolver,
                                        document_loader=document_loader)
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def test_timelimit(self, keepdocker):
    arv_docker_clear_cache()

    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.intermediate_output_ttl = 0
    runner.secret_store = cwltool.secrets.SecretStore()

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}

    tool = cmap({
        "inputs": [],
        "outputs": [],
        "baseCommand": "ls",
        "arguments": [{"valueFrom": "$(runtime.outdir)"}],
        "id": "#",
        "class": "CommandLineTool",
        "hints": [
            {
                "class": "http://commonwl.org/cwltool#TimeLimit",
                "timelimit": 42
            }
        ]
    })

    loadingContext, runtimeContext = self.helper(runner)
    runtimeContext.name = "test_timelimit"

    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
    arvtool.formatgraph = None

    for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
        j.run(runtimeContext)

    _, kwargs = runner.api.container_requests().create.call_args
    self.assertEqual(42, kwargs['body']['scheduling_parameters'].get('max_run_time'))
def fetch_document(argsworkflow,             # type: Union[Text, dict[Text, Any]]
                   resolver=None,            # type: Callable[[Loader, Union[Text, dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                   ):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document."""
    document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"},
                             fetcher_constructor=fetcher_constructor)

    uri = None          # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, basestring):
        split = urlparse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            uri = "file://" + os.path.abspath(argsworkflow)
        elif resolver:
            uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        fileuri = urlparse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
def test_initial_work_dir(self, collection_mock, keepdocker):
    arv_docker_clear_cache()
    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}

    sourcemock = mock.MagicMock()

    def get_collection_mock(p):
        if "/" in p:
            return (sourcemock, p.split("/", 1)[1])
        else:
            return (sourcemock, "")

    runner.fs_access.get_collection.side_effect = get_collection_mock

    vwdmock = mock.MagicMock()
    collection_mock.return_value = vwdmock
    vwdmock.portable_data_hash.return_value = "99999999999999999999999999999996+99"

    tool = cmap({
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "InitialWorkDirRequirement",
            "listing": [{
                "class": "File",
                "basename": "foo",
                "location": "keep:99999999999999999999999999999995+99/bar"
            }, {
                "class": "Directory",
                "basename": "foo2",
                "location": "keep:99999999999999999999999999999995+99"
            }, {
                "class": "File",
                "basename": "filename",
                "location": "keep:99999999999999999999999999999995+99/baz/filename"
            }, {
                "class": "Directory",
                "basename": "subdir",
                "location": "keep:99999999999999999999999999999995+99/subdir"
            }]
        }],
        "baseCommand": "ls"
    })
    make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                       collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
                                             make_fs_access=make_fs_access, loader=Loader({}))
    arvtool.formatgraph = None
    for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_initial_work_dir",
                         make_fs_access=make_fs_access, tmpdir="/tmp"):
        j.run()

    call_args, call_kwargs = runner.api.container_requests().create.call_args

    vwdmock.copy.assert_has_calls([mock.call('bar', 'foo', source_collection=sourcemock)])
    vwdmock.copy.assert_has_calls([mock.call('', 'foo2', source_collection=sourcemock)])
    vwdmock.copy.assert_has_calls([mock.call('baz/filename', 'filename', source_collection=sourcemock)])
    vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)])

    call_body_expected = {
        'environment': {
            'HOME': '/var/spool/cwl',
            'TMPDIR': '/tmp'
        },
        'name': 'test_initial_work_dir',
        'runtime_constraints': {
            'vcpus': 1,
            'ram': 1073741824
        },
        'use_existing': True,
        'priority': 1,
        'mounts': {
            '/tmp': {'kind': 'tmp',
                     "capacity": 1073741824},
            '/var/spool/cwl': {'kind': 'tmp',
                               "capacity": 1073741824},
            '/var/spool/cwl/foo': {
                'kind': 'collection',
                'path': 'foo',
                'portable_data_hash': '99999999999999999999999999999996+99'
            },
            '/var/spool/cwl/foo2': {
                'kind': 'collection',
                'path': 'foo2',
                'portable_data_hash': '99999999999999999999999999999996+99'
            },
            '/var/spool/cwl/filename': {
                'kind': 'collection',
                'path': 'filename',
                'portable_data_hash': '99999999999999999999999999999996+99'
            },
            '/var/spool/cwl/subdir': {
                'kind': 'collection',
                'path': 'subdir',
                'portable_data_hash': '99999999999999999999999999999996+99'
            }
        },
        'state': 'Committed',
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'output_path': '/var/spool/cwl',
        'container_image': 'arvados/jobs',
        'command': ['ls'],
        'cwd': '/var/spool/cwl',
        'scheduling_parameters': {},
        'properties': {}
    }

    call_body = call_kwargs.get('body', None)
    self.assertNotEqual(None, call_body)
    for key in call_body:
        self.assertEqual(call_body_expected.get(key), call_body.get(key))
def validate_document(document_loader,          # type: Loader
                      workflowobj,              # type: CommentedMap
                      uri,                      # type: Text
                      enable_dev=False,         # type: bool
                      strict=True,              # type: bool
                      preprocess_only=False,    # type: bool
                      fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document."""
    if isinstance(workflowobj, list):
        workflowobj = {
            "$graph": workflowobj
        }

    if not isinstance(workflowobj, dict):
        raise ValueError("workflowjobj must be a dict")

    jobobj = None
    if "cwl:tool" in workflowobj:
        jobobj, _ = document_loader.resolve_all(workflowobj, uri)
        uri = urlparse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urlparse.urldefrag(uri)[0]

    if "cwlVersion" in workflowobj:
        if not isinstance(workflowobj["cwlVersion"], (str, Text)):
            raise Exception("'cwlVersion' must be a string, "
                            "got %s" % type(workflowobj["cwlVersion"]))
        workflowobj["cwlVersion"] = re.sub(
            r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
            workflowobj["cwlVersion"])
    else:
        _logger.warn("No cwlVersion found, treating this file as draft-2.")
        workflowobj["cwlVersion"] = "draft-2"

    if workflowobj["cwlVersion"] == "draft-2":
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, unicode]
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not metadata:
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]},
            fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, metadata, uri

    schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, metadata)))

    if jobobj:
        metadata[u"cwl:defaults"] = jobobj

    return document_loader, avsc_names, processobj, metadata, uri
def test_run(self, keepdocker):
    for enable_reuse in (True, False):
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 0
        runner.secret_store = cwltool.secrets.SecretStore()

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        tool = cmap({
            "inputs": [],
            "outputs": [],
            "baseCommand": "ls",
            "arguments": [{"valueFrom": "$(runtime.outdir)"}],
            "id": "#",
            "class": "CommandLineTool"
        })

        loadingContext, runtimeContext = self.helper(runner, enable_reuse)

        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
        arvtool.formatgraph = None

        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
            j.run(runtimeContext)
        runner.api.container_requests().create.assert_called_with(
            body=JsonDiffMatcher({
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp'
                },
                'name': 'test_run_'+str(enable_reuse),
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': 1073741824
                },
                'use_existing': enable_reuse,
                'priority': 500,
                'mounts': {
                    '/tmp': {'kind': 'tmp',
                             "capacity": 1073741824},
                    '/var/spool/cwl': {'kind': 'tmp',
                                       "capacity": 1073741824}
                },
                'state': 'Committed',
                'output_name': 'Output for step test_run_'+str(enable_reuse),
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'output_ttl': 0,
                'container_image': '99999999999999999999999999999993+99',
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
                'secret_mounts': {}
            }))
def test_secrets(self, keepdocker):
    arv_docker_clear_cache()

    runner = mock.MagicMock()
    runner.ignore_docker_for_reuse = False
    runner.intermediate_output_ttl = 0
    runner.secret_store = cwltool.secrets.SecretStore()

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}

    document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

    tool = cmap({
        "arguments": ["md5sum", "example.conf"],
        "class": "CommandLineTool",
        "hints": [
            {
                "class": "http://commonwl.org/cwltool#Secrets",
                "secrets": [
                    "#secret_job.cwl/pw"
                ]
            }
        ],
        "id": "#secret_job.cwl",
        "inputs": [
            {
                "id": "#secret_job.cwl/pw",
                "type": "string"
            }
        ],
        "outputs": [],
        "requirements": [
            {
                "class": "InitialWorkDirRequirement",
                "listing": [
                    {
                        "entry": "username: user\npassword: $(inputs.pw)\n",
                        "entryname": "example.conf"
                    }
                ]
            }
        ]
    })

    loadingContext, runtimeContext = self.helper(runner)
    runtimeContext.name = "test_secrets"

    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
    arvtool.formatgraph = None

    job_order = {"pw": "blorp"}
    runner.secret_store.store(["pw"], job_order)

    for j in arvtool.job(job_order, mock.MagicMock(), runtimeContext):
        j.run(runtimeContext)
    runner.api.container_requests().create.assert_called_with(
        body=JsonDiffMatcher({
            'environment': {
                'HOME': '/var/spool/cwl',
                'TMPDIR': '/tmp'
            },
            'name': 'test_secrets',
            'runtime_constraints': {
                'vcpus': 1,
                'ram': 1073741824
            },
            'use_existing': True,
            'priority': 500,
            'mounts': {
                '/tmp': {'kind': 'tmp',
                         "capacity": 1073741824},
                '/var/spool/cwl': {'kind': 'tmp',
                                   "capacity": 1073741824}
            },
            'state': 'Committed',
            'output_name': 'Output for step test_secrets',
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
            'output_path': '/var/spool/cwl',
            'output_ttl': 0,
            'container_image': '99999999999999999999999999999993+99',
            'command': ['md5sum', 'example.conf'],
            'cwd': '/var/spool/cwl',
            'scheduling_parameters': {},
            'properties': {},
            "secret_mounts": {
                "/var/spool/cwl/example.conf": {
                    "content": "username: user\npassword: blorp\n",
                    "kind": "text"
                }
            }
        }))
def validate_document(document_loader,           # type: Loader
                      workflowobj,               # type: CommentedMap
                      uri,                       # type: Text
                      overrides,                 # type: List[Dict]
                      metadata,                  # type: Dict[Text, Any]
                      enable_dev=False,          # type: bool
                      strict=True,               # type: bool
                      preprocess_only=False,     # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,         # type: bool
                      do_validate=True           # type: bool
                      ):
    # type: (...) -> Tuple[Loader, schema.Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document."""
    if isinstance(workflowobj, MutableSequence):
        workflowobj = cmap({
            "$graph": workflowobj
        }, fn=uri)

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri, checklinks=do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]

        if isinstance(jobobj, CommentedMap) and "http://commonwl.org/cwltool#overrides" in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj["http://commonwl.org/cwltool#overrides"]

        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]
    if "cwlVersion" not in workflowobj:
        if 'cwlVersion' in metadata:
            workflowobj['cwlVersion'] = metadata['cwlVersion']
        else:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>.\n"
                "Note: if this is a CWL draft-2 (pre v1.0) document then it "
                "will need to be upgraded first.")

    if not isinstance(workflowobj["cwlVersion"], string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(workflowobj["cwlVersion"])))
    # strip out version
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in list(ALLUPDATES):
        # print out all the Supported Versions of cwlVersion
        versions = []
        for version in list(ALLUPDATES):
            if "dev" in version:
                version += " (with --enable-dev flag only)"
            versions.append(version)
        versions.sort()
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(versions)))

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor,
                             skip_schemas=skip_schemas)

    _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not new_metadata and isinstance(processobj, CommentedMap):
        new_metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]},
            fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri

    if do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if new_metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)))

    if jobobj is not None:
        new_metadata[u"cwl:defaults"] = jobobj

    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides

    return document_loader, avsc_names, processobj, new_metadata, uri
def test_scoped_id(self):
    ldr = schema_salad.ref_resolver.Loader({})
    ctx = {
        "id": "@id",
        "location": {
            "@id": "@id",
            "@type": "@id"
        },
        "bar": "http://example.com/bar",
        "ex": "http://example.com/"
    }
    ldr.add_context(ctx)

    ra, _ = ldr.resolve_all(cmap({
        "id": "foo",
        "bar": {
            "id": "baz"
        }
    }), "http://example.com")
    self.assertEqual({
        'id': 'http://example.com/#foo',
        'bar': {'id': 'http://example.com/#foo/baz'},
    }, ra)

    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    ra, _ = ldr.resolve_all(cmap({
        "location": "foo",
        "bar": {
            "location": "baz"
        }
    }), "http://example.com", checklinks=False)
    self.assertEqual({
        'location': 'http://example.com/foo',
        'bar': {'location': 'http://example.com/baz'},
    }, ra)

    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    ra, _ = ldr.resolve_all(cmap({
        "id": "foo",
        "bar": {
            "location": "baz"
        }
    }), "http://example.com", checklinks=False)
    self.assertEqual({
        'id': 'http://example.com/#foo',
        'bar': {'location': 'http://example.com/baz'},
    }, ra)

    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))

    ra, _ = ldr.resolve_all(cmap({
        "location": "foo",
        "bar": {
            "id": "baz"
        }
    }), "http://example.com", checklinks=False)
    self.assertEqual({
        'location': 'http://example.com/foo',
        'bar': {'id': 'http://example.com/#baz'},
    }, ra)

    g = makerdf(None, ra, ctx)
    print(g.serialize(format="n3"))
def resolve_and_validate_document(loadingContext,
                                  workflowobj,
                                  uri,
                                  preprocess_only=False,  # type: bool
                                  skip_schemas=None,      # type: Optional[bool]
                                  ):
    # type: (...) -> Tuple[LoadingContext, Text]
    """Validate a CWL document."""
    loadingContext = loadingContext.copy()

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        jobobj, _ = loadingContext.loader.resolve_all(
            workflowobj, uri, checklinks=loadingContext.do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        workflowobj = fetch_document(uri, loadingContext)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]

    cwlVersion = workflowobj.get("cwlVersion")
    if not cwlVersion:
        fileobj = fetch_document(fileuri, loadingContext)[1]
        cwlVersion = fileobj.get("cwlVersion")

    if not cwlVersion:
        raise ValidationException(
            "No cwlVersion found. "
            "Use the following syntax in your CWL document to declare "
            "the version: cwlVersion: <version>.\n"
            "Note: if this is a CWL draft-2 (pre v1.0) document then it "
            "will need to be upgraded first.")

    if not isinstance(cwlVersion, string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(type(cwlVersion)))
    # strip out version
    cwlVersion = re.sub(r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "", cwlVersion)
    if cwlVersion not in list(ALLUPDATES):
        # print out all the supported versions of cwlVersion
        versions = []
        for version in list(ALLUPDATES):
            if "dev" in version:
                version += " (with --enable-dev flag only)"
            versions.append(version)
        versions.sort()
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(versions)))

    if isinstance(jobobj, CommentedMap) and "http://commonwl.org/cwltool#overrides" in jobobj:
        loadingContext.overrides_list.extend(resolve_overrides(jobobj, uri, uri))
        del jobobj["http://commonwl.org/cwltool#overrides"]

    if isinstance(jobobj, CommentedMap) and "https://w3id.org/cwl/cwl#requirements" in jobobj:
        if cwlVersion not in ("v1.1.0-dev1",):
            raise ValidationException(
                "`cwl:requirements` in the input object is not part of CWL "
                "v1.0. You can adjust to use `cwltool:overrides` instead; or you "
                "can set the cwlVersion to v1.1.0-dev1 or greater and re-run with "
                "--enable-dev.")
        loadingContext.overrides_list.append(
            {"overrideTarget": uri,
             "requirements": jobobj["https://w3id.org/cwl/cwl#requirements"]})
        del jobobj["https://w3id.org/cwl/cwl#requirements"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(cwlVersion)[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=loadingContext.loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=loadingContext.fetcher_constructor,
                             skip_schemas=skip_schemas)

    if cwlVersion == "v1.0":
        _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=loadingContext.do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a CommentedMap or CommentedSeq.")
    if not isinstance(metadata, CommentedMap):
        raise ValidationException(
            "metadata must be a CommentedMap, was %s" % type(metadata))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return loadingContext, uri

    if loadingContext.do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader,
                            loadingContext.strict)

    if loadingContext.do_update:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri,
            loadingContext.enable_dev, metadata)))

    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = processobj
    elif isinstance(processobj, MutableSequence):
        document_loader.idx[metadata["id"]] = metadata
        for po in processobj:
            document_loader.idx[po["id"]] = po

    if jobobj is not None:
        loadingContext.jobdefaults = jobobj

    loadingContext.loader = document_loader
    loadingContext.avsc_names = avsc_names
    loadingContext.metadata = metadata

    return loadingContext, uri
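# A minimal usage sketch for resolve_and_validate_document(), assuming the
# cwltool-style helpers seen above (fetch_document, LoadingContext) are
# importable; the URI is hypothetical and constructor arguments vary between
# releases, so treat this as a hedged sketch rather than the canonical API.
loadingContext = LoadingContext()
uri = "file:///path/to/workflow.cwl"
workflowobj = fetch_document(uri, loadingContext)[1]
loadingContext, uri = resolve_and_validate_document(loadingContext, workflowobj, uri)
# On return the context carries the schema-aware loader, the Avro names used
# for validation, and the document metadata (loadingContext.loader,
# loadingContext.avsc_names, loadingContext.metadata).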
def test_resource_requirements(self, keepdocker):
    arv_docker_clear_cache()
    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}

    tool = cmap({
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "ResourceRequirement",
            "coresMin": 3,
            "ramMin": 3000,
            "tmpdirMin": 4000,
            "outdirMin": 5000
        }, {
            "class": "http://arvados.org/cwl#RuntimeConstraints",
            "keep_cache": 512
        }, {
            "class": "http://arvados.org/cwl#APIRequirement",
        }, {
            "class": "http://arvados.org/cwl#PartitionRequirement",
            "partition": "blurb"
        }],
        "baseCommand": "ls"
    })
    make_fs_access = functools.partial(
        arvados_cwl.CollectionFsAccess,
        collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
                                             avsc_names=avsc_names,
                                             make_fs_access=make_fs_access,
                                             loader=Loader({}))
    arvtool.formatgraph = None
    for j in arvtool.job({}, mock.MagicMock(),
                         basedir="", name="test_resource_requirements",
                         make_fs_access=make_fs_access, tmpdir="/tmp"):
        j.run()

    call_args, call_kwargs = runner.api.container_requests().create.call_args

    call_body_expected = {
        'environment': {
            'HOME': '/var/spool/cwl',
            'TMPDIR': '/tmp'
        },
        'name': 'test_resource_requirements',
        'runtime_constraints': {
            'vcpus': 3,
            'ram': 3145728000,
            'keep_cache_ram': 536870912,
            'API': True
        },
        'use_existing': True,
        'priority': 1,
        'mounts': {
            '/tmp': {'kind': 'tmp',
                     "capacity": 4194304000},
            '/var/spool/cwl': {'kind': 'tmp',
                               "capacity": 5242880000}
        },
        'state': 'Committed',
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'output_path': '/var/spool/cwl',
        'container_image': 'arvados/jobs',
        'command': ['ls'],
        'cwd': '/var/spool/cwl',
        'scheduling_parameters': {
            'partitions': ['blurb']
        },
        'properties': {}
    }

    call_body = call_kwargs.get('body', None)
    self.assertNotEqual(None, call_body)
    for key in call_body:
        self.assertEqual(call_body_expected.get(key), call_body.get(key))
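# Worked arithmetic behind the expected body above (assuming the CWL/Arvados
# convention that the *Min hints and keep_cache are given in MiB while the
# API body is in bytes, i.e. a scale factor of 2**20):
assert 3000 * 2**20 == 3145728000  # ramMin      -> runtime_constraints['ram']
assert 512 * 2**20 == 536870912    # keep_cache  -> keep_cache_ram
assert 4000 * 2**20 == 4194304000  # tmpdirMin   -> /tmp mount capacity
assert 5000 * 2**20 == 5242880000  # outdirMin   -> /var/spool/cwl capacity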
def test_resource_requirements(self, keepdocker):
    arv_docker_clear_cache()
    runner = mock.MagicMock()
    runner.ignore_docker_for_reuse = False
    runner.intermediate_output_ttl = 3600
    runner.secret_store = cwltool.secrets.SecretStore()

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}

    tool = cmap({
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "ResourceRequirement",
            "coresMin": 3,
            "ramMin": 3000,
            "tmpdirMin": 4000,
            "outdirMin": 5000
        }, {
            "class": "http://arvados.org/cwl#RuntimeConstraints",
            "keep_cache": 512
        }, {
            "class": "http://arvados.org/cwl#APIRequirement",
        }, {
            "class": "http://arvados.org/cwl#PartitionRequirement",
            "partition": "blurb"
        }, {
            "class": "http://arvados.org/cwl#IntermediateOutput",
            "outputTTL": 7200
        }, {
            "class": "http://arvados.org/cwl#ReuseRequirement",
            "enableReuse": False
        }],
        "baseCommand": "ls",
        "id": "#",
        "class": "CommandLineTool"
    })

    loadingContext, runtimeContext = self.helper(runner)
    runtimeContext.name = "test_resource_requirements"

    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
    arvtool.formatgraph = None
    for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
        j.run(runtimeContext)

    call_args, call_kwargs = runner.api.container_requests().create.call_args

    call_body_expected = {
        'environment': {
            'HOME': '/var/spool/cwl',
            'TMPDIR': '/tmp'
        },
        'name': 'test_resource_requirements',
        'runtime_constraints': {
            'vcpus': 3,
            'ram': 3145728000,
            'keep_cache_ram': 536870912,
            'API': True
        },
        'use_existing': False,
        'priority': 500,
        'mounts': {
            '/tmp': {'kind': 'tmp',
                     "capacity": 4194304000},
            '/var/spool/cwl': {'kind': 'tmp',
                               "capacity": 5242880000}
        },
        'state': 'Committed',
        'output_name': 'Output for step test_resource_requirements',
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'output_path': '/var/spool/cwl',
        'output_ttl': 7200,
        'container_image': '99999999999999999999999999999993+99',
        'command': ['ls'],
        'cwd': '/var/spool/cwl',
        'scheduling_parameters': {
            'partitions': ['blurb']
        },
        'properties': {},
        'secret_mounts': {}
    }

    call_body = call_kwargs.get('body', None)
    self.assertNotEqual(None, call_body)
    for key in call_body:
        self.assertEqual(call_body_expected.get(key), call_body.get(key))
def test_initial_work_dir(self, collection_mock, keepdocker):
    arv_docker_clear_cache()
    runner = mock.MagicMock()
    runner.ignore_docker_for_reuse = False
    runner.intermediate_output_ttl = 0
    runner.secret_store = cwltool.secrets.SecretStore()

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}

    sourcemock = mock.MagicMock()

    def get_collection_mock(p):
        if "/" in p:
            return (sourcemock, p.split("/", 1)[1])
        else:
            return (sourcemock, "")
    runner.fs_access.get_collection.side_effect = get_collection_mock

    vwdmock = mock.MagicMock()
    collection_mock.side_effect = lambda *args, **kwargs: CollectionMock(vwdmock, *args, **kwargs)

    tool = cmap({
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "InitialWorkDirRequirement",
            "listing": [{
                "class": "File",
                "basename": "foo",
                "location": "keep:99999999999999999999999999999995+99/bar"
            }, {
                "class": "Directory",
                "basename": "foo2",
                "location": "keep:99999999999999999999999999999995+99"
            }, {
                "class": "File",
                "basename": "filename",
                "location": "keep:99999999999999999999999999999995+99/baz/filename"
            }, {
                "class": "Directory",
                "basename": "subdir",
                "location": "keep:99999999999999999999999999999995+99/subdir"
            }]
        }],
        "baseCommand": "ls",
        "id": "#",
        "class": "CommandLineTool"
    })

    loadingContext, runtimeContext = self.helper(runner)
    runtimeContext.name = "test_initial_work_dir"

    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
    arvtool.formatgraph = None
    for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
        j.run(runtimeContext)

    call_args, call_kwargs = runner.api.container_requests().create.call_args

    vwdmock.copy.assert_has_calls([mock.call('bar', 'foo', source_collection=sourcemock)])
    vwdmock.copy.assert_has_calls([mock.call('', 'foo2', source_collection=sourcemock)])
    vwdmock.copy.assert_has_calls([mock.call('baz/filename', 'filename', source_collection=sourcemock)])
    vwdmock.copy.assert_has_calls([mock.call('subdir', 'subdir', source_collection=sourcemock)])

    call_body_expected = {
        'environment': {
            'HOME': '/var/spool/cwl',
            'TMPDIR': '/tmp'
        },
        'name': 'test_initial_work_dir',
        'runtime_constraints': {
            'vcpus': 1,
            'ram': 1073741824
        },
        'use_existing': True,
        'priority': 500,
        'mounts': {
            '/tmp': {'kind': 'tmp',
                     "capacity": 1073741824},
            '/var/spool/cwl': {'kind': 'tmp',
                               "capacity": 1073741824},
            '/var/spool/cwl/foo': {
                'kind': 'collection',
                'path': 'foo',
                'portable_data_hash': '99999999999999999999999999999996+99'
            },
            '/var/spool/cwl/foo2': {
                'kind': 'collection',
                'path': 'foo2',
                'portable_data_hash': '99999999999999999999999999999996+99'
            },
            '/var/spool/cwl/filename': {
                'kind': 'collection',
                'path': 'filename',
                'portable_data_hash': '99999999999999999999999999999996+99'
            },
            '/var/spool/cwl/subdir': {
                'kind': 'collection',
                'path': 'subdir',
                'portable_data_hash': '99999999999999999999999999999996+99'
            }
        },
        'state': 'Committed',
        'output_name': 'Output for step test_initial_work_dir',
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'output_path': '/var/spool/cwl',
        'output_ttl': 0,
        'container_image': '99999999999999999999999999999993+99',
        'command': ['ls'],
        'cwd': '/var/spool/cwl',
        'scheduling_parameters': {},
        'properties': {},
        'secret_mounts': {}
    }

    call_body = call_kwargs.get('body', None)
    self.assertNotEqual(None, call_body)
    for key in call_body:
        self.assertEqual(call_body_expected.get(key), call_body.get(key))
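# The repeated 1073741824 values above are 2**30 bytes; with no explicit
# ResourceRequirement on the tool, the expected body falls back to what
# appears to be a 1 GiB default for ram and for each tmp mount capacity
# (a hedged reading of the expected body, not a documented constant).
assert 2**30 == 1073741824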
def test_mounts(self, keepdocker):
    arv_docker_clear_cache()
    runner = mock.MagicMock()
    runner.ignore_docker_for_reuse = False
    runner.intermediate_output_ttl = 0
    runner.secret_store = cwltool.secrets.SecretStore()

    keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999994+99",
        "manifest_text": ". 99999999999999999999999999999994+99 0:0:file1 0:0:file2"}

    document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

    tool = cmap({
        "inputs": [
            {"id": "p1", "type": "Directory"}
        ],
        "outputs": [],
        "baseCommand": "ls",
        "arguments": [{"valueFrom": "$(runtime.outdir)"}],
        "id": "#",
        "class": "CommandLineTool"
    })

    loadingContext, runtimeContext = self.helper(runner)
    runtimeContext.name = "test_run_mounts"

    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
    arvtool.formatgraph = None
    job_order = {
        "p1": {
            "class": "Directory",
            "location": "keep:99999999999999999999999999999994+44",
            "listing": [
                {
                    "class": "File",
                    "location": "keep:99999999999999999999999999999994+44/file1",
                },
                {
                    "class": "File",
                    "location": "keep:99999999999999999999999999999994+44/file2",
                }
            ]
        }
    }
    for j in arvtool.job(job_order, mock.MagicMock(), runtimeContext):
        j.run(runtimeContext)
        runner.api.container_requests().create.assert_called_with(
            body=JsonDiffMatcher({
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp'
                },
                'name': 'test_run_mounts',
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': 1073741824
                },
                'use_existing': True,
                'priority': 500,
                'mounts': {
                    "/keep/99999999999999999999999999999994+44": {
                        "kind": "collection",
                        "portable_data_hash": "99999999999999999999999999999994+44"
                    },
                    '/tmp': {'kind': 'tmp',
                             "capacity": 1073741824},
                    '/var/spool/cwl': {'kind': 'tmp',
                                       "capacity": 1073741824}
                },
                'state': 'Committed',
                'output_name': 'Output for step test_run_mounts',
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'output_ttl': 0,
                'container_image': '99999999999999999999999999999994+99',
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
                'secret_mounts': {}
            }))
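# The expected body above illustrates the containers-API mount layout: the
# Directory input referenced by "keep:<pdh>" surfaces as a collection mount
# keyed by its portable data hash at /keep/<pdh>, and $(runtime.outdir) in
# `arguments` expands to the container's output path, /var/spool/cwl, in the
# rendered command line.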
def validate_document(document_loader,           # type: Loader
                      workflowobj,               # type: CommentedMap
                      uri,                       # type: Text
                      enable_dev=False,          # type: bool
                      strict=True,               # type: bool
                      preprocess_only=False,     # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,         # type: Optional[bool]
                      overrides=None,            # type: Optional[List[Dict]]
                      metadata=None,             # type: Optional[Dict]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document."""
    if isinstance(workflowobj, list):
        workflowobj = cmap({
            "$graph": workflowobj
        }, fn=uri)

    if not isinstance(workflowobj, dict):
        raise ValueError("workflowobj must be a dict, got '%s': %s"
                         % (type(workflowobj), workflowobj))

    if overrides is None:
        overrides = []

    jobobj = None
    if "cwl:tool" in workflowobj:
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]

        if "http://commonwl.org/cwltool#overrides" in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj["http://commonwl.org/cwltool#overrides"]

        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]

    if "cwlVersion" not in workflowobj:
        if metadata and 'cwlVersion' in metadata:
            workflowobj['cwlVersion'] = metadata['cwlVersion']
        else:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>")

    if not isinstance(workflowobj["cwlVersion"], (str, Text)):
        raise Exception("'cwlVersion' must be a string, got %s"
                        % type(workflowobj["cwlVersion"]))
    # strip out version
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in list(ALLUPDATES):
        # print out all the supported versions of cwlVersion
        versions = list(ALLUPDATES)  # ALLUPDATES is a dict
        versions.sort()
        raise ValidationException(
            "'cwlVersion' not valid. Supported CWL versions are: "
            "\n{}".format("\n".join(versions)))

    if workflowobj["cwlVersion"] == "draft-2":
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text]
    document_loader = Loader(sch_document_loader.ctx,
                             schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx,
                             cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor,
                             skip_schemas=skip_schemas)

    _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not new_metadata:
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        new_metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]}, fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri

    schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if new_metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)))

    if jobobj:
        new_metadata[u"cwl:defaults"] = jobobj

    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides

    return document_loader, avsc_names, processobj, new_metadata, uri
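# Hedged sketch of driving the older validate_document() entry point above.
# fetch_document() returning (document_loader, workflowobj, uri) matches its
# use earlier in this function, but the example path is hypothetical and the
# exact signatures differ across cwltool releases.
document_loader, workflowobj, uri = fetch_document("file:///path/to/workflow.cwl")
document_loader, avsc_names, processobj, metadata, uri = validate_document(
    document_loader, workflowobj, uri, strict=True)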
def job(self, joborder, output_callback, **kwargs):
    kwargs["work_api"] = self.work_api
    req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
    if req:
        with SourceLine(self.tool, None, WorkflowException):
            if "id" not in self.tool:
                raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
        document_loader, workflowobj, uri = (self.doc_loader,
                                             self.doc_loader.fetch(self.tool["id"]),
                                             self.tool["id"])

        with Perf(metrics, "subworkflow upload_deps"):
            upload_dependencies(self.arvrunner,
                                os.path.basename(joborder.get("id", "#")),
                                document_loader,
                                joborder,
                                joborder.get("id", "#"),
                                False)

            if self.wf_pdh is None:
                workflowobj["requirements"] = dedup_reqs(self.requirements)
                workflowobj["hints"] = dedup_reqs(self.hints)

                packed = pack(document_loader, workflowobj, uri, self.metadata)

                upload_dependencies(self.arvrunner,
                                    kwargs.get("name", ""),
                                    document_loader,
                                    packed,
                                    uri,
                                    False)

        with Perf(metrics, "subworkflow adjust"):
            joborder_keepmount = copy.deepcopy(joborder)

            def keepmount(obj):
                with SourceLine(obj, None, WorkflowException):
                    if "location" not in obj:
                        raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
                with SourceLine(obj, "location", WorkflowException):
                    if obj["location"].startswith("keep:"):
                        obj["location"] = "/keep/" + obj["location"][5:]
                        if "listing" in obj:
                            del obj["listing"]
                    elif obj["location"].startswith("_:"):
                        del obj["location"]
                    else:
                        raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])

            adjustFileObjs(joborder_keepmount, keepmount)
            adjustDirObjs(joborder_keepmount, keepmount)

            if self.wf_pdh is None:
                adjustFileObjs(packed, keepmount)
                adjustDirObjs(packed, keepmount)
                self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)

        wf_runner = cmap({
            "class": "CommandLineTool",
            "baseCommand": "cwltool",
            "inputs": self.tool["inputs"],
            "outputs": self.tool["outputs"],
            "stdout": "cwl.output.json",
            "requirements": workflowobj["requirements"] + [
                {"class": "InitialWorkDirRequirement",
                 "listing": [{
                     "entryname": "workflow.cwl",
                     "entry": {
                         "class": "File",
                         "location": "keep:%s/workflow.cwl" % self.wf_pdh
                     }
                 }, {
                     "entryname": "cwl.input.yml",
                     # Escape backslashes first, then neutralize CWL expression
                     # openers so the inner cwltool run does not re-evaluate
                     # parameter references in the embedded job order.
                     "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True,
                                         separators=(',', ': ')).replace("\\", "\\\\").replace('$(', '\\$(').replace('${', '\\${')
                 }]}],
            "hints": workflowobj["hints"],
            "arguments": ["--no-container", "--move-outputs",
                          "--preserve-entire-environment",
                          "workflow.cwl#main", "cwl.input.yml"]
        })
        kwargs["loader"] = self.doc_loader
        kwargs["avsc_names"] = self.doc_schema
        return ArvadosCommandTool(self.arvrunner, wf_runner, **kwargs).job(joborder, output_callback, **kwargs)
    else:
        return super(ArvadosWorkflow, self).job(joborder, output_callback, **kwargs)
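# Hedged illustration of the cwl.input.yml escaping used in job() above:
# backslashes are doubled first, then "$(" and "${" are prefixed with a
# backslash so the inner cwltool invocation treats them as literal text
# rather than CWL parameter references. The sample job order is made up.
import json

joborder = {"msg": "$(inputs.x) and ${return 1;}"}
text = (json.dumps(joborder, indent=2, sort_keys=True, separators=(',', ': '))
        .replace("\\", "\\\\").replace('$(', '\\$(').replace('${', '\\${'))
assert '\\$(inputs.x)' in text and '\\${return 1;}' in text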