def test_get_subgraph():
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool})
    wf = norm(Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri())
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)
    sg = norm(Path(get_data("tests/subgraph")).as_uri())

    def clean(val):
        """Recursively strip the local base URI so results compare against the checked-in JSON."""
        if isinstance(val, str):
            if val.startswith(sg):
                return val[len(sg) + 1 :]
        if isinstance(val, dict):
            return {k: clean(v) for k, v in val.items()}
        if isinstance(val, list):
            return [clean(v) for v in val]
        return val

    for a in (
        "file1",
        "file2",
        "file3",
        "count_output",
        "output3",
        "output4",
        "output5",
        "step1",
        "step2",
        "step3",
        "step4",
        "step5",
    ):
        extracted = get_subgraph([wf + "#" + a], tool)
        with open(get_data("tests/subgraph/extract_" + a + ".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted))
def test_input_deps_cmdline_opts_relative_deps_cwd():
    if sys.version_info[0] < 3:
        stream = BytesIO()
    else:
        stream = StringIO()

    data_path = get_data("tests/wf/whale.txt")
    main(
        [
            "--print-input-deps",
            "--relative-deps",
            "cwd",
            get_data("tests/wf/count-lines1-wf.cwl"),
            "--file1",
            data_path,
        ],
        stdout=stream,
    )

    goal = {
        "class": "File",
        "location": "",
        "format": CWL_IANA,
        "secondaryFiles": [
            {
                "class": "File",
                "location": str(Path(os.path.relpath(data_path, os.path.curdir))),
                "basename": "whale.txt",
                "nameroot": "whale",
                "nameext": ".txt",
            }
        ],
    }
    assert json.loads(stream.getvalue()) == goal
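# The same check can be reproduced from a shell; this invocation mirrors the
# arguments passed to main() above (paths relative to a source checkout):
#     cwltool --print-input-deps --relative-deps cwd \
#         tests/wf/count-lines1-wf.cwl --file1 tests/wf/whale.txt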
def test_resolve_local(self):
    origpath = os.getcwd()
    os.chdir(get_data(""))

    def norm(uri):
        if onWindows():
            return uri.lower()
        return uri

    try:
        root = Path.cwd()
        rooturi = root.as_uri()
        self.assertEqual(
            norm(rooturi + "/tests/echo.cwl"),
            norm(resolve_local(None, os.path.join("tests", "echo.cwl"))),
        )
        self.assertEqual(
            norm(rooturi + "/tests/echo.cwl#main"),
            norm(resolve_local(None, os.path.join("tests", "echo.cwl") + "#main")),
        )
        self.assertEqual(
            norm(rooturi + "/tests/echo.cwl"),
            norm(resolve_local(None, str(root / "tests" / "echo.cwl"))),
        )
        self.assertEqual(
            norm(rooturi + "/tests/echo.cwl#main"),
            norm(resolve_local(None, str(root / "tests" / "echo.cwl") + "#main")),
        )
    finally:
        os.chdir(origpath)
@contextmanager  # from contextlib; required so ``with working_directory(...)`` works
def working_directory(path):
    """Change working directory and return to previous on exit."""
    prev_cwd = Path.cwd()
    # before python 3.6 chdir doesn't support paths from pathlib
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(str(prev_cwd))
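# A minimal usage sketch for working_directory (the directory and file are
# test-suite paths used elsewhere in this module): the previous working
# directory is restored even if the body raises.
def _working_directory_example():
    with working_directory(get_data("tests/subgraph")):
        assert os.path.isfile("count-lines1-wf.cwl")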
def _arcp2file(base_path, uri):
    parsed = arcp.parse_arcp(uri)
    # arcp URIs, ensure they are local to our RO
    assert (
        parsed.uuid == arcp.parse_arcp(find_arcp(base_path)).uuid
    ), "arcp URI must be local to the research object"

    path = parsed.path[1:]  # Strip first /
    # Convert to local path, in case it uses \ on Windows
    lpath = str(Path(path))
    return os.path.join(base_path, lpath)
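# Illustrative mapping (the UUID below is made up): for base_path "/tmp/ro",
#     _arcp2file("/tmp/ro",
#                "arcp://uuid,32a423d6-52ab-47e3-baed-8c33de0569b1/workflow/packed.cwl")
# returns os.path.join("/tmp/ro", "workflow", "packed.cwl"), provided the UUID
# matches the one find_arcp() reports for the research object.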
def get_data(filename):
    # Normalize the path for the current OS; otherwise joining paths below can fail
    filename = os.path.normpath(filename)
    filepath = None
    try:
        filepath = resource_filename(Requirement.parse("cwltool"), filename)
    except ResolutionError:
        pass
    if not filepath or not os.path.isfile(filepath):
        filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename)
    return str(Path(filepath).resolve())
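# Usage note: get_data() first consults the installed "cwltool" distribution,
# then falls back to the source checkout, so get_data("tests/wf/whale.txt")
# returns an absolute path in either layout.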
def test_checklink_outputSource():
    """Is outputSource resolved correctly independent of value of do_validate."""
    outsrc = (
        norm(Path(get_data("tests/wf/1st-workflow.cwl")).as_uri())
        + "#argument/classfile"
    )

    loadingContext = LoadingContext({"do_validate": True})
    tool = load_tool(get_data("tests/wf/1st-workflow.cwl"), loadingContext)
    assert norm(tool.tool["outputs"][0]["outputSource"]) == outsrc

    loadingContext = LoadingContext({"do_validate": False})
    tool = load_tool(get_data("tests/wf/1st-workflow.cwl"), loadingContext)
    assert norm(tool.tool["outputs"][0]["outputSource"]) == outsrc
def test_load_graph_fragment():
    """Reloading from a dictionary without a cwlVersion."""
    loadingContext = LoadingContext()
    uri = Path(get_data("tests/wf/scatter-wf4.cwl")).as_uri() + "#main"
    tool = load_tool(uri, loadingContext)

    rs, metadata = tool.doc_loader.resolve_ref(uri)

    # Reload from a dict (in 'rs'), not a URI.  The dict is a fragment
    # of the original document and doesn't have cwlVersion set, so test
    # that it correctly looks up the root document to get the
    # cwlVersion.
    tool = load_tool(tool.tool, loadingContext)
    assert tool.metadata["cwlVersion"] == INTERNAL_VERSION
def test_load_graph_fragment_from_packed():
    """Loading a fragment from packed with update."""
    loadingContext = LoadingContext()
    uri = Path(get_data("tests/wf/packed-with-loadlisting.cwl")).as_uri() + "#main"
    try:
        with open(get_data("cwltool/extensions.yml"), "r") as res:
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())

        # The updater transforms LoadListingRequirement from an
        # extension (in v1.0) to a core feature (in v1.1), but there
        # was a bug where loading a packed workflow and then loading a
        # specific fragment would get the un-updated document.  This
        # recreates that case and asserts that we are using the
        # updated document like we should.
        tool = load_tool(uri, loadingContext)
        assert tool.tool["requirements"] == [
            {"class": "LoadListingRequirement", "loadListing": "no_listing"}
        ]
    finally:
        use_standard_schema("v1.0")
loadingContext = LoadingContext(
    {
        "construct_tool_object": default_make_tool,
        "resolver": test_resolver,
        "fetcher_constructor": TestFetcher,
    }
)

load_tool("foo.cwl", loadingContext)

assert main(["--print-pre", "--debug", "foo.cwl"], loadingContext=loadingContext) == 0

root = Path(get_data(""))
path_fragments = [
    (os.path.join("tests", "echo.cwl"), "/tests/echo.cwl"),
    (os.path.join("tests", "echo.cwl") + "#main", "/tests/echo.cwl#main"),
    (str(root / "tests" / "echo.cwl"), "/tests/echo.cwl"),
    (str(root / "tests" / "echo.cwl") + "#main", "/tests/echo.cwl#main"),
]


def norm(uri):
    if onWindows():
        return uri.lower()
    return uri
def check_ro(base_path, nested=False):
    manifest_file = os.path.join(base_path, "metadata", "manifest.json")
    assert os.path.isfile(manifest_file), "Can't find " + manifest_file
    arcp_root = find_arcp(base_path)
    base = urllib.parse.urljoin(arcp_root, "metadata/manifest.json")
    g = Graph()

    # Avoid resolving JSON-LD context https://w3id.org/bundle/context
    # so this test works offline
    context = Path(get_data("tests/bundle-context.jsonld")).as_uri()
    with open(manifest_file, "r", encoding="UTF-8") as f:
        jsonld = f.read()
        # replace with file:/// URI
        jsonld = jsonld.replace("https://w3id.org/bundle/context", context)
    g.parse(data=jsonld, format="json-ld", publicID=base)
    if os.environ.get("DEBUG"):
        print("Parsed manifest:\n\n")
        g.serialize(sys.stdout, format="ttl")

    ro = None
    for ro in g.subjects(ORE.isDescribedBy, URIRef(base)):
        break
    assert ro is not None, "Can't find RO with ore:isDescribedBy"

    profile = None
    for dc in g.objects(ro, DCTERMS.conformsTo):
        profile = dc
        break
    assert profile is not None, "Can't find profile with dct:conformsTo"
    assert profile == URIRef(provenance.CWLPROV_VERSION), (
        "Unexpected cwlprov version " + profile
    )

    paths = []
    externals = []
    for aggregate in g.objects(ro, ORE.aggregates):
        if not arcp.is_arcp_uri(aggregate):
            externals.append(aggregate)
            # Won't check external URIs existence here
            # TODO: Check they are not relative!
            continue
        lfile = _arcp2file(base_path, aggregate)
        paths.append(os.path.relpath(lfile, base_path))
        assert os.path.isfile(lfile), "Can't find aggregated " + lfile

    assert paths, "Didn't find any arcp aggregates"
    assert externals, "Didn't find any data URIs"

    for ext in ["provn", "xml", "json", "jsonld", "nt", "ttl"]:
        f = "metadata/provenance/primary.cwlprov.%s" % ext
        assert f in paths, "provenance file missing " + f

    for f in [
        "workflow/primary-job.json",
        "workflow/packed.cwl",
        "workflow/primary-output.json",
    ]:
        assert f in paths, "workflow file missing " + f
    # Can't test snapshot/ files directly as their name varies

    # TODO: check urn:hash::sha1 thingies
    # TODO: Check OA annotations

    packed = urllib.parse.urljoin(arcp_root, "/workflow/packed.cwl")
    primary_job = urllib.parse.urljoin(arcp_root, "/workflow/primary-job.json")
    primary_prov_nt = urllib.parse.urljoin(
        arcp_root, "/metadata/provenance/primary.cwlprov.nt"
    )
    uuid = arcp.parse_arcp(arcp_root).uuid

    highlights = set(g.subjects(OA.motivatedBy, OA.highlighting))
    assert highlights, "Didn't find highlights"
    for h in highlights:
        assert (h, OA.hasTarget, URIRef(packed)) in g

    describes = set(g.subjects(OA.motivatedBy, OA.describing))
    for d in describes:
        assert (d, OA.hasBody, URIRef(arcp_root)) in g
        assert (d, OA.hasTarget, URIRef(uuid.urn)) in g

    linked = set(g.subjects(OA.motivatedBy, OA.linking))
    for l in linked:
        assert (l, OA.hasBody, URIRef(packed)) in g
        assert (l, OA.hasBody, URIRef(primary_job)) in g
        assert (l, OA.hasTarget, URIRef(uuid.urn)) in g

    has_provenance = set(g.subjects(OA.hasBody, URIRef(primary_prov_nt)))
    for p in has_provenance:
        assert (p, OA.hasTarget, URIRef(uuid.urn)) in g
        assert (p, OA.motivatedBy, PROV.has_provenance) in g

        # Check all prov elements are listed
        formats = set()
        for prov in g.objects(p, OA.hasBody):
            assert (prov, DCTERMS.conformsTo, URIRef(provenance.CWLPROV_VERSION)) in g
            # NOTE: DC.format is a Namespace method and does not resolve like other terms
            formats.update(set(g.objects(prov, DC["format"])))
        assert formats, "Could not find media types"
        expected = set(
            Literal(f)
            for f in (
                "application/json",
                "application/ld+json",
                "application/n-triples",
                'text/provenance-notation; charset="UTF-8"',
                'text/turtle; charset="UTF-8"',
                "application/xml",
            )
        )
        assert formats == expected, "Did not match expected PROV media types"

    if nested:
        # Check for additional PROVs
        # Let's try to find the other wf run ID
        otherRuns = set()
        for p in g.subjects(OA.motivatedBy, PROV.has_provenance):
            if (p, OA.hasTarget, URIRef(uuid.urn)) in g:
                continue
            otherRuns.update(set(g.objects(p, OA.hasTarget)))
        assert otherRuns, "Could not find nested workflow run prov annotations"