Exemple #1
0
def test_get_subgraph():
    """Check every extracted subgraph against its stored JSON fixture.

    The workflow is loaded with ``do_update`` disabled, then for each input,
    output and step id the extracted subgraph is converted to a dict, has the
    fixture-directory URI prefix removed, and is compared to
    ``tests/subgraph/extract_<id>.json``.
    """
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool})
    wf = norm(Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri())
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)

    prefix = norm(Path(get_data("tests/subgraph")).as_uri())
    # Skip the prefix itself plus the single character that follows it.
    skip = len(prefix) + 1

    def strip_prefix(value):
        """Recursively remove the fixture-directory prefix from strings."""
        if isinstance(value, dict):
            return {key: strip_prefix(item) for key, item in value.items()}
        if isinstance(value, list):
            return [strip_prefix(item) for item in value]
        if isinstance(value, str) and value.startswith(prefix):
            return value[skip:]
        return value

    targets = (
        "file1",
        "file2",
        "file3",
        "count_output",
        "output3",
        "output4",
        "output5",
        "step1",
        "step2",
        "step3",
        "step4",
        "step5",
    )
    for target in targets:
        subgraph = get_subgraph([wf + "#" + target], tool)
        fixture = get_data("tests/subgraph/extract_" + target + ".json")
        with open(fixture) as handle:
            expected = json.load(handle)
        assert expected == strip_prefix(convert_to_dict(subgraph))
Exemple #2
0
def test_input_deps_cmdline_opts_relative_deps_cwd():
    """``--print-input-deps --relative-deps cwd`` prints cwd-relative deps.

    Runs ``main`` with an input file given on the command line and checks
    that the JSON written to stdout lists that file as a secondary file whose
    location is relative to the current directory.
    """
    # Python 2 needs a bytes buffer for stdout, Python 3 a text buffer.
    stream = BytesIO() if sys.version_info[0] < 3 else StringIO()

    data_path = get_data("tests/wf/whale.txt")
    argv = [
        "--print-input-deps",
        "--relative-deps",
        "cwd",
        get_data("tests/wf/count-lines1-wf.cwl"),
        "--file1",
        data_path,
    ]
    main(argv, stdout=stream)

    whale = {
        "class": "File",
        "location": str(Path(os.path.relpath(data_path, os.path.curdir))),
        "basename": "whale.txt",
        "nameroot": "whale",
        "nameext": ".txt",
    }
    goal = {
        "class": "File",
        "location": "",
        "format": CWL_IANA,
        "secondaryFiles": [whale],
    }
    assert json.loads(stream.getvalue()) == goal
Exemple #3
0
    def test_resolve_local(self):
        """resolve_local turns relative and absolute paths into file URIs.

        Runs from the directory returned by ``get_data("")`` and restores the
        previous working directory afterwards, even on failure.
        """
        origpath = os.getcwd()
        os.chdir(os.path.join(get_data("")))

        def norm(uri):
            # URIs are compared case-insensitively on Windows.
            return uri.lower() if onWindows() else uri

        try:
            root = Path.cwd()
            rooturi = root.as_uri()
            relative = os.path.join("tests", "echo.cwl")
            absolute = str(root / "tests" / "echo.cwl")
            # (expected URI suffix, reference handed to resolve_local)
            cases = (
                ("/tests/echo.cwl", relative),
                ("/tests/echo.cwl#main", relative + "#main"),
                ("/tests/echo.cwl", absolute),
                ("/tests/echo.cwl#main", absolute + "#main"),
            )
            for suffix, reference in cases:
                self.assertEqual(norm(rooturi + suffix),
                                 norm(resolve_local(None, reference)))
        finally:
            os.chdir(origpath)
Exemple #4
0
    def test_resolve_local(self):
        """Check the file URIs resolve_local builds for tests/echo.cwl.

        Covers a relative and an absolute path, each with and without a
        ``#main`` fragment. The working directory is switched to
        ``get_data("")`` for the duration of the test and then restored.
        """
        saved_cwd = os.getcwd()
        os.chdir(os.path.join(get_data("")))

        def norm(uri):
            # Only Windows needs case folding before comparison.
            if not onWindows():
                return uri
            return uri.lower()

        try:
            root = Path.cwd()
            rooturi = root.as_uri()

            rel_path = os.path.join("tests", "echo.cwl")
            abs_path = str(root / "tests" / "echo.cwl")
            expected_plain = norm(rooturi + "/tests/echo.cwl")
            expected_main = norm(rooturi + "/tests/echo.cwl#main")

            self.assertEqual(expected_plain,
                             norm(resolve_local(None, rel_path)))
            self.assertEqual(expected_main,
                             norm(resolve_local(None, rel_path + "#main")))
            self.assertEqual(expected_plain,
                             norm(resolve_local(None, abs_path)))
            self.assertEqual(expected_main,
                             norm(resolve_local(None, abs_path + "#main")))
        finally:
            os.chdir(saved_cwd)
Exemple #5
0
def working_directory(path):
    """Switch into *path*, yield once, then switch back.

    The directory that was current on entry is restored when the generator
    resumes or is closed, including when an exception is thrown in at the
    ``yield``. Presumably wrapped by ``contextlib.contextmanager`` where it
    is defined; the decorator is not visible here.
    """
    original = str(Path.cwd())
    # os.chdir only accepts pathlib objects from Python 3.6 on, so pass str.
    os.chdir(str(path))
    try:
        yield
    finally:
        os.chdir(original)
Exemple #6
0
def working_directory(path):
    """Run the code at the ``yield`` point with *path* as working directory.

    Remembers the current directory, changes into *path*, yields, and always
    changes back afterwards.
    """
    # Both chdir calls get plain strings: before Python 3.6 os.chdir
    # rejects pathlib objects.
    return_to = str(Path.cwd())
    destination = str(path)
    os.chdir(destination)
    try:
        yield
    finally:
        os.chdir(return_to)
Exemple #7
0
def _arcp2file(base_path, uri):
    """Map an arcp URI to the corresponding path under ``base_path``.

    Asserts that the URI's UUID equals the UUID of the arcp URI that
    ``find_arcp`` reports for ``base_path``, then joins the URI path
    (without its leading slash) onto ``base_path``.
    """
    parsed = arcp.parse_arcp(uri)
    # Only URIs that belong to this research object may be mapped.
    assert parsed.uuid == arcp.parse_arcp(find_arcp(base_path)).uuid, (
        'arcp URI must be local to the research object')

    # Drop the leading "/" and let pathlib apply the local separator
    # (backslash on Windows) before joining.
    relative = str(Path(parsed.path[1:]))
    return os.path.join(base_path, relative)
Exemple #8
0
def get_data(filename):
    """Return the resolved absolute path of a data file as a string.

    The path is first looked up as a resource of the ``cwltool``
    requirement. If that lookup fails with ``ResolutionError``, or does not
    yield an existing file, the path is taken relative to the parent of this
    module's directory instead.
    """
    # Normalize separators for the current OS so the joins below behave.
    filename = os.path.normpath(filename)
    try:
        candidate = resource_filename(Requirement.parse("cwltool"), filename)
    except ResolutionError:
        candidate = None
    if not (candidate and os.path.isfile(candidate)):
        candidate = os.path.join(os.path.dirname(__file__), os.pardir, filename)
    return str(Path(candidate).resolve())
def test_checklink_outputSource():
    """Is outputSource resolved correctly independent of value of do_validate."""
    expected = norm(Path(get_data(
        "tests/wf/1st-workflow.cwl")).as_uri()) + "#argument/classfile"

    # Load the same workflow with validation on, then off; both must agree.
    for do_validate in (True, False):
        loadingContext = LoadingContext({"do_validate": do_validate})
        tool = load_tool(get_data("tests/wf/1st-workflow.cwl"), loadingContext)
        resolved = tool.tool["outputs"][0]["outputSource"]
        assert norm(resolved) == expected
def test_load_graph_fragment():
    """Reloading from a dictionary without a cwlVersion."""
    context = LoadingContext()
    document_uri = Path(get_data("tests/wf/scatter-wf4.cwl")).as_uri()
    fragment_uri = document_uri + "#main"
    loaded = load_tool(fragment_uri, context)

    _resolved, _metadata = loaded.doc_loader.resolve_ref(fragment_uri)
    # Reload from the loaded tool's dict rather than from a URI. Per the
    # original note, that dict is a fragment of the original document and
    # has no cwlVersion of its own, so loading it must look up the root
    # document to find the cwlVersion.
    reloaded = load_tool(loaded.tool, context)
    assert reloaded.metadata["cwlVersion"] == INTERNAL_VERSION
Exemple #11
0
def test_load_graph_fragment_from_packed():
    """Loading a fragment from packed with update."""
    loadingContext = LoadingContext()
    packed_uri = Path(get_data("tests/wf/packed-with-loadlisting.cwl")).as_uri()
    uri = packed_uri + "#main"
    expected_requirements = [
        {"class": "LoadListingRequirement", "loadListing": "no_listing"}
    ]
    try:
        with open(get_data("cwltool/extensions.yml"), "r") as res:
            extension_schema = res.read()
        use_custom_schema("v1.0", "http://commonwl.org/cwltool", extension_schema)

        # The updater moves LoadListingRequirement from a v1.0 extension to
        # a v1.1 core feature. A past bug returned the un-updated document
        # when a specific fragment of a packed workflow was loaded; this
        # reproduces that situation and checks the updated document is used.
        tool = load_tool(uri, loadingContext)

        assert tool.tool["requirements"] == expected_requirements
    finally:
        # Always drop the custom schema again so later tests see the default.
        use_standard_schema("v1.0")
Exemple #12
0
    loadingContext = LoadingContext(
        {
            "construct_tool_object": default_make_tool,
            "resolver": test_resolver,
            "fetcher_constructor": TestFetcher,
        }
    )

    load_tool("foo.cwl", loadingContext)

    assert (
        main(["--print-pre", "--debug", "foo.cwl"], loadingContext=loadingContext) == 0
    )


# Directory returned by get_data(""), used below to build absolute paths.
root = Path(os.path.join(get_data("")))

# Pairs of (path, URI suffix): a relative and an absolute path to
# tests/echo.cwl, each with and without a "#main" fragment.
# NOTE(review): presumably consumed by a parametrized resolve_local test as
# (input, expected suffix after the root URI) -- confirm against the caller.
path_fragments = [
    (os.path.join("tests", "echo.cwl"), "/tests/echo.cwl"),
    (os.path.join("tests", "echo.cwl") + "#main", "/tests/echo.cwl#main"),
    (str(root / "tests" / "echo.cwl"), "/tests/echo.cwl"),
    (str(root / "tests" / "echo.cwl") + "#main", "/tests/echo.cwl#main"),
]


def norm(uri):
    """Return *uri* lower-cased on Windows and unchanged elsewhere."""
    return uri.lower() if onWindows() else uri

Exemple #13
0
def check_ro(base_path, nested=False):
    """Assert that the research object (RO) under ``base_path`` is well formed.

    Parses ``metadata/manifest.json`` as JSON-LD and checks, in order: the RO
    description and its cwlprov profile, the aggregated files, and the
    highlighting, describing, linking and provenance annotations.

    :param base_path: directory that contains ``metadata/manifest.json``.
    :param nested: when true, additionally require provenance annotations
        whose target is not this RO's UUID URN.
    :raises AssertionError: when any of the checks fails.
    """
    manifest_file = os.path.join(base_path, "metadata", "manifest.json")
    assert os.path.isfile(manifest_file), "Can't find " + manifest_file
    arcp_root = find_arcp(base_path)
    # URI of the manifest, used as the parse base and to locate the RO.
    base = urllib.parse.urljoin(arcp_root, "metadata/manifest.json")
    g = Graph()

    # Avoid resolving JSON-LD context https://w3id.org/bundle/context
    # so this test works offline
    context = Path(get_data("tests/bundle-context.jsonld")).as_uri()
    with open(manifest_file, "r", encoding="UTF-8") as f:
        jsonld = f.read()
        # replace with file:/// URI
        jsonld = jsonld.replace("https://w3id.org/bundle/context", context)
    g.parse(data=jsonld, format="json-ld", publicID=base)
    # Setting the DEBUG environment variable dumps the parsed graph as Turtle.
    if os.environ.get("DEBUG"):
        print("Parsed manifest:\n\n")
        g.serialize(sys.stdout, format="ttl")
    ro = None

    # Take the first subject described by the manifest, if there is one.
    for ro in g.subjects(ORE.isDescribedBy, URIRef(base)):
        break
    assert ro is not None, "Can't find RO with ore:isDescribedBy"

    # Likewise take the first dct:conformsTo value of the RO as its profile.
    profile = None
    for dc in g.objects(ro, DCTERMS.conformsTo):
        profile = dc
        break
    assert profile is not None, "Can't find profile with dct:conformsTo"
    assert profile == URIRef(provenance.CWLPROV_VERSION),\
        "Unexpected cwlprov version " + profile

    # Split the aggregated resources into local (arcp) files, recorded as
    # paths relative to base_path, and external URIs.
    paths = []
    externals = []
    for aggregate in g.objects(ro, ORE.aggregates):
        if not arcp.is_arcp_uri(aggregate):
            externals.append(aggregate)
            # Won't check external URIs existence here
            # TODO: Check they are not relative!
            continue
        lfile = _arcp2file(base_path, aggregate)
        paths.append(os.path.relpath(lfile, base_path))
        assert os.path.isfile(lfile), "Can't find aggregated " + lfile

    assert paths, "Didn't find any arcp aggregates"
    assert externals, "Didn't find any data URIs"

    # NOTE(review): the membership tests below use "/"-separated names while
    # `paths` holds os.path.relpath() results -- confirm this on Windows.
    for ext in ["provn", "xml", "json", "jsonld", "nt", "ttl"]:
        f = "metadata/provenance/primary.cwlprov.%s" % ext
        assert f in paths, "provenance file missing " + f

    for f in [
            "workflow/primary-job.json", "workflow/packed.cwl",
            "workflow/primary-output.json"
    ]:
        assert f in paths, "workflow file missing " + f
    # Can't test snapshot/ files directly as their name varies

    # TODO: check urn:hash::sha1 thingies
    # TODO: Check OA annotations

    # URIs under the arcp root that the annotations below refer to.
    packed = urllib.parse.urljoin(arcp_root, "/workflow/packed.cwl")
    primary_job = urllib.parse.urljoin(arcp_root, "/workflow/primary-job.json")
    primary_prov_nt = urllib.parse.urljoin(
        arcp_root, "/metadata/provenance/primary.cwlprov.nt")
    uuid = arcp.parse_arcp(arcp_root).uuid

    # At least one highlighting annotation must exist; each targets packed.cwl.
    highlights = set(g.subjects(OA.motivatedBy, OA.highlighting))
    assert highlights, "Didn't find highlights"
    for h in highlights:
        assert (h, OA.hasTarget, URIRef(packed)) in g

    # Describing annotations (possibly none): body is the arcp root and the
    # target is the UUID URN taken from that root.
    describes = set(g.subjects(OA.motivatedBy, OA.describing))
    for d in describes:
        assert (d, OA.hasBody, URIRef(arcp_root)) in g
        assert (d, OA.hasTarget, URIRef(uuid.urn)) in g

    # Linking annotations (possibly none): bodies include the packed workflow
    # and the primary job, with the same UUID URN as target.
    linked = set(g.subjects(OA.motivatedBy, OA.linking))
    for l in linked:
        assert (l, OA.hasBody, URIRef(packed)) in g
        assert (l, OA.hasBody, URIRef(primary_job)) in g
        assert (l, OA.hasTarget, URIRef(uuid.urn)) in g

    # Annotations whose body includes the N-Triples provenance file.
    has_provenance = set(g.subjects(OA.hasBody, URIRef(primary_prov_nt)))
    for p in has_provenance:
        assert (p, OA.hasTarget, URIRef(uuid.urn)) in g
        assert (p, OA.motivatedBy, PROV.has_provenance) in g
        # Check all prov elements are listed
        formats = set()
        for prov in g.objects(p, OA.hasBody):
            assert (prov, DCTERMS.conformsTo,
                    URIRef(provenance.CWLPROV_VERSION)) in g
            # NOTE: DC.format is a Namespace method and does not resolve like other terms
            formats.update(set(g.objects(prov, DC["format"])))
        assert formats, "Could not find media types"
        # The collected media types must match this set exactly.
        expected = set(
            Literal(f)
            for f in ("application/json", "application/ld+json",
                      "application/n-triples",
                      'text/provenance-notation; charset="UTF-8"',
                      'text/turtle; charset="UTF-8"', "application/xml"))
        assert formats == expected, "Did not match expected PROV media types"

    if nested:
        # Check for additional PROVs
        # Let's try to find the other wf run ID
        otherRuns = set()
        for p in g.subjects(OA.motivatedBy, PROV.has_provenance):
            # Skip annotations that target this RO's own UUID URN.
            if (p, OA.hasTarget, URIRef(uuid.urn)) in g:
                continue
            otherRuns.update(set(g.objects(p, OA.hasTarget)))
        assert otherRuns, "Could not find nested workflow run prov annotations"