def test_secondaryFile_dsl_ref(self):
        """Check expansion of the secondaryFiles DSL for scalar and list input.

        Each case pairs a ``secondaryFiles`` value with the structure the
        loader is expected to produce: a trailing ``?`` on the pattern maps
        to ``required: False``, its absence to ``required: None``.
        """
        loader = schema_salad.ref_resolver.Loader({})
        loader.add_context({"secondaryFiles": {"secondaryFilesDSL": True}})

        cases = [
            (".foo", {'pattern': '.foo', 'required': None}),
            (".foo?", {'pattern': '.foo', 'required': False}),
            ([".foo"], [{'pattern': '.foo', 'required': None}]),
            ([".foo?"], [{'pattern': '.foo', 'required': False}]),
        ]
        for shorthand, expanded in cases:
            resolved, _ = loader.resolve_all(
                cmap({"secondaryFiles": shorthand}), "")
            self.assertEqual({"secondaryFiles": expanded}, resolved)
Exemple #2
0
    def test_mixin(self):
        """Resolve ``$mixin`` references through resolve_ref and resolve_all."""
        tests_uri = schema_salad.ref_resolver.file_uri(os.path.join(os.getcwd(), "tests"))

        # Single document: the expected result keeps the explicit "one" value.
        loader = schema_salad.ref_resolver.Loader({})
        single = cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"})
        resolved = loader.resolve_ref(single, base_url=tests_uri)
        self.assertEqual({'id': 'four', 'one': 'five'}, resolved[0])

        # With "id" declared as @id, the same mixin used under two parents is
        # expected to receive an identifier scoped to each parent.
        loader = schema_salad.ref_resolver.Loader({"id": "@id"})
        names = ("a", "b")
        documents = cmap([
            {"id": name, "m": {"$mixin": get_data("tests/mixin.yml")}}
            for name in names
        ])
        resolved = loader.resolve_all(documents, base_url=tests_uri)

        expected = [
            {
                'id': tests_uri + '#' + name,
                'm': {'id': tests_uri + u'#' + name + '/four', 'one': 'two'},
            }
            for name in names
        ]
        self.assertEqual(expected, resolved[0])
Exemple #3
0
    def test_typedsl_ref(self):
        """Check expansion of the type DSL suffixes ``?`` and ``[]``.

        Each case pairs a ``type`` shorthand with the structure the loader is
        expected to produce for it.
        """
        loader = schema_salad.ref_resolver.Loader({})
        context = {
            "File": "http://example.com/File",
            "null": "http://example.com/null",
            "array": "http://example.com/array",
            "type": {"@type": "@vocab", "typeDSL": True},
        }
        loader.add_context(context)

        cases = [
            ("File", 'File'),
            ("File?", ['null', 'File']),
            ("File[]", {'items': 'File', 'type': 'array'}),
            ("File[]?", ['null', {'items': 'File', 'type': 'array'}]),
        ]
        for shorthand, expanded in cases:
            resolved, _ = loader.resolve_all(cmap({"type": shorthand}), "")
            self.assertEqual({'type': expanded}, resolved)
Exemple #4
0
 def _add(wf):
     """Register ``upstream`` as a dependency inside the ``hints`` of *wf*.

     ``hints`` may be a list of class-tagged entries or a dict keyed by class
     name. If a Dependencies hint already exists, the entry with the same
     ``upstream`` is updated in place, or a new entry is appended when none
     matches. Otherwise a new ``dep:Dependencies`` hint is created.
     """
     found.append(True)
     hints = wf.setdefault("hints", {})

     # Build the dependency record; the optional fields are only set when given.
     dep = cmap({"upstream": upstream})
     if set_version:
         dep["version"] = set_version
     if install_to:
         dep["installTo"] = install_to

     if isinstance(hints, list):
         for hint in hints:
             if expand_ns(namespaces,
                          hint["class"]) != CWLDEP_DEPENDENCIES_URL:
                 continue
             known = hint["dependencies"]
             for existing in known:
                 if existing["upstream"] == upstream:
                     existing.update(dep)
                     return
             known.append(cmap(dep))
             return
         # No Dependencies hint yet: add one holding just this record.
         hints.append(
             cmap({
                 "class": "dep:Dependencies",
                 "dependencies": [dep]
             }))
         return

     if isinstance(hints, dict):
         for key in hints:
             if expand_ns(namespaces, key) != CWLDEP_DEPENDENCIES_URL:
                 continue
             known = hints[key]["dependencies"]
             for existing in known:
                 if existing["upstream"] == upstream:
                     existing.update(dep)
                     return
             known.append(cmap(dep))
             return
         hints["dep:Dependencies"] = cmap({"dependencies": [dep]})
    def test_mixin(self):
        """Resolve ``$mixin`` references through resolve_ref and resolve_all."""
        tests_uri = schema_salad.ref_resolver.file_uri(os.path.join(os.getcwd(), "tests"))

        # Single document: the expected result keeps the explicit "one" value.
        loader = schema_salad.ref_resolver.Loader({})
        single = cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"})
        resolved = loader.resolve_ref(single, base_url=tests_uri)
        self.assertEqual({'id': 'four', 'one': 'five'}, resolved[0])

        # With "id" declared as @id, the same mixin used under two parents is
        # expected to receive an identifier scoped to each parent.
        loader = schema_salad.ref_resolver.Loader({"id": "@id"})
        names = ("a", "b")
        documents = cmap([
            {"id": name, "m": {"$mixin": get_data("tests/mixin.yml")}}
            for name in names
        ])
        resolved = loader.resolve_all(documents, base_url=tests_uri)

        expected = [
            {
                'id': tests_uri + '#' + name,
                'm': {'id': tests_uri + u'#' + name + '/four', 'one': 'two'},
            }
            for name in names
        ]
        self.assertEqual(expected, resolved[0])
Exemple #6
0
def test_scoped_id() -> None:
    """Resolve nested ``id`` / ``location`` fields and print each result as N3.

    The first document is resolved with the default link checking; the
    remaining ones pass ``checklinks=False``.
    """
    loader = Loader({})
    ctx = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }  # type: ContextType
    loader.add_context(ctx)

    def dump_rdf(document):  # type: (Any) -> None
        """Convert *document* to an RDF graph and print it in N3 format."""
        graph = makerdf(None, document, ctx)
        print(graph.serialize(format="n3"))

    resolved, _ = loader.resolve_all(
        cmap({"id": "foo", "bar": {"id": "baz"}}), "http://example.com"
    )
    expected = {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/baz"},
    }
    assert expected == resolved
    dump_rdf(resolved)

    unchecked_cases = [
        (
            {"location": "foo", "bar": {"location": "baz"}},
            {
                "location": "http://example.com/foo",
                "bar": {"location": "http://example.com/baz"},
            },
        ),
        (
            {"id": "foo", "bar": {"location": "baz"}},
            {
                "id": "http://example.com/#foo",
                "bar": {"location": "http://example.com/baz"},
            },
        ),
        (
            {"location": "foo", "bar": {"id": "baz"}},
            {
                "location": "http://example.com/foo",
                "bar": {"id": "http://example.com/#baz"},
            },
        ),
    ]
    for given, expected in unchecked_cases:
        resolved, _ = loader.resolve_all(
            cmap(given),
            "http://example.com",
            checklinks=False,
        )
        assert expected == resolved
        dump_rdf(resolved)
def test_secondaryFile_dsl_ref() -> None:
    """Check expansion of the secondaryFiles DSL for scalar and list input.

    A trailing ``?`` on the pattern is expected to give ``required: False``;
    without it ``required`` is expected to be ``None``.
    """
    loader = Loader({})
    loader.add_context({"secondaryFiles": {"secondaryFilesDSL": True}})

    cases = [
        (".foo", {"pattern": ".foo", "required": None}),
        (".foo?", {"pattern": ".foo", "required": False}),
        ([".foo"], [{"pattern": ".foo", "required": None}]),
        ([".foo?"], [{"pattern": ".foo", "required": False}]),
    ]
    for shorthand, expanded in cases:
        resolved, _ = loader.resolve_all(cmap({"secondaryFiles": shorthand}), "")
        assert {"secondaryFiles": expanded} == resolved
Exemple #8
0
def get_overall_res_req(res_reqs):
    """Merge a list of ResourceRequirement mappings into a single one.

    For every parameter named in ``max_res_pars`` the largest value found is
    kept; for every parameter named in ``sum_res_pars`` the values are added
    up. Parameters that appear in none of the inputs are left out.

    Returns:
        The merged requirement wrapped with ``cmap``. It carries
        ``"class": "ResourceRequirement"`` only when at least one parameter
        was merged.

    Raises:
        WorkflowException: if any considered value is not an ``int``; the
            messages for all offending entries are joined with newlines.
    """
    collected = {}
    errors = []
    for param in max_res_pars + sum_res_pars:
        values = collected[param] = []
        for req in res_reqs:
            if param not in req:
                continue
            if isinstance(req[param], int):  # integer check
                values.append(req[param])
                continue
            errors.append(SourceLine(req, param).makeError(
                "Non-top-level ResourceRequirement in single container cannot have expressions"))

    if errors:
        raise WorkflowException("\n".join(errors))

    merged = {}
    for param, values in collected.items():
        if not values:
            continue
        if param in max_res_pars:
            merged[param] = max(values)
        elif param in sum_res_pars:
            merged[param] = sum(values)
    if merged:
        merged["class"] = "ResourceRequirement"
    return cmap(merged)
Exemple #9
0
def fetch_document(
    argsworkflow,  # type: Union[Text, Dict[Text, Any]]
    resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
    fetcher_constructor=None
    # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    Args:
        argsworkflow: either a string reference to the document, resolved
            with ``resolve_tool_uri`` and fetched through the loader, or an
            already-loaded ``dict``.
        resolver: passed through to ``resolve_tool_uri``.
        fetcher_constructor: passed through to the ``Loader`` constructor.

    Returns:
        A ``(document_loader, workflowobj, uri)`` tuple. For a ``dict``
        input the uri is ``"#"`` followed by the ``id()`` of the dict.

    Raises:
        ValidationException: if *argsworkflow* is neither a string nor a dict.
    """

    document_loader = Loader(
        jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    uri = None  # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(argsworkflow,
                                        resolver=resolver,
                                        document_loader=document_loader)
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        # Wrap the value in a 1-tuple: a bare tuple on the right of "%" would
        # be unpacked as format arguments and raise TypeError instead.
        raise ValidationException(
            "Must be URI or object: '%s'" % (argsworkflow,))

    return document_loader, workflowobj, uri
Exemple #10
0
    def test_schemas(self):
        """Resolve a namespaced key in a document that declares ``$schemas``.

        The ``edam:`` prefixed key and value are expected to be expanded to
        full URIs, while ``$schemas`` and ``$namespaces`` pass through.
        """
        def edam_uri():
            """Return the file URI of the bundled EDAM ontology."""
            return schema_salad.ref_resolver.file_uri(
                get_data("tests/EDAM.owl"))

        loader = schema_salad.ref_resolver.Loader({})

        document = cmap({
            u"$schemas": [edam_uri()],
            u"$namespaces": {u"edam": u"http://edamontology.org/"},
            u"edam:has_format": u"edam:format_1915",
        })
        resolved, _ = loader.resolve_all(document, "")

        expected = {
            u"$schemas": [edam_uri()],
            u"$namespaces": {u"edam": u"http://edamontology.org/"},
            u'http://edamontology.org/has_format':
            u'http://edamontology.org/format_1915',
        }
        self.assertEqual(expected, resolved)
Exemple #11
0
def get_overall_res_req(res_reqs):
    """Merge a list of ResourceRequirement mappings into a single one.

    For every parameter named in ``max_res_pars`` the largest value found is
    kept; for every parameter named in ``sum_res_pars`` the values are added
    up. Parameters that appear in none of the inputs are left out.

    Returns:
        The merged requirement wrapped with ``cmap``. It carries
        ``"class": "ResourceRequirement"`` only when at least one parameter
        was merged.

    Raises:
        WorkflowException: if any considered value is not an ``int``; the
            messages for all offending entries are joined with newlines.
    """
    collected = {}
    errors = []
    for param in max_res_pars + sum_res_pars:
        values = collected[param] = []
        for req in res_reqs:
            if param not in req:
                continue
            if isinstance(req[param], int):  # integer check
                values.append(req[param])
                continue
            errors.append(SourceLine(req, param).makeError(
                "Non-top-level ResourceRequirement in single container cannot have expressions"))

    if errors:
        raise WorkflowException("\n".join(errors))

    merged = {}
    for param, values in collected.items():
        if not values:
            continue
        if param in max_res_pars:
            merged[param] = max(values)
        elif param in sum_res_pars:
            merged[param] = sum(values)
    if merged:
        merged["class"] = "ResourceRequirement"
    return cmap(merged)
    def test_idmap(self):
        """Resolve a document whose ``inputs`` use mapSubject/mapPredicate.

        The ``inputs`` mapping is expected to become a sequence of items with
        ``id`` and ``a`` fields, and identifiers are expected to be scoped
        under the parent ``id``.
        """
        loader = schema_salad.ref_resolver.Loader({})
        context = {
            "inputs": {
                "@id": "http://example.com/inputs",
                "mapSubject": "id",
                "mapPredicate": "a"
            },
            "outputs": {
                "@type": "@id",
                "identity": True,
            },
            "id": "@id",
        }
        loader.add_context(context)

        document = cmap({
            "id": "stuff",
            "inputs": {"zip": 1, "zing": 2},
            "outputs": ["out"],
            "other": {'n': 9},
        })
        resolved, _ = loader.resolve_all(document, "http://example2.com/")

        self.assertEqual("http://example2.com/#stuff", resolved["id"])
        for entry in resolved["inputs"]:
            # The value stored under "a" tells which input this entry came from.
            if entry["a"] == 2:
                wanted_id = 'http://example2.com/#stuff/zing'
            else:
                wanted_id = 'http://example2.com/#stuff/zip'
            self.assertEqual(wanted_id, entry["id"])
        self.assertEqual(['http://example2.com/#stuff/out'], resolved['outputs'])
        self.assertEqual({'n': 9}, resolved['other'])
def test_commandLineTool_job_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that non-docker enabled CommandLineTool respects temp directory directives."""
    metadata = {
        "cwlVersion": INTERNAL_VERSION,
        "http://commonwl.org/cwltool#original_cwlVersion": INTERNAL_VERSION,
    }
    loading_context = LoadingContext({"metadata": metadata})

    # Minimal tool description: no inputs, outputs or requirements.
    tool_doc = cmap({
        "cwlVersion": INTERNAL_VERSION,
        "class": "CommandLineTool",
        "inputs": [],
        "outputs": [],
        "requirements": [],
    })
    clt = CommandLineTool(cast(CommentedMap, tool_doc), loading_context)

    tmpdir_prefix = str(tmp_path / "1")
    tmp_outdir_prefix = str(tmp_path / "2")
    runtime_options = {
        "tmpdir_prefix": tmpdir_prefix,
        "tmp_outdir_prefix": tmp_outdir_prefix,
    }
    runtime_context = RuntimeContext(runtime_options)

    # Only the first job produced by the tool is inspected.
    job = next(clt.job({}, None, runtime_context))
    assert isinstance(job, JobBase)
    assert job.stagedir and job.stagedir.startswith(tmpdir_prefix)
    assert job.tmpdir and job.tmpdir.startswith(tmpdir_prefix)
    assert job.outdir and job.outdir.startswith(tmp_outdir_prefix)
Exemple #14
0
def fetch_document(argsworkflow,        # type: Union[Text, Dict[Text, Any]]
                   loadingContext=None  # type: Optional[LoadingContext]
                  ):  # type: (...) -> Tuple[LoadingContext, CommentedMap, Text]
    """Retrieve a CWL document.

    Args:
        argsworkflow: either a string reference to the document, resolved
            with ``resolve_tool_uri`` and fetched through the loader, or an
            already-loaded ``dict``.
        loadingContext: optional context; when given, a copy is used and a
            default loader is attached to the copy if it has none.

    Returns:
        A ``(loadingContext, workflowobj, uri)`` tuple. For a ``dict`` input
        the uri is its truthy ``"id"`` value, or else ``"_:"`` plus a fresh
        UUID, and the document is registered in the loader index under it.

    Raises:
        ValidationException: if *argsworkflow* is neither a string nor a dict.
    """

    if loadingContext is None:
        loadingContext = LoadingContext()
        loadingContext.loader = default_loader()
    else:
        # Continue with the object returned by copy() rather than the argument.
        loadingContext = loadingContext.copy()
        if loadingContext.loader is None:
            loadingContext.loader = default_loader(loadingContext.fetcher_constructor)

    uri = None  # type: Optional[Text]
    workflowobj = None  # type: Optional[CommentedMap]
    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(argsworkflow,
                                        resolver=loadingContext.resolver,
                                        document_loader=loadingContext.loader)
        workflowobj = loadingContext.loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = argsworkflow["id"] if argsworkflow.get("id") else "_:" + Text(uuid.uuid4())
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        loadingContext.loader.idx[uri] = workflowobj
    else:
        # Wrap the value in a 1-tuple: a bare tuple on the right of "%" would
        # be unpacked as format arguments and raise TypeError instead.
        raise ValidationException(
            "Must be URI or object: '%s'" % (argsworkflow,))
    assert workflowobj is not None

    return loadingContext, workflowobj, uri
def test_schemas():
    """Resolve a namespaced key in a document that declares ``$schemas``.

    The ``edam:`` prefixed key and value are expected to be expanded to full
    URIs, while ``$schemas`` and ``$namespaces`` pass through.
    """
    def edam_uri():
        """Return the file URI of the bundled EDAM ontology."""
        return schema_salad.ref_resolver.file_uri(get_data("tests/EDAM.owl"))

    loader = schema_salad.ref_resolver.Loader({})

    document = cmap({
        "$schemas": [edam_uri()],
        "$namespaces": {"edam": "http://edamontology.org/"},
        "edam:has_format": "edam:format_1915",
    })
    resolved, _ = loader.resolve_all(document, "")

    expected = {
        "$schemas": [edam_uri()],
        "$namespaces": {"edam": "http://edamontology.org/"},
        "http://edamontology.org/has_format":
        "http://edamontology.org/format_1915",
    }
    assert expected == resolved
Exemple #16
0
    def test_idmap(self):
        """Resolve a document whose ``inputs`` use mapSubject/mapPredicate.

        The ``inputs`` mapping is expected to become a sequence of items with
        ``id`` and ``a`` fields, and identifiers are expected to be scoped
        under the parent ``id``.
        """
        loader = schema_salad.ref_resolver.Loader({})
        context = {
            "inputs": {
                "@id": "http://example.com/inputs",
                "mapSubject": "id",
                "mapPredicate": "a"
            },
            "outputs": {
                "@type": "@id",
                "identity": True,
            },
            "id": "@id",
        }
        loader.add_context(context)

        document = cmap({
            "id": "stuff",
            "inputs": {"zip": 1, "zing": 2},
            "outputs": ["out"],
            "other": {'n': 9},
        })
        resolved, _ = loader.resolve_all(document, "http://example2.com/")

        self.assertEqual("http://example2.com/#stuff", resolved["id"])
        for entry in resolved["inputs"]:
            # The value stored under "a" tells which input this entry came from.
            if entry["a"] == 2:
                wanted_id = 'http://example2.com/#stuff/zing'
            else:
                wanted_id = 'http://example2.com/#stuff/zip'
            self.assertEqual(wanted_id, entry["id"])
        self.assertEqual(['http://example2.com/#stuff/out'], resolved['outputs'])
        self.assertEqual({'n': 9}, resolved['other'])
def test_rdf_datetime() -> None:
    """Affirm that datetime objects can be serialized in makerdf()."""
    loader = Loader({})
    ctx: ContextType = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }
    loader.add_context(ctx)

    document = cmap({"id": "foo", "bar": {"id": "baz"}})
    outcome = loader.resolve_all(document, "http://example.com")
    resolved: CommentedMap = cast(CommentedMap, outcome[0])
    # Attach a datetime value after resolution; it is what makerdf() must handle.
    resolved["s:dateCreated"] = datetime.datetime(2020, 10, 8)

    # Serialize once with the mapping itself and once wrapped in a sequence.
    single_graph = makerdf(None, resolved, ctx)
    single_graph.serialize(destination=stdout(), format="n3")
    wrapped_graph = makerdf(None, CommentedSeq([resolved]), ctx)
    wrapped_graph.serialize(destination=stdout(), format="n3")
Exemple #18
0
def fetch_document(
    argsworkflow: Union[str, Dict[str, Any]],
    loadingContext: Optional[LoadingContext] = None,
) -> Tuple[LoadingContext, CommentedMap, str]:
    """Retrieve a CWL document.

    Args:
        argsworkflow: either a string reference to the document, resolved
            with ``resolve_tool_uri`` and fetched through the loader, or an
            already-loaded ``dict``.
        loadingContext: optional context; when given, a copy is used and a
            default loader is attached to the copy if it has none.

    Returns:
        A ``(loadingContext, workflowobj, uri)`` tuple. For a ``dict`` input
        the uri is its truthy ``"id"`` value, or else ``"_:"`` plus a fresh
        UUID, and the document is registered in the loader index under it.

    Raises:
        ValidationException: if *argsworkflow* is neither a string nor a dict.
    """
    if loadingContext is None:
        loadingContext = LoadingContext()
        loadingContext.loader = default_loader()
    else:
        # Continue with the object returned by copy() rather than the argument.
        loadingContext = loadingContext.copy()
        if loadingContext.loader is None:
            loadingContext.loader = default_loader(
                loadingContext.fetcher_constructor,
                enable_dev=loadingContext.enable_dev,
                doc_cache=loadingContext.doc_cache,
            )

    if isinstance(argsworkflow, str):
        uri, fileuri = resolve_tool_uri(
            argsworkflow,
            resolver=loadingContext.resolver,
            document_loader=loadingContext.loader,
        )
        workflowobj = loadingContext.loader.fetch(fileuri)
        return loadingContext, workflowobj, uri
    if isinstance(argsworkflow, dict):
        uri = argsworkflow["id"] if argsworkflow.get("id") else "_:" + str(
            uuid.uuid4())
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        loadingContext.loader.idx[uri] = workflowobj
        return loadingContext, workflowobj, uri
    # An f-string formats any value; "%s" % value would raise TypeError (or
    # misreport the value) when the value happens to be a tuple.
    raise ValidationException(f"Must be URI or object: '{argsworkflow}'")
Exemple #19
0
def fetch_document(
    argsworkflow,  # type: Union[Text, Dict[Text, Any]]
    loadingContext=None  # type: Optional[LoadingContext]
):  # type: (...) -> Tuple[LoadingContext, CommentedMap, Text]
    """Retrieve a CWL document.

    Args:
        argsworkflow: either a string reference to the document, resolved
            with ``resolve_tool_uri`` and fetched through the loader, or an
            already-loaded ``dict``.
        loadingContext: optional context; when given, a copy is used and a
            default loader is attached to the copy if it has none.

    Returns:
        A ``(loadingContext, workflowobj, uri)`` tuple. For a ``dict`` input
        the uri is its truthy ``"id"`` value, or else ``"_:"`` plus a fresh
        UUID, and the document is registered in the loader index under it.

    Raises:
        ValidationException: if *argsworkflow* is neither a string nor a dict.
    """

    if loadingContext is None:
        loadingContext = LoadingContext()
        loadingContext.loader = default_loader()
    else:
        # Continue with the object returned by copy() rather than the argument.
        loadingContext = loadingContext.copy()
        if loadingContext.loader is None:
            loadingContext.loader = default_loader(
                loadingContext.fetcher_constructor)

    uri = None  # type: Optional[Text]
    workflowobj = None  # type: Optional[CommentedMap]
    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(argsworkflow,
                                        resolver=loadingContext.resolver,
                                        document_loader=loadingContext.loader)
        workflowobj = loadingContext.loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = argsworkflow["id"] if argsworkflow.get("id") else "_:" + Text(
            uuid.uuid4())
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        loadingContext.loader.idx[uri] = workflowobj
    else:
        # Wrap the value in a 1-tuple: a bare tuple on the right of "%" would
        # be unpacked as format arguments and raise TypeError instead.
        raise ValidationException(
            "Must be URI or object: '%s'" % (argsworkflow,))
    assert workflowobj is not None

    return loadingContext, workflowobj, uri
Exemple #20
0
def test_idmap() -> None:
    """Resolve a document whose ``inputs`` use mapSubject/mapPredicate.

    The ``inputs`` mapping is expected to become a sequence of items with
    ``id`` and ``a`` fields, and identifiers are expected to be scoped under
    the parent ``id``.
    """
    loader = Loader({})
    context = {
        "inputs": {
            "@id": "http://example.com/inputs",
            "mapSubject": "id",
            "mapPredicate": "a",
        },
        "outputs": {"@type": "@id", "identity": True},
        "id": "@id",
    }
    loader.add_context(context)

    document = cmap(
        {
            "id": "stuff",
            "inputs": {"zip": 1, "zing": 2},
            "outputs": ["out"],
            "other": {"n": 9},
        }
    )
    resolved, _ = loader.resolve_all(document, "http://example2.com/")
    assert isinstance(resolved, CommentedMap)

    assert "http://example2.com/#stuff" == resolved["id"]
    for entry in resolved["inputs"]:
        # The value stored under "a" tells which input this entry came from.
        if entry["a"] == 2:
            wanted_id = "http://example2.com/#stuff/zing"
        else:
            wanted_id = "http://example2.com/#stuff/zip"
        assert wanted_id == entry["id"]
    assert ["http://example2.com/#stuff/out"] == resolved["outputs"]
    assert {"n": 9} == resolved["other"]
Exemple #21
0
def test_nullable_links() -> None:
    """Check that a ``None`` value in an ``@id``-typed field passes link checking."""
    loader = schema_salad.ref_resolver.Loader({})
    ctx = {"link": {"@type": "@id"}}  # type: ContextType
    loader.add_context(ctx)

    document = cmap({"link": None})
    resolved, _ = loader.resolve_all(
        document, "http://example.com", checklinks=True
    )
    assert {"link": None} == resolved
Exemple #22
0
    def test_run(self, keepdocker):
        """Run a minimal ``ls`` tool and check the container request it submits.

        The scenario is repeated with ``enable_reuse`` on and off; the flag is
        expected to show up as ``use_existing`` in the request body.
        """
        for enable_reuse in (True, False):
            arv_docker_clear_cache()

            runner = mock.MagicMock()
            runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
            runner.ignore_docker_for_reuse = False
            runner.intermediate_output_ttl = 0

            keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
            runner.api.collections().get().execute.return_value = {
                "portable_data_hash": "99999999999999999999999999999993+99"}

            # Only avsc_names is used below; the other results are ignored.
            _loader, avsc_names, _metadata, _metaschema = cwltool.process.get_schema("v1.0")

            tool = cmap({
                "inputs": [],
                "outputs": [],
                "baseCommand": "ls",
                "arguments": [{"valueFrom": "$(runtime.outdir)"}]
            })
            fs_access_factory = functools.partial(
                arvados_cwl.CollectionFsAccess,
                collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
            arvtool = arvados_cwl.ArvadosCommandTool(
                runner, tool, work_api="containers", avsc_names=avsc_names,
                basedir="", make_fs_access=fs_access_factory, loader=Loader({}))
            arvtool.formatgraph = None

            job_name = "test_run_"+str(enable_reuse)
            jobs = arvtool.job({}, mock.MagicMock(), basedir="", name=job_name,
                               make_fs_access=fs_access_factory, tmpdir="/tmp")
            for job in jobs:
                job.run(enable_reuse=enable_reuse)

                expected_body = {
                    'environment': {
                        'HOME': '/var/spool/cwl',
                        'TMPDIR': '/tmp'
                    },
                    'name': 'test_run_'+str(enable_reuse),
                    'runtime_constraints': {
                        'vcpus': 1,
                        'ram': 1073741824
                    },
                    'use_existing': enable_reuse,
                    'priority': 1,
                    'mounts': {
                        '/tmp': {'kind': 'tmp', "capacity": 1073741824},
                        '/var/spool/cwl': {'kind': 'tmp', "capacity": 1073741824},
                    },
                    'state': 'Committed',
                    'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                    'output_path': '/var/spool/cwl',
                    'output_ttl': 0,
                    'container_image': 'arvados/jobs',
                    'command': ['ls', '/var/spool/cwl'],
                    'cwd': '/var/spool/cwl',
                    'scheduling_parameters': {},
                    'properties': {},
                }
                runner.api.container_requests().create.assert_called_with(
                    body=JsonDiffMatcher(expected_body))
Exemple #23
0
def test_jsonld_ctx() -> None:
    """Load a schema declaring a ``foo`` namespace and resolve a prefixed key.

    The ``foo:bar`` key is expected to be expanded to its full URI.
    """
    schema_doc = cmap(
        {
            "$base": "Y",
            "name": "X",
            "$namespaces": {"foo": "http://example.com/foo#"},
            "$graph": [
                {"name": "ExampleType", "type": "enum", "symbols": ["asym", "bsym"]}
            ],
        }
    )
    # load_schema returns four values; only the loader is needed here.
    loader, _, _, _ = schema_salad.schema.load_schema(schema_doc)

    resolved, _ = loader.resolve_all(cmap({"foo:bar": "asym"}), "X")

    assert resolved == {"http://example.com/foo#bar": "asym"}
Exemple #24
0
def test_mixin() -> None:
    """Resolve ``$mixin`` references through resolve_ref and resolve_all."""
    tests_uri = file_uri(os.path.join(os.getcwd(), "tests"))
    loader = Loader({})
    mixin_path = get_data("tests/mixin.yml")
    assert mixin_path

    # Single document: the expected result keeps the explicit "one" value.
    single = cmap({"$mixin": mixin_path, "one": "five"})
    resolved = loader.resolve_ref(single, base_url=tests_uri)
    assert {"id": "four", "one": "five"} == resolved[0]

    # With "id" declared as @id, the same mixin used under two parents is
    # expected to receive an identifier scoped to each parent.
    loader = Loader({"id": "@id"})
    names = ("a", "b")
    documents = cmap([{"id": name, "m": {"$mixin": mixin_path}} for name in names])
    resolved = loader.resolve_all(documents, base_url=tests_uri)

    expected = [
        {
            "id": tests_uri + "#" + name,
            "m": {"id": tests_uri + "#" + name + "/four", "one": "two"},
        }
        for name in names
    ]
    assert expected == resolved[0]
Exemple #25
0
def _convert_stdstreams_to_files(
    workflowobj: Union[MutableMapping[str, Any],
                       MutableSequence[Union[Dict[str, Any], str, int]], str]
) -> None:
    """Rewrite the stdout/stderr/stdin type shortcuts of CommandLineTools in place.

    Mappings whose ``class`` is ``"CommandLineTool"`` are processed directly
    and not descended into further; any other mapping or sequence is walked
    recursively. Values of other types are ignored.

    For each output whose ``type`` is ``"stdout"`` or ``"stderr"`` the type
    becomes ``"File"`` and an ``outputBinding`` with a ``glob`` on the stream's
    filename is added. For each input whose ``type`` is ``"stdin"`` the type
    becomes ``"File"`` and the tool's ``stdin`` is set to an expression
    referencing that input's path.

    Raises:
        ValidationException: if ``outputs`` is not a ``CommentedSeq``, an
            output is not a ``CommentedMap``, a shortcut output already has
            an ``outputBinding``, a stdin input already has an
            ``inputBinding``, or the tool already defines ``stdin``.
    """
    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get("class") == "CommandLineTool":
            with SourceLine(
                    workflowobj,
                    "outputs",
                    ValidationException,
                    _logger.isEnabledFor(logging.DEBUG),
            ):
                outputs = workflowobj.get("outputs", [])
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not '
                                              "valid.")
                for out in workflowobj.get("outputs", []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid "
                            "OutputParameter.".format(out))
                    for streamtype in ["stdout", "stderr"]:
                        if out.get("type") == streamtype:
                            if "outputBinding" in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype)
                            if streamtype in workflowobj:
                                # The tool already names a file for this stream.
                                filename = workflowobj[streamtype]
                            else:
                                # No filename given: derive one from a SHA-1 of
                                # the tool's current JSON dump and store it on
                                # the tool. The dump reflects any edits made by
                                # earlier iterations, so order matters here.
                                filename = str(
                                    hashlib.sha1(  # nosec
                                        json_dumps(workflowobj,
                                                   sort_keys=True).encode(
                                                       "utf-8")).hexdigest())
                                workflowobj[streamtype] = filename
                            out["type"] = "File"
                            out["outputBinding"] = cmap({"glob": filename})
            for inp in workflowobj.get("inputs", []):
                if inp.get("type") == "stdin":
                    if "inputBinding" in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if "stdin" in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        # Reference the input by the part of its id after the
                        # last "#".
                        workflowobj["stdin"] = ("$(inputs.%s.path)" %
                                                inp["id"].rpartition("#")[2])
                        inp["type"] = "File"
        else:
            # Not a CommandLineTool: look for tools nested in the values.
            for entry in workflowobj.values():
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
def test_mixin():
    """Resolve ``$mixin`` references through resolve_ref and resolve_all."""
    tests_uri = schema_salad.ref_resolver.file_uri(
        os.path.join(os.getcwd(), "tests"))

    # Single document: the expected result keeps the explicit "one" value.
    loader = schema_salad.ref_resolver.Loader({})
    single = cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"})
    resolved = loader.resolve_ref(single, base_url=tests_uri)
    assert {"id": "four", "one": "five"} == resolved[0]

    # With "id" declared as @id, the same mixin used under two parents is
    # expected to receive an identifier scoped to each parent.
    loader = schema_salad.ref_resolver.Loader({"id": "@id"})
    names = ("a", "b")
    documents = cmap([
        {"id": name, "m": {"$mixin": get_data("tests/mixin.yml")}}
        for name in names
    ])
    resolved = loader.resolve_all(documents, base_url=tests_uri)

    expected = [
        {
            "id": tests_uri + "#" + name,
            "m": {"id": tests_uri + "#" + name + "/four", "one": "two"},
        }
        for name in names
    ]
    assert expected == resolved[0]
Exemple #27
0
def setSecondary(t, fileobj, discovered):
    """Fill in ``secondaryFiles`` on File objects that do not have them yet.

    *fileobj* may be a File dict or a (possibly nested) list of them. For a
    File dict without ``secondaryFiles``, one entry is derived per pattern in
    ``t["secondaryFiles"]`` by applying ``substitute`` to the file's
    ``location``. When *discovered* is not ``None``, the new list is also
    recorded there under the file's location. Anything else is left untouched.
    """
    if isinstance(fileobj, dict) and fileobj.get("class") == "File":
        if "secondaryFiles" in fileobj:
            # Already populated: never overwrite existing entries.
            return
        fileobj["secondaryFiles"] = cmap([
            {"location": substitute(fileobj["location"], pattern), "class": "File"}
            for pattern in t["secondaryFiles"]
        ])
        if discovered is not None:
            discovered[fileobj["location"]] = fileobj["secondaryFiles"]
        return

    if isinstance(fileobj, list):
        for member in fileobj:
            setSecondary(t, member, discovered)
def test_import_list() -> None:
    """Check that ``$import`` of ``list.json`` yields its list in place.

    The import is resolved relative to the directory containing this test
    module, and the expected result replaces the ``$import`` mapping with
    ``["bar", "baz"]``.
    """
    import schema_salad.ref_resolver
    from schema_salad.sourceline import cmap

    here_uri = schema_salad.ref_resolver.file_uri(
        os.path.dirname(__file__) + "/"
    )
    loader = schema_salad.ref_resolver.Loader({})
    importing_doc = cmap({"foo": {"$import": "list.json"}})

    resolved, _ = loader.resolve_all(importing_doc, here_uri)

    assert {"foo": ["bar", "baz"]} == resolved
Exemple #29
0
    def test_jsonld_ctx(self):
        """A ``$namespaces`` prefix from the schema expands document keys.

        The schema declares the ``foo`` prefix and a small enum type; a
        document using the ``foo:bar`` key is then expected to come back with
        that key expanded to the full ``http://example.com/foo#bar`` URI.
        """
        schema_doc = cmap({
            "$base": "Y",
            "name": "X",
            "$namespaces": {"foo": "http://example.com/foo#"},
            "$graph": [
                {
                    "name": "ExampleType",
                    "type": "enum",
                    "symbols": ["asym", "bsym"],
                },
            ],
        })
        # load_schema returns four values; only the loader is needed here.
        loader, _names, _metadata, _metaschema_loader = \
            schema_salad.schema.load_schema(schema_doc)

        resolved, _ = loader.resolve_all(cmap({"foo:bar": "asym"}), "X")

        expected = {'http://example.com/foo#bar': 'asym'}
        self.assertEqual(resolved, expected)
Exemple #30
0
    def test_run(self, keepdocker):
        """Container request body for a trivial ``ls`` tool, with/without reuse.

        For each value of ``enable_reuse`` the tool is turned into jobs via
        ``ArvadosCommandTool.job`` and each job is run; the mocked
        ``container_requests().create`` call is then checked against the
        expected request body, whose ``use_existing`` mirrors ``enable_reuse``.
        """
        for enable_reuse in (True, False):
            arv_docker_clear_cache()

            runner = mock.MagicMock()
            runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
            runner.ignore_docker_for_reuse = False

            keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
            runner.api.collections().get().execute.return_value = {
                "portable_data_hash": "99999999999999999999999999999993+99"}

            # Only the Avro names are needed from the v1.0 schema bundle.
            _, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

            tool = cmap({
                "inputs": [],
                "outputs": [],
                "baseCommand": "ls",
                "arguments": [{"valueFrom": "$(runtime.outdir)"}]
            })
            collection_cache = arvados_cwl.CollectionCache(runner.api, None, 0)
            make_fs_access = functools.partial(
                arvados_cwl.CollectionFsAccess,
                collection_cache=collection_cache)
            arvtool = arvados_cwl.ArvadosCommandTool(
                runner, tool, work_api="containers", avsc_names=avsc_names,
                basedir="", make_fs_access=make_fs_access, loader=Loader({}))
            arvtool.formatgraph = None

            job_name = "test_run_" + str(enable_reuse)
            expected_body = {
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp',
                },
                'name': job_name,
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': 1073741824,
                },
                'use_existing': enable_reuse,
                'priority': 1,
                'mounts': {
                    '/tmp': {'kind': 'tmp', "capacity": 1073741824},
                    '/var/spool/cwl': {'kind': 'tmp', "capacity": 1073741824},
                },
                'state': 'Committed',
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'container_image': 'arvados/jobs',
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
            }

            jobs = arvtool.job({}, mock.MagicMock(), basedir="", name=job_name,
                               make_fs_access=make_fs_access, tmpdir="/tmp")
            for j in jobs:
                j.run(enable_reuse=enable_reuse)
                create_mock = runner.api.container_requests().create
                create_mock.assert_called_with(
                    body=JsonDiffMatcher(expected_body))
Exemple #31
0
def setSecondary(t, fileobj, discovered):
    """Fill in ``secondaryFiles`` on File objects that do not declare any.

    Lists are traversed recursively. A dict with ``"class": "File"`` and no
    ``"secondaryFiles"`` key receives one ``File`` entry per pattern in
    ``t["secondaryFiles"]``; each entry's location comes from
    ``substitute(fileobj["location"], pattern)``. If ``discovered`` is not
    ``None`` the new list is also stored there, keyed by the file location.
    Every other value is ignored.
    """
    if isinstance(fileobj, list):
        for member in fileobj:
            setSecondary(t, member, discovered)
        return

    if not isinstance(fileobj, dict) or fileobj.get("class") != "File":
        return
    if "secondaryFiles" in fileobj:
        # Keep whatever the document already specified.
        return

    fileobj["secondaryFiles"] = cmap([
        {"location": substitute(fileobj["location"], pattern), "class": "File"}
        for pattern in t["secondaryFiles"]
    ])
    if discovered is None:
        return
    discovered[fileobj["location"]] = fileobj["secondaryFiles"]
    def test_jsonld_ctx(self):
        """Keys using a schema-declared namespace prefix are expanded.

        Loads a schema that maps the ``foo`` prefix to
        ``http://example.com/foo#`` and checks that resolving a document with
        a ``foo:bar`` key returns the key as the full URI.
        """
        namespaces = {"foo": "http://example.com/foo#"}
        example_enum = {
            "name": "ExampleType",
            "type": "enum",
            "symbols": ["asym", "bsym"],
        }
        schema_doc = cmap({
            "$base": "Y",
            "name": "X",
            "$namespaces": namespaces,
            "$graph": [example_enum],
        })
        # Four values come back; everything but the loader is discarded.
        ldr, _, _, _ = schema_salad.schema.load_schema(schema_doc)

        document = cmap({"foo:bar": "asym"})
        resolved, _ = ldr.resolve_all(document, "X")

        self.assertEqual(resolved, {'http://example.com/foo#bar': 'asym'})
def main():
    """Validate a tool-registry API response and optionally publish it as RDF.

    Command-line arguments:
      swagger       path to the Swagger document to convert with swg2salad
      annotations   path to the YAML annotations passed to swg2salad
      url           document to fetch (joined onto ``file://<cwd>/``) and
                    validate against the generated schema
      --print-rdf   print the RDF graph for the response as Turtle
      --serve       write the graph to a temporary ``.ttl`` file and run
                    ``./fuseki-server`` on it from ``--fuseki-path``
      --fuseki-path working directory for fuseki-server (default ".")

    Raises whatever ``validate_doc`` raises for an invalid response, and
    ``subprocess.CalledProcessError`` if fuseki-server exits non-zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("swagger")
    parser.add_argument("annotations")
    parser.add_argument("url")

    parser.add_argument("--print-rdf", action="store_true", default=False)
    parser.add_argument("--serve", action="store_true", default=False)
    parser.add_argument("--fuseki-path", type=str, default=".")

    args = parser.parse_args()
    warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)

    # NOTE(review): yaml.load is called without an explicit safe loader (the
    # warning about that is silenced just above). That is only acceptable for
    # trusted inputs; switch to a safe loader if these files or the fetched
    # document can come from an untrusted source.
    with open(args.annotations) as f2:
        annotations = yaml.load(f2)

    with open(args.swagger) as f:
        sld = swg2salad.swg2salad(yaml.load(f), annotations)

    sld["$base"] = "http://ga4gh.org/schemas/tool-registry-schemas"
    sld["name"] = "file://" + os.path.realpath(args.swagger)

    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        cmap(sld))

    txt = document_loader.fetch_text(
        urlparse.urljoin("file://" + os.getcwd() + "/", args.url))
    r = yaml.load(txt)

    validate_doc(avsc_names, r, document_loader, True)

    sys.stderr.write("API returned valid response\n")

    toolreg = Namespace("http://ga4gh.org/schemas/tool-registry-schemas#")
    td = Namespace(
        "http://ga4gh.org/schemas/tool-registry-schemas#ToolDescriptor/")

    if args.print_rdf or args.serve:
        g = jsonld_context.makerdf(args.url, r, document_loader.ctx)
        # For every subject whose descriptor type is the literal "CWL",
        # expand each of its descriptors into the graph.
        for s, _, _ in g.triples((None, td["type"], Literal("CWL"))):
            for _, _, d in g.triples((s, toolreg["descriptor"], None)):
                expand_cwl(d, unicode(s), g)

    if args.print_rdf:
        print(g.serialize(format="turtle"))

    if args.serve:
        # The context manager closes (and thereby removes) the temporary file
        # once fuseki-server returns, including when check_call raises.
        with tempfile.NamedTemporaryFile(suffix=".ttl") as t:
            g.serialize(t, format="turtle")
            # Make sure the data is on disk before the server reads the file.
            t.flush()
            subprocess.check_call(
                ["./fuseki-server", "--file=" + t.name, "/tools"],
                cwd=args.fuseki_path)
Exemple #34
0
def add_dep(fn, upstream, set_version, install_to):
    """Add or update a dependency entry in every Workflow found in ``fn``.

    The document is fetched with ``cwltool.load_tool.fetch_document`` and each
    object of class ``Workflow`` gets an entry for ``upstream`` inside a
    ``dep:Dependencies`` hint (hints may be a list or a mapping). An existing
    entry with the same ``upstream`` is updated in place; otherwise a new one
    is appended, creating the hint itself when it is missing. ``set_version``
    and ``install_to`` are recorded as ``version`` / ``installTo`` only when
    truthy.

    Afterwards the ``dep`` namespace is registered, the top-level ``id`` is
    removed, and the result is dumped to ``"_" + fn + "_"`` which is then
    renamed over ``fn``. An error is logged if no Workflow was visited.
    """
    document_loader, workflowobj, uri = cwltool.load_tool.fetch_document(fn)
    namespaces = workflowobj.get("$namespaces", cmap({}))

    document_loader.idx = {}
    found = []

    def _merge(dependencies, entry):
        # Update the entry for this upstream if present, else append a new one.
        for existing in dependencies:
            if existing["upstream"] == upstream:
                existing.update(entry)
                return
        dependencies.append(cmap(entry))

    def _add(wf):
        found.append(True)
        hints = wf.setdefault("hints", {})

        entry = cmap({"upstream": upstream})
        if set_version:
            entry["version"] = set_version
        if install_to:
            entry["installTo"] = install_to

        if isinstance(hints, list):
            # List form: each hint is a mapping carrying its own "class".
            for hint in hints:
                hint_class = expand_ns(namespaces, hint["class"])
                if hint_class == CWLDEP_DEPENDENCIES_URL:
                    _merge(hint["dependencies"], entry)
                    return
            hints.append(cmap({
                "class": "dep:Dependencies",
                "dependencies": [entry],
            }))
        elif isinstance(hints, dict):
            # Mapping form: the hint class is the key.
            for hint_name in hints:
                if expand_ns(namespaces, hint_name) == CWLDEP_DEPENDENCIES_URL:
                    _merge(hints[hint_name]["dependencies"], entry)
                    return
            hints["dep:Dependencies"] = cmap({"dependencies": [entry]})

    visit_class(workflowobj, ("Workflow", ), _add)

    if not found:
        logging.error("No Workflow found")

    namespaces["dep"] = CWLDEP_URL
    workflowobj["$namespaces"] = namespaces

    del workflowobj["id"]

    # Write to a scratch name first, then rename it over the original file.
    scratch_path = "_" + fn + "_"
    with open(scratch_path, "w") as out_file:
        ruamel.yaml.round_trip_dump(workflowobj, out_file)
    os.rename(scratch_path, fn)
Exemple #35
0
def _convert_stdstreams_to_files(workflowobj):
    # type: (Union[Dict[Text, Any], List[Dict[Text, Any]]]) -> None
    """Rewrite the stdin/stdout/stderr type shortcuts into plain File params.

    Walks ``workflowobj`` in place. For every mapping whose ``class`` is
    ``CommandLineTool``:

    * an output of type ``stdout`` or ``stderr`` becomes type ``File`` with an
      ``outputBinding`` globbing the tool's stream filename; when the tool
      does not name that stream, a SHA-1 hex digest of the tool's JSON dump is
      used as the filename and stored on the tool;
    * an input of type ``stdin`` becomes type ``File`` and the tool's
      ``stdin`` is set to ``$(inputs.<id after '#'>.path)``.

    Other mappings are searched through their values, and sequences through
    their entries.

    Raises ValidationException when ``outputs`` is not a CommentedSeq, an
    output is not a CommentedMap, a shortcut output also has an
    ``outputBinding``, a stdin input also has an ``inputBinding``, or the
    tool already defines ``stdin`` while an input uses the stdin shortcut.
    """

    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get('class') == 'CommandLineTool':
            with SourceLine(workflowobj, "outputs", ValidationException,
                            _logger.isEnabledFor(logging.DEBUG)):
                outputs = workflowobj.get('outputs', [])
                # NOTE(review): the plain-list default is not a CommentedSeq,
                # so a tool without "outputs" appears to be rejected here --
                # confirm this is intended.
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not '
                                              'valid.')
                for out in workflowobj.get('outputs', []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid "
                            "OutputParameter.".format(out))
                    for streamtype in ['stdout', 'stderr']:
                        if out.get('type') == streamtype:
                            if 'outputBinding' in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype)
                            if streamtype in workflowobj:
                                filename = workflowobj[streamtype]
                            else:
                                # No filename given: derive one from the
                                # current content of the tool. The tool is
                                # mutated right after, so the order of these
                                # statements affects later digests.
                                filename = Text(
                                    hashlib.sha1(
                                        json_dumps(workflowobj,
                                                   sort_keys=True).encode(
                                                       'utf-8')).hexdigest())
                                workflowobj[streamtype] = filename
                            out['type'] = 'File'
                            out['outputBinding'] = cmap({'glob': filename})
            for inp in workflowobj.get('inputs', []):
                if inp.get('type') == 'stdin':
                    if 'inputBinding' in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if 'stdin' in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        # Only the part of the id after the last '#' is used
                        # in the generated expression.
                        workflowobj['stdin'] = \
                            "$(inputs.%s.path)" % \
                            inp['id'].rpartition('#')[2]
                        inp['type'] = 'File'
        else:
            # Not a tool itself: look for tools nested in the values.
            for entry in itervalues(workflowobj):
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
Exemple #36
0
def _convert_stdstreams_to_files(workflowobj):
    # type: (Union[Dict[Text, Any], List[Dict[Text, Any]]]) -> None
    """Expand stdin/stdout/stderr shortcut types on CommandLineTools in place.

    Mappings with ``class == 'CommandLineTool'`` are rewritten:

    * outputs typed ``stdout``/``stderr`` become ``File`` outputs whose
      ``outputBinding`` globs the tool's filename for that stream; if the
      tool has none, the SHA-1 hex digest of its sorted-key JSON dump is used
      and saved on the tool under the stream name;
    * inputs typed ``stdin`` become ``File`` inputs and the tool's ``stdin``
      is set to ``$(inputs.<id after '#'>.path)``.

    Any other mapping is descended into via its values; sequences via their
    entries.

    Raises ValidationException for an ``outputs`` value that is not a
    CommentedSeq, an output that is not a CommentedMap, a shortcut combined
    with ``outputBinding``/``inputBinding``, or a stdin shortcut on a tool
    that already sets ``stdin``.
    """

    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get('class') == 'CommandLineTool':
            with SourceLine(workflowobj, "outputs", ValidationException,
                            _logger.isEnabledFor(logging.DEBUG)):
                outputs = workflowobj.get('outputs', [])
                # NOTE(review): a missing "outputs" yields a plain list here,
                # which is not a CommentedSeq -- presumably that raises;
                # confirm against callers.
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not '
                                              'valid.')
                for out in workflowobj.get('outputs', []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid "
                            "OutputParameter.".format(out))
                    for streamtype in ['stdout', 'stderr']:
                        if out.get('type') == streamtype:
                            if 'outputBinding' in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype)
                            if streamtype in workflowobj:
                                filename = workflowobj[streamtype]
                            else:
                                # Generated name: digest of the tool as it is
                                # right now. Storing it below changes the
                                # tool, so a later digest will differ.
                                filename = Text(
                                    hashlib.sha1(json_dumps(workflowobj,
                                                            sort_keys=True
                                                           ).encode('utf-8')
                                                ).hexdigest())
                                workflowobj[streamtype] = filename
                            out['type'] = 'File'
                            out['outputBinding'] = cmap({'glob': filename})
            for inp in workflowobj.get('inputs', []):
                if inp.get('type') == 'stdin':
                    if 'inputBinding' in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut.")
                    if 'stdin' in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut.")
                    else:
                        # The expression refers to the input by the fragment
                        # of its id following the last '#'.
                        workflowobj['stdin'] = \
                            "$(inputs.%s.path)" % \
                            inp['id'].rpartition('#')[2]
                        inp['type'] = 'File'
        else:
            # Keep searching nested values for CommandLineTools.
            for entry in itervalues(workflowobj):
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
def test_subscoped_id():
    """Identifiers under a ``subscope`` field are nested below that scope.

    With ``bar`` declared as ``{"subscope": "bar"}``, the inner id ``baz`` of
    an object identified as ``foo`` is expected to resolve to
    ``http://example.com/#foo/bar/baz``.
    """
    loader = schema_salad.ref_resolver.Loader({})
    loader.add_context({"id": "@id", "bar": {"subscope": "bar"}})

    document = cmap({"id": "foo", "bar": {"id": "baz"}})
    resolved, _ = loader.resolve_all(document, "http://example.com")

    expected = {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/bar/baz"},
    }
    assert expected == resolved
def main():
    """Check a tool-registry response against the Swagger-derived schema.

    Positional arguments are the Swagger file, the YAML annotations file
    handed to ``swg2salad``, and the URL (resolved against ``file://<cwd>/``)
    of the response to validate. After ``validate_doc`` succeeds a
    confirmation is written to stderr.

    ``--print-rdf`` prints the response's RDF graph as Turtle; ``--serve``
    writes the graph to a temporary ``.ttl`` file and runs
    ``./fuseki-server`` on it with ``--fuseki-path`` as working directory.

    Raises whatever ``validate_doc`` raises for an invalid response, and
    ``subprocess.CalledProcessError`` if fuseki-server exits non-zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("swagger")
    parser.add_argument("annotations")
    parser.add_argument("url")

    parser.add_argument("--print-rdf", action="store_true", default=False)
    parser.add_argument("--serve", action="store_true", default=False)
    parser.add_argument("--fuseki-path", type=str, default=".")

    args = parser.parse_args()
    warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)

    # NOTE(review): these yaml.load calls use no explicit safe loader and the
    # matching warning is suppressed above. Fine for trusted files only; use
    # a safe loader if any of the inputs may be untrusted.
    with open(args.annotations) as f2:
        annotations = yaml.load(f2)

    with open(args.swagger) as f:
        sld = swg2salad.swg2salad(yaml.load(f), annotations)

    sld["$base"] = "http://ga4gh.org/schemas/tool-registry-schemas"
    sld["name"] = "file://" + os.path.realpath(args.swagger)

    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        cmap(sld))

    txt = document_loader.fetch_text(
        urlparse.urljoin("file://" + os.getcwd() + "/", args.url))
    r = yaml.load(txt)

    validate_doc(avsc_names, r, document_loader, True)

    sys.stderr.write("API returned valid response\n")

    toolreg = Namespace("http://ga4gh.org/schemas/tool-registry-schemas#")
    td = Namespace(
        "http://ga4gh.org/schemas/tool-registry-schemas#ToolDescriptor/")

    if args.print_rdf or args.serve:
        g = jsonld_context.makerdf(args.url, r, document_loader.ctx)
        # Expand the descriptors of every subject typed as the literal "CWL".
        for s, _, _ in g.triples((None, td["type"], Literal("CWL"))):
            for _, _, d in g.triples((s, toolreg["descriptor"], None)):
                expand_cwl(d, unicode(s), g)

    if args.print_rdf:
        print(g.serialize(format="turtle"))

    if args.serve:
        # Scope the temporary file to the server run so it is always closed
        # and removed afterwards, even if fuseki-server fails.
        with tempfile.NamedTemporaryFile(suffix=".ttl") as t:
            g.serialize(t, format="turtle")
            # Flush so the server sees the complete file.
            t.flush()
            subprocess.check_call(
                ["./fuseki-server", "--file=" + t.name, "/tools"],
                cwd=args.fuseki_path)
def test_blank_node_id() -> None:
    """Blank node identifiers must come back from resolution unchanged.

    Blank nodes (also called anonymous ids) are URIs starting with "_:".
    They are randomly generated placeholders mainly used internally where an
    id is needed but was not given, so they must not be treated as relative
    paths and joined onto the base URL.
    """
    loader = Loader({})
    id_context = {"id": "@id"}  # type: ContextType
    loader.add_context(id_context)

    blank_doc = cmap({"id": "_:foo"})
    resolved, _ = loader.resolve_all(blank_doc, "http://example.com")

    assert {"id": "_:foo"} == resolved
Exemple #40
0
    def test_run(self, list_images_in_arv):
        """Job submission body for a trivial ``ls`` tool, with/without reuse.

        For each value of ``enable_reuse`` the tool is turned into jobs via
        ``ArvadosCommandTool.job`` (``work_api="jobs"``) and each job is run;
        the mocked ``jobs().create`` call is then checked for the expected
        body, ``find_or_create`` flag and reuse filters.
        """
        for enable_reuse in (True, False):
            runner = mock.MagicMock()
            runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
            runner.ignore_docker_for_reuse = False
            runner.num_retries = 0
            document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

            list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
            # This attribute was misspelled "return_vaulue"; MagicMock accepts
            # any attribute name, so the intended return value was silently
            # never configured.
            runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}

            tool = cmap({
                "inputs": [],
                "outputs": [],
                "baseCommand": "ls",
                "arguments": [{"valueFrom": "$(runtime.outdir)"}]
            })
            make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess,
                                         collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
            arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names,
                                                     basedir="", make_fs_access=make_fs_access, loader=Loader({}))
            arvtool.formatgraph = None
            for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
                j.run(enable_reuse=enable_reuse)
                runner.api.jobs().create.assert_called_with(
                    body=JsonDiffMatcher({
                        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                        'script_parameters': {
                            'tasks': [{
                                'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
                                'command': ['ls', '$(task.outdir)']
                            }],
                        },
                        'script_version': 'master',
                        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
                        'repository': 'arvados',
                        'script': 'crunchrunner',
                        # The literal used to contain an earlier, empty
                        # 'runtime_constraints' entry; a repeated key keeps
                        # only its last value, so that entry was dead and has
                        # been removed.
                        'runtime_constraints': {
                            'docker_image': 'arvados/jobs',
                            'min_cores_per_node': 1,
                            'min_ram_mb_per_node': 1024,
                            'min_scratch_mb_per_node': 2048 # tmpdirSize + outdirSize
                        }
                    }),
                    find_or_create=enable_reuse,
                    filters=[['repository', '=', 'arvados'],
                             ['script', '=', 'crunchrunner'],
                             ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                             ['docker_image_locator', 'in docker', 'arvados/jobs']]
                )
Exemple #41
0
    def test_schemas(self):
        """Prefixed keys and values expand while ``$schemas`` is kept as is.

        The document references the EDAM ontology file through ``$schemas``
        and declares the ``edam`` prefix; after resolution the
        ``edam:has_format`` key and its ``edam:format_1915`` value are both
        expected as full ``http://edamontology.org/`` URIs, with ``$schemas``
        and ``$namespaces`` unchanged.
        """
        loader = schema_salad.ref_resolver.Loader({})
        edam_owl_uri = schema_salad.ref_resolver.file_uri(
            get_data("tests/EDAM.owl"))
        edam_namespace = u"http://edamontology.org/"

        document = cmap({
            u"$schemas": [edam_owl_uri],
            u"$namespaces": {u"edam": edam_namespace},
            u"edam:has_format": u"edam:format_1915",
        })
        resolved, _ = loader.resolve_all(document, "")

        expected = {
            u"$schemas": [edam_owl_uri],
            u"$namespaces": {u"edam": u"http://edamontology.org/"},
            u'http://edamontology.org/has_format':
                u'http://edamontology.org/format_1915',
        }
        self.assertEqual(expected, resolved)
Exemple #42
0
    def test_typedsl_ref(self):
        """The type DSL suffixes ``?`` and ``[]`` expand to full type forms.

        With ``typeDSL`` enabled on the ``type`` field, a plain name stays as
        is, ``?`` produces a union with ``null``, ``[]`` produces an array
        type, and ``[]?`` produces an optional array.
        """
        ldr = schema_salad.ref_resolver.Loader({})
        type_field = {"@type": "@vocab", "typeDSL": True}
        ldr.add_context({
            "File": "http://example.com/File",
            "null": "http://example.com/null",
            "array": "http://example.com/array",
            "type": type_field,
        })

        # (shorthand as written, expected expanded value), checked in order.
        cases = [
            ("File", 'File'),
            ("File?", ['null', 'File']),
            ("File[]", {'items': 'File', 'type': 'array'}),
            ("File[]?", ['null', {'items': 'File', 'type': 'array'}]),
        ]
        for shorthand, expanded in cases:
            resolved, _ = ldr.resolve_all(cmap({"type": shorthand}), "")
            self.assertEqual({'type': expanded}, resolved)
Exemple #43
0
    def test_subscoped_id(self):
        """Ids nested under a ``subscope`` field gain that scope in their URI.

        ``bar`` is declared with ``subscope: bar``, so the id ``baz`` inside
        the object identified as ``foo`` is expected to resolve to
        ``http://example.com/#foo/bar/baz``.
        """
        ldr = schema_salad.ref_resolver.Loader({})
        ldr.add_context({"id": "@id", "bar": {"subscope": "bar"}})

        document = cmap({"id": "foo", "bar": {"id": "baz"}})
        resolved, _ = ldr.resolve_all(document, "http://example.com")

        expected = {
            'id': 'http://example.com/#foo',
            'bar': {'id': 'http://example.com/#foo/bar/baz'},
        }
        self.assertEqual(expected, resolved)
Exemple #44
0
def _convert_stdstreams_to_files(workflowobj):
    # type: (Union[Dict[Text, Any], List[Dict[Text, Any]]]) -> None
    """Replace stdin/stdout/stderr type shortcuts with ordinary File params.

    The object is modified in place. For each dict whose ``class`` is
    ``CommandLineTool``:

    * an output typed ``stdout`` or ``stderr`` becomes a ``File`` output with
      an ``outputBinding`` glob on the tool's filename for that stream; if
      the tool has none, the SHA-1 hex digest of its sorted-key JSON dump is
      used and stored on the tool;
    * an input typed ``stdin`` becomes a ``File`` input and the tool's
      ``stdin`` is set to ``$(inputs.<id after '#'>.path)``.

    Other dicts are searched through their values and lists through their
    entries.

    Raises ValidationException if a shortcut is combined with an explicit
    ``outputBinding``/``inputBinding``, or if the stdin shortcut is used on a
    tool that already defines ``stdin``.
    """
    if isinstance(workflowobj, dict):
        if workflowobj.get('class') != 'CommandLineTool':
            # Not a tool: keep looking inside the nested values.
            for child in itervalues(workflowobj):
                _convert_stdstreams_to_files(child)
        else:
            for output in workflowobj.get('outputs', []):
                for stream in ('stdout', 'stderr'):
                    if output.get('type') != stream:
                        continue
                    if 'outputBinding' in output:
                        raise ValidationException(
                            "Not allowed to specify outputBinding when"
                            " using %s shortcut." % stream)
                    if stream in workflowobj:
                        filename = workflowobj[stream]
                    else:
                        # Derive a name from the tool's current content; the
                        # tool is updated right after, so order matters.
                        serialized = json.dumps(workflowobj, sort_keys=True)
                        digest = hashlib.sha1(serialized.encode('utf-8'))
                        filename = Text(digest.hexdigest())
                        workflowobj[stream] = filename
                    output['type'] = 'File'
                    output['outputBinding'] = cmap({'glob': filename})

            for param in workflowobj.get('inputs', []):
                if param.get('type') != 'stdin':
                    continue
                if 'inputBinding' in param:
                    raise ValidationException(
                        "Not allowed to specify inputBinding when"
                        " using stdin shortcut.")
                if 'stdin' in workflowobj:
                    raise ValidationException(
                        "Not allowed to specify stdin path when"
                        " using stdin type shortcut.")
                # Reference the input by the part of its id after the last '#'.
                short_id = param['id'].rpartition('#')[2]
                workflowobj['stdin'] = "$(inputs.%s.path)" % short_id
                param['type'] = 'File'

    if isinstance(workflowobj, list):
        for child in workflowobj:
            _convert_stdstreams_to_files(child)
Exemple #45
0
def fetch_document(argsworkflow,  # type: Union[Text, Dict[Text, Any]]
                   resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: FetcherConstructorType
                  ):  # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    ``argsworkflow`` is either a string reference, resolved through
    ``resolve_tool_uri`` (with the optional ``resolver``) and then fetched
    with the loader, or an already-loaded dict, which is wrapped with
    ``cmap`` under a synthetic ``"#<id>"`` URI.

    Returns the loader, the document object and the document URI.
    Raises ValidationException for any other argument type.
    """
    # The loader is created before the argument type is inspected.
    document_loader = default_loader(fetcher_constructor)  # type: ignore

    if isinstance(argsworkflow, string_types):
        uri, fileuri = resolve_tool_uri(
            argsworkflow, resolver=resolver, document_loader=document_loader)
        fetched = document_loader.fetch(fileuri)
        return document_loader, fetched, uri

    if isinstance(argsworkflow, dict):
        # In-memory documents get a URI derived from the object's identity.
        uri = "#" + Text(id(argsworkflow))
        wrapped = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        return document_loader, wrapped, uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
Exemple #46
0
    def test_timelimit(self, keepdocker):
        """A ``TimeLimit`` hint ends up as ``max_run_time`` in the request.

        Runs a trivial ``ls`` tool carrying a cwltool ``TimeLimit`` hint of 42
        and checks that the body passed to the mocked
        ``container_requests().create`` has
        ``scheduling_parameters['max_run_time'] == 42``.
        """
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 0
        runner.secret_store = cwltool.secrets.SecretStore()

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        time_limit_hint = {
            "class": "http://commonwl.org/cwltool#TimeLimit",
            "timelimit": 42,
        }
        tool = cmap({
            "inputs": [],
            "outputs": [],
            "baseCommand": "ls",
            "arguments": [{"valueFrom": "$(runtime.outdir)"}],
            "id": "#",
            "class": "CommandLineTool",
            "hints": [time_limit_hint],
        })

        loadingContext, runtimeContext = self.helper(runner)
        runtimeContext.name = "test_timelimit"

        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
        arvtool.formatgraph = None

        for job in arvtool.job({}, mock.MagicMock(), runtimeContext):
            job.run(runtimeContext)

        # Inspect the keyword arguments of the most recent create() call.
        create_mock = runner.api.container_requests().create
        _positional, request_kwargs = create_mock.call_args
        scheduling = request_kwargs['body']['scheduling_parameters']
        self.assertEqual(42, scheduling.get('max_run_time'))
def fetch_document(argsworkflow,   # type: Union[Text, dict[Text, Any]]
                   resolver=None,  # type: Callable[[Loader, Union[Text, dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                   ):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    A string argument is turned into a URI by trying, in order: the string
    itself if it has a URL scheme, a ``file://`` URI if it names an existing
    local path, and finally the optional ``resolver`` callback. The document
    is then fetched from that URI with its fragment removed. A dict argument
    is wrapped with ``cmap`` under a synthetic ``"#<id>"`` URI.

    Returns the loader, the document object and the document URI.
    Raises ValidationException when a string cannot be resolved or the
    argument is neither a string nor a dict.
    """
    document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"},
                             fetcher_constructor=fetcher_constructor)

    if isinstance(argsworkflow, basestring):
        uri = None  # type: Text
        if urlparse.urlsplit(argsworkflow).scheme:
            # Already a URI.
            uri = argsworkflow
        else:
            local_path = os.path.abspath(argsworkflow)
            if os.path.exists(local_path):
                uri = "file://" + local_path
            elif resolver:
                uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        # Fetch the whole file; any fragment stays only in the returned uri.
        fileuri = urlparse.urldefrag(uri)[0]
        return document_loader, document_loader.fetch(fileuri), uri

    if isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        wrapped = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        return document_loader, wrapped, uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
Exemple #48
0
    def test_initial_work_dir(self, collection_mock, keepdocker):
        """``InitialWorkDirRequirement`` entries become collection mounts.

        The tool lists two files and two directories from a keep collection.
        After running the job the test checks that each entry was copied into
        the mocked staging collection under its basename, and that every key
        of the submitted container request body matches the expected body,
        including one collection mount per listing entry under
        ``/var/spool/cwl``.
        """
        arv_docker_clear_cache()
        runner = mock.MagicMock()
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False
        # Only the Avro names are used from the schema bundle.
        _, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        sourcemock = mock.MagicMock()

        def get_collection_mock(p):
            # Everything after the first "/" is the path inside the
            # collection; without a "/" that path is empty.
            _, _, inner_path = p.partition("/")
            return (sourcemock, inner_path)
        runner.fs_access.get_collection.side_effect = get_collection_mock

        staged_pdh = "99999999999999999999999999999996+99"
        vwdmock = mock.MagicMock()
        collection_mock.return_value = vwdmock
        vwdmock.portable_data_hash.return_value = staged_pdh

        listing = [
            {
                "class": "File",
                "basename": "foo",
                "location": "keep:99999999999999999999999999999995+99/bar",
            },
            {
                "class": "Directory",
                "basename": "foo2",
                "location": "keep:99999999999999999999999999999995+99",
            },
            {
                "class": "File",
                "basename": "filename",
                "location": "keep:99999999999999999999999999999995+99/baz/filename",
            },
            {
                "class": "Directory",
                "basename": "subdir",
                "location": "keep:99999999999999999999999999999995+99/subdir",
            },
        ]
        tool = cmap({
            "inputs": [],
            "outputs": [],
            "hints": [{
                "class": "InitialWorkDirRequirement",
                "listing": listing,
            }],
            "baseCommand": "ls",
        })

        collection_cache = arvados_cwl.CollectionCache(runner.api, None, 0)
        make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                           collection_cache=collection_cache)
        arvtool = arvados_cwl.ArvadosCommandTool(
            runner, tool, work_api="containers", avsc_names=avsc_names,
            make_fs_access=make_fs_access, loader=Loader({}))
        arvtool.formatgraph = None

        jobs = arvtool.job({}, mock.MagicMock(), basedir="",
                           name="test_initial_work_dir",
                           make_fs_access=make_fs_access, tmpdir="/tmp")
        for job in jobs:
            job.run()

        _positional, call_kwargs = \
            runner.api.container_requests().create.call_args

        # (path inside the source collection, name inside the staging one)
        expected_copies = [
            ('bar', 'foo'),
            ('', 'foo2'),
            ('baz/filename', 'filename'),
            ('subdir', 'subdir'),
        ]
        for source_path, target_name in expected_copies:
            vwdmock.copy.assert_has_calls([
                mock.call(source_path, target_name,
                          source_collection=sourcemock)])

        expected_mounts = {
            '/tmp': {'kind': 'tmp', "capacity": 1073741824},
            '/var/spool/cwl': {'kind': 'tmp', "capacity": 1073741824},
        }
        for staged_name in ('foo', 'foo2', 'filename', 'subdir'):
            expected_mounts['/var/spool/cwl/' + staged_name] = {
                'kind': 'collection',
                'path': staged_name,
                'portable_data_hash': staged_pdh,
            }

        call_body_expected = {
            'environment': {
                'HOME': '/var/spool/cwl',
                'TMPDIR': '/tmp',
            },
            'name': 'test_initial_work_dir',
            'runtime_constraints': {
                'vcpus': 1,
                'ram': 1073741824,
            },
            'use_existing': True,
            'priority': 1,
            'mounts': expected_mounts,
            'state': 'Committed',
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
            'output_path': '/var/spool/cwl',
            'container_image': 'arvados/jobs',
            'command': ['ls'],
            'cwd': '/var/spool/cwl',
            'scheduling_parameters': {},
            'properties': {},
        }

        call_body = call_kwargs.get('body', None)
        self.assertNotEqual(None, call_body)
        # Compare key by key, driven by the keys actually submitted.
        for key in call_body:
            self.assertEqual(call_body_expected.get(key), call_body.get(key))
def validate_document(document_loader,   # type: Loader
                      workflowobj,       # type: CommentedMap
                      uri,               # type: Text
                      enable_dev=False,  # type: bool
                      strict=True,       # type: bool
                      preprocess_only=False,    # type: bool
                      fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    A bare list is wrapped as ``{"$graph": [...]}``.  A document containing a
    ``cwl:tool`` key is resolved as a job object: the tool it points at is
    fetched and validated in its place, and the remaining (non-empty) job
    values are returned in the metadata under ``cwl:defaults``.  A document
    without ``cwlVersion`` is treated as draft-2 and converted before
    resolution.

    :param document_loader: loader used to resolve a job object; its ``idx``
        is reused by the schema-aware loader created here.
    :param workflowobj: the parsed document (dict or list).
    :param uri: URI of the document.
    :param enable_dev: passed through to ``update.update``.
    :param strict: passed through to ``schema.validate_doc``.
    :param preprocess_only: when true, return right after reference
        resolution without validating or updating.
    :param fetcher_constructor: passed to ``fetch_document`` and ``Loader``.
    :returns: ``(document_loader, avsc_names, processobj, metadata, uri)``
        where ``document_loader`` is the newly created loader.
    :raises ValueError: if ``workflowobj`` is neither a list nor a dict.
    :raises Exception: if ``cwlVersion`` is present but not a string; the
        exception returned by ``process.get_schema`` in place of the names
        is also re-raised as is.
    :raises ValidationException: if the resolved document is not a
        ``CommentedMap``/``CommentedSeq``, or if no metadata was produced
        and the resolved document is not a dict.
    """

    if isinstance(workflowobj, list):
        workflowobj = {
            "$graph": workflowobj
        }

    if not isinstance(workflowobj, dict):
        raise ValueError("workflowjobj must be a dict")

    jobobj = None
    if "cwl:tool" in workflowobj:
        # The document is a job object: resolve it, then switch over to the
        # tool document that it references.
        jobobj, _ = document_loader.resolve_all(workflowobj, uri)
        uri = urlparse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urlparse.urldefrag(uri)[0]

    if "cwlVersion" in workflowobj:
        if not isinstance(workflowobj["cwlVersion"], (str, Text)):
            raise Exception("'cwlVersion' must be a string, got %s" % type(workflowobj["cwlVersion"]))
        # Strip a leading "cwl:" prefix or full namespace URI from the version.
        workflowobj["cwlVersion"] = re.sub(
            r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
            workflowobj["cwlVersion"])
    else:
        # Logger.warn is a deprecated alias; use Logger.warning.
        _logger.warning("No cwlVersion found, treating this file as draft-2.")
        workflowobj["cwlVersion"] = "draft-2"

    if workflowobj["cwlVersion"] == "draft-2":
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    # get_schema may hand back an exception object instead of the names.
    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, unicode]
    # From here on ``document_loader`` is a fresh loader built on the schema's
    # context, graph and cache, sharing the caller's document index.
    document_loader = Loader(sch_document_loader.ctx, schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx, cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not metadata:
        # No metadata came back from resolution: build it from the document.
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        metadata = cast(CommentedMap, cmap({"$namespaces": processobj.get("$namespaces", {}),
                         "$schemas": processobj.get("$schemas", []),
                         "cwlVersion": processobj["cwlVersion"]},
                        fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, metadata, uri

    schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, metadata)))

    # An empty job object is falsy and therefore not recorded.
    if jobobj:
        metadata[u"cwl:defaults"] = jobobj

    return document_loader, avsc_names, processobj, metadata, uri
Exemple #50
0
    def test_run(self, keepdocker):
        # Run a minimal ``ls`` CommandLineTool twice -- first with
        # enable_reuse=True, then False -- and check the body of the
        # container request submitted by each job.
        image_pdh = "99999999999999999999999999999993+99"
        one_gib = 1073741824  # 2**30

        for enable_reuse in (True, False):
            arv_docker_clear_cache()

            runner = mock.MagicMock()
            runner.ignore_docker_for_reuse = False
            runner.intermediate_output_ttl = 0
            runner.secret_store = cwltool.secrets.SecretStore()

            keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
            runner.api.collections().get().execute.return_value = {
                "portable_data_hash": image_pdh}

            tool_description = {
                "inputs": [],
                "outputs": [],
                "baseCommand": "ls",
                "arguments": [{"valueFrom": "$(runtime.outdir)"}],
                "id": "#",
                "class": "CommandLineTool"
            }
            tool = cmap(tool_description)

            loadingContext, runtimeContext = self.helper(runner, enable_reuse)

            arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
            arvtool.formatgraph = None

            # The request name and output name both embed the reuse flag.
            step_name = 'test_run_' + str(enable_reuse)
            expected_body = {
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp'
                },
                'name': step_name,
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': one_gib
                },
                'use_existing': enable_reuse,
                'priority': 500,
                'mounts': {
                    '/tmp': {'kind': 'tmp', "capacity": one_gib},
                    '/var/spool/cwl': {'kind': 'tmp', "capacity": one_gib}
                },
                'state': 'Committed',
                'output_name': 'Output for step ' + step_name,
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'output_ttl': 0,
                'container_image': image_pdh,
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
                'secret_mounts': {}
            }

            for job in arvtool.job({}, mock.MagicMock(), runtimeContext):
                job.run(runtimeContext)
                create = runner.api.container_requests().create
                create.assert_called_with(body=JsonDiffMatcher(expected_body))
Exemple #51
0
    def test_secrets(self, keepdocker):
        # A tool that declares input ``pw`` as a secret and interpolates it
        # into an InitialWorkDir file.  The expected request carries the
        # rendered file under ``secret_mounts``.
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 0
        runner.secret_store = cwltool.secrets.SecretStore()

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        # None of the returned values are referenced again in this test.
        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

        secrets_hint = {
            "class": "http://commonwl.org/cwltool#Secrets",
            "secrets": [
                "#secret_job.cwl/pw"
            ]
        }
        password_input = {
            "id": "#secret_job.cwl/pw",
            "type": "string"
        }
        workdir_requirement = {
            "class": "InitialWorkDirRequirement",
            "listing": [
                {
                    "entry": "username: user\npassword: $(inputs.pw)\n",
                    "entryname": "example.conf"
                }
            ]
        }
        tool = cmap({"arguments": ["md5sum", "example.conf"],
                     "class": "CommandLineTool",
                     "hints": [secrets_hint],
                     "id": "#secret_job.cwl",
                     "inputs": [password_input],
                     "outputs": [],
                     "requirements": [workdir_requirement]})

        loadingContext, runtimeContext = self.helper(runner)
        runtimeContext.name = "test_secrets"

        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
        arvtool.formatgraph = None

        job_order = {"pw": "blorp"}
        runner.secret_store.store(["pw"], job_order)

        one_gib = 1073741824  # 2**30
        expected_body = {
            'environment': {
                'HOME': '/var/spool/cwl',
                'TMPDIR': '/tmp'
            },
            'name': 'test_secrets',
            'runtime_constraints': {
                'vcpus': 1,
                'ram': one_gib
            },
            'use_existing': True,
            'priority': 500,
            'mounts': {
                '/tmp': {'kind': 'tmp', "capacity": one_gib},
                '/var/spool/cwl': {'kind': 'tmp', "capacity": one_gib}
            },
            'state': 'Committed',
            'output_name': 'Output for step test_secrets',
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
            'output_path': '/var/spool/cwl',
            'output_ttl': 0,
            'container_image': '99999999999999999999999999999993+99',
            'command': ['md5sum', 'example.conf'],
            'cwd': '/var/spool/cwl',
            'scheduling_parameters': {},
            'properties': {},
            "secret_mounts": {
                "/var/spool/cwl/example.conf": {
                    "content": "username: user\npassword: blorp\n",
                    "kind": "text"
                }
            }
        }

        for job in arvtool.job(job_order, mock.MagicMock(), runtimeContext):
            job.run(runtimeContext)
            create = runner.api.container_requests().create
            create.assert_called_with(body=JsonDiffMatcher(expected_body))
Exemple #52
0
def validate_document(document_loader,           # type: Loader
                      workflowobj,               # type: CommentedMap
                      uri,                       # type: Text
                      overrides,                 # type: List[Dict]
                      metadata,                  # type: Dict[Text, Any]
                      enable_dev=False,          # type: bool
                      strict=True,               # type: bool
                      preprocess_only=False,     # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,         # type: bool
                      do_validate=True           # type: bool
                     ):
    # type: (...) -> Tuple[Loader, schema.Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Resolve, validate and update a CWL document.

    A sequence is wrapped as a ``$graph`` document.  A document containing
    ``cwl:tool`` is handled as a job object: it is resolved with a default
    loader, any ``cwltool#overrides`` it carries are appended to
    ``overrides`` (the list is modified in place), and the referenced tool
    document is fetched and processed instead.  When the document has no
    ``cwlVersion`` the one in ``metadata`` is used.

    Returns ``(document_loader, avsc_names, processobj, new_metadata, uri)``
    where ``document_loader`` is the schema-aware loader created here.

    Raises ``ValueError`` when ``workflowobj`` is not a mapping or sequence,
    and ``ValidationException`` when ``cwlVersion`` is missing, is not a
    string, is not a key of ``ALLUPDATES``, or when resolution does not
    yield a ``CommentedMap``/``CommentedSeq``.  An exception object returned
    by ``process.get_schema`` in place of the names is re-raised.
    """
    tool_key = "https://w3id.org/cwl/cwl#tool"
    overrides_key = "http://commonwl.org/cwltool#overrides"

    if isinstance(workflowobj, MutableSequence):
        workflowobj = cmap({"$graph": workflowobj}, fn=uri)

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        # Job object: resolve it, then continue with the tool it points to.
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(
            workflowobj, uri, checklinks=do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj[tool_key])
        del cast(dict, jobobj)[tool_key]

        if isinstance(jobobj, CommentedMap) and overrides_key in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj[overrides_key]

        workflowobj = fetch_document(
            uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]

    if "cwlVersion" not in workflowobj:
        if "cwlVersion" not in metadata:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>.\n"
                "Note: if this is a CWL draft-2 (pre v1.0) document then it "
                "will need to be upgraded first.")
        # Fall back to the version supplied by the caller's metadata.
        workflowobj["cwlVersion"] = metadata["cwlVersion"]

    declared_version = workflowobj["cwlVersion"]
    if not isinstance(declared_version, string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(declared_version)))

    # Drop a leading "cwl:" prefix or full namespace URI from the version.
    cwl_version = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "", declared_version)
    workflowobj["cwlVersion"] = cwl_version

    known_versions = list(ALLUPDATES)
    if cwl_version not in known_versions:
        # Report every supported version, flagging the dev ones.
        versions = sorted(
            name + " (with --enable-dev flag only)" if "dev" in name else name
            for name in known_versions)
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(versions)))

    sch_document_loader, avsc_names = process.get_schema(cwl_version)[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    # Replace the caller's loader with one built on the schema's context,
    # graph and cache while sharing the caller's document index.
    document_loader = Loader(
        sch_document_loader.ctx,
        schemagraph=sch_document_loader.graph,
        idx=document_loader.idx,
        cache=sch_document_loader.cache,
        fetcher_constructor=fetcher_constructor,
        skip_schemas=skip_schemas)

    _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if isinstance(processobj, CommentedMap) and not new_metadata:
        # Resolution produced no metadata: derive it from the document.
        derived = {"$namespaces": processobj.get("$namespaces", {}),
                   "$schemas": processobj.get("$schemas", []),
                   "cwlVersion": processobj["cwlVersion"]}
        new_metadata = cast(CommentedMap, cmap(derived, fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri

    if do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if new_metadata.get("cwlVersion") != update.LATEST:
        updated = update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)
        processobj = cast(CommentedMap, cmap(updated))

    if jobobj is not None:
        new_metadata[u"cwl:defaults"] = jobobj

    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides

    return document_loader, avsc_names, processobj, new_metadata, uri
Exemple #53
0
    def test_scoped_id(self):
        # Resolve small documents that mix ``id`` and ``location`` fields
        # against a fixed base URI and check the expanded values.  Each
        # resolved document is also turned into an RDF graph and printed
        # as N3.
        ldr = schema_salad.ref_resolver.Loader({})
        ctx = {
            "id": "@id",
            "location": {
                "@id": "@id",
                "@type": "@id"
            },
            "bar": "http://example.com/bar",
            "ex": "http://example.com/"
        }
        ldr.add_context(ctx)

        base_uri = "http://example.com"
        no_link_check = {"checklinks": False}

        # (input document, extra resolve_all keyword arguments, expected result)
        cases = [
            # The first case is resolved without passing ``checklinks``.
            ({"id": "foo", "bar": {"id": "baz"}},
             {},
             {'id': 'http://example.com/#foo',
              'bar': {'id': 'http://example.com/#foo/baz'}}),
            ({"location": "foo", "bar": {"location": "baz"}},
             no_link_check,
             {'location': 'http://example.com/foo',
              'bar': {'location': 'http://example.com/baz'}}),
            ({"id": "foo", "bar": {"location": "baz"}},
             no_link_check,
             {'id': 'http://example.com/#foo',
              'bar': {'location': 'http://example.com/baz'}}),
            ({"location": "foo", "bar": {"id": "baz"}},
             no_link_check,
             {'location': 'http://example.com/foo',
              'bar': {'id': 'http://example.com/#baz'}}),
        ]

        for document, resolve_kwargs, expected in cases:
            resolved, _ = ldr.resolve_all(cmap(document), base_uri, **resolve_kwargs)
            self.assertEqual(expected, resolved)

            graph = makerdf(None, resolved, ctx)
            print(graph.serialize(format="n3"))
Exemple #54
0
def resolve_and_validate_document(loadingContext,
                      workflowobj,
                      uri,
                      preprocess_only=False,     # type: bool
                      skip_schemas=None,         # type: bool
                     ):
    # type: (...) -> Tuple[LoadingContext, Text]
    """Resolve and validate a CWL document, returning an updated context.

    Works on a copy of ``loadingContext``.  A document containing
    ``cwl:tool`` is handled as a job object: the referenced tool document is
    fetched and processed instead, ``cwltool#overrides`` and
    ``cwl#requirements`` entries of the job object are moved into the
    context's ``overrides_list``, and what remains becomes ``jobdefaults``.

    Returns ``(loadingContext, uri)``.  Unless ``preprocess_only`` is set,
    the returned context has ``loader``, ``avsc_names`` and ``metadata``
    filled in; with ``preprocess_only`` the copy is returned right after
    reference resolution, before those attributes are assigned.

    Raises ``ValueError`` when ``workflowobj`` is not a mapping, and
    ``ValidationException`` when ``cwlVersion`` is missing, is not a string,
    is not a key of ``ALLUPDATES``, when ``cwl:requirements`` is used with a
    version other than ``v1.1.0-dev1``, or when resolution yields unexpected
    types.  An exception object returned by ``process.get_schema`` in place
    of the names is re-raised.
    """
    tool_key = "https://w3id.org/cwl/cwl#tool"
    overrides_key = "http://commonwl.org/cwltool#overrides"
    requirements_key = "https://w3id.org/cwl/cwl#requirements"

    loadingContext = loadingContext.copy()

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        # Job object: resolve it, then continue with the tool it points to.
        jobobj, _ = loadingContext.loader.resolve_all(
            workflowobj, uri, checklinks=loadingContext.do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj[tool_key])
        del cast(dict, jobobj)[tool_key]

        workflowobj = fetch_document(uri, loadingContext)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]

    cwlVersion = workflowobj.get("cwlVersion")
    if not cwlVersion:
        # Not declared on this object: look at the top of the containing file.
        fileobj = fetch_document(fileuri, loadingContext)[1]
        cwlVersion = fileobj.get("cwlVersion")
        if not cwlVersion:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>.\n"
                "Note: if this is a CWL draft-2 (pre v1.0) document then it "
                "will need to be upgraded first.")

    if not isinstance(cwlVersion, string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(cwlVersion)))

    # Drop a leading "cwl:" prefix or full namespace URI from the version.
    cwlVersion = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "", cwlVersion)

    known_versions = list(ALLUPDATES)
    if cwlVersion not in known_versions:
        # Report every supported version, flagging the dev ones.
        versions = sorted(
            name + " (with --enable-dev flag only)" if "dev" in name else name
            for name in known_versions)
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(versions)))

    if isinstance(jobobj, CommentedMap):
        if overrides_key in jobobj:
            loadingContext.overrides_list.extend(
                resolve_overrides(jobobj, uri, uri))
            del jobobj[overrides_key]

        if requirements_key in jobobj:
            if cwlVersion != "v1.1.0-dev1":
                raise ValidationException(
                        "`cwl:requirements` in the input object is not part of CWL "
                        "v1.0. You can adjust to use `cwltool:overrides` instead; or you "
                        "can set the cwlVersion to v1.1.0-dev1 or greater and re-run with "
                        "--enable-dev.")
            # Requirements given in the job object become an override that
            # targets the tool document.
            loadingContext.overrides_list.append(
                {"overrideTarget": uri,
                 "requirements": jobobj[requirements_key]})
            del jobobj[requirements_key]

    sch_document_loader, avsc_names = process.get_schema(cwlVersion)[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    document_loader = Loader(
        sch_document_loader.ctx,
        schemagraph=sch_document_loader.graph,
        idx=loadingContext.loader.idx,
        cache=sch_document_loader.cache,
        fetcher_constructor=loadingContext.fetcher_constructor,
        skip_schemas=skip_schemas)

    # Blank ids are only added for v1.0 documents.
    if cwlVersion == "v1.0":
        _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=loadingContext.do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a CommentedMap or CommentedSeq.")
    if not isinstance(metadata, CommentedMap):
        raise ValidationException("metadata must be a CommentedMap, was %s" % type(metadata))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return loadingContext, uri

    if loadingContext.do_validate:
        schema.validate_doc(
            avsc_names, processobj, document_loader, loadingContext.strict)

    if loadingContext.do_update:
        updated = update.update(
            processobj, document_loader, fileuri,
            loadingContext.enable_dev, metadata)
        processobj = cast(CommentedMap, cmap(updated))
        # Re-register the updated objects in the loader's index.
        if isinstance(processobj, MutableMapping):
            document_loader.idx[processobj["id"]] = processobj
        elif isinstance(processobj, MutableSequence):
            document_loader.idx[metadata["id"]] = metadata
            for entry in processobj:
                document_loader.idx[entry["id"]] = entry

    if jobobj is not None:
        loadingContext.jobdefaults = jobobj

    loadingContext.loader = document_loader
    loadingContext.avsc_names = avsc_names
    loadingContext.metadata = metadata

    return loadingContext, uri
Exemple #55
0
    def test_resource_requirements(self, keepdocker):
        # Run an ``ls`` tool carrying ResourceRequirement, RuntimeConstraints,
        # APIRequirement and PartitionRequirement hints, then compare the
        # submitted container request body with the expected values, key by
        # key.
        arv_docker_clear_cache()
        runner = mock.MagicMock()
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False
        # Only avsc_names is used below.
        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        hints = [
            {
                "class": "ResourceRequirement",
                "coresMin": 3,
                "ramMin": 3000,
                "tmpdirMin": 4000,
                "outdirMin": 5000
            },
            {
                "class": "http://arvados.org/cwl#RuntimeConstraints",
                "keep_cache": 512
            },
            {
                "class": "http://arvados.org/cwl#APIRequirement",
            },
            {
                "class": "http://arvados.org/cwl#PartitionRequirement",
                "partition": "blurb"
            },
        ]
        tool = cmap({
            "inputs": [],
            "outputs": [],
            "hints": hints,
            "baseCommand": "ls"
        })

        collection_cache = arvados_cwl.CollectionCache(runner.api, None, 0)
        make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                           collection_cache=collection_cache)
        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
                                                 avsc_names=avsc_names, make_fs_access=make_fs_access,
                                                 loader=Loader({}))
        arvtool.formatgraph = None
        jobs = arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
                           make_fs_access=make_fs_access, tmpdir="/tmp")
        for job in jobs:
            job.run()

        _, request_kwargs = runner.api.container_requests().create.call_args

        expected_body = {
            'environment': {
                'HOME': '/var/spool/cwl',
                'TMPDIR': '/tmp'
            },
            'name': 'test_resource_requirements',
            'runtime_constraints': {
                'vcpus': 3,
                'ram': 3145728000,
                'keep_cache_ram': 536870912,
                'API': True
            },
            'use_existing': True,
            'priority': 1,
            'mounts': {
                '/tmp': {'kind': 'tmp', "capacity": 4194304000},
                '/var/spool/cwl': {'kind': 'tmp', "capacity": 5242880000}
            },
            'state': 'Committed',
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
            'output_path': '/var/spool/cwl',
            'container_image': 'arvados/jobs',
            'command': ['ls'],
            'cwd': '/var/spool/cwl',
            'scheduling_parameters': {
                'partitions': ['blurb']
            },
            'properties': {}
        }

        actual_body = request_kwargs.get('body')
        self.assertNotEqual(None, actual_body)
        # Only keys present in the submitted body are compared.
        for field in actual_body:
            self.assertEqual(expected_body.get(field), actual_body.get(field))
Exemple #56
0
    def test_resource_requirements(self, keepdocker):
        # Run an ``ls`` tool carrying resource, keep-cache, API, partition,
        # intermediate-output and reuse hints, then compare the submitted
        # container request body with the expected values, key by key.
        arv_docker_clear_cache()
        runner = mock.MagicMock()
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 3600
        runner.secret_store = cwltool.secrets.SecretStore()

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        hints = [
            {
                "class": "ResourceRequirement",
                "coresMin": 3,
                "ramMin": 3000,
                "tmpdirMin": 4000,
                "outdirMin": 5000
            },
            {
                "class": "http://arvados.org/cwl#RuntimeConstraints",
                "keep_cache": 512
            },
            {
                "class": "http://arvados.org/cwl#APIRequirement",
            },
            {
                "class": "http://arvados.org/cwl#PartitionRequirement",
                "partition": "blurb"
            },
            {
                "class": "http://arvados.org/cwl#IntermediateOutput",
                "outputTTL": 7200
            },
            {
                "class": "http://arvados.org/cwl#ReuseRequirement",
                "enableReuse": False
            },
        ]
        tool = cmap({
            "inputs": [],
            "outputs": [],
            "hints": hints,
            "baseCommand": "ls",
            "id": "#",
            "class": "CommandLineTool"
        })

        loadingContext, runtimeContext = self.helper(runner)
        runtimeContext.name = "test_resource_requirements"

        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
        arvtool.formatgraph = None
        for job in arvtool.job({}, mock.MagicMock(), runtimeContext):
            job.run(runtimeContext)

        _, request_kwargs = runner.api.container_requests().create.call_args

        expected_body = {
            'environment': {
                'HOME': '/var/spool/cwl',
                'TMPDIR': '/tmp'
            },
            'name': 'test_resource_requirements',
            'runtime_constraints': {
                'vcpus': 3,
                'ram': 3145728000,
                'keep_cache_ram': 536870912,
                'API': True
            },
            'use_existing': False,
            'priority': 500,
            'mounts': {
                '/tmp': {'kind': 'tmp', "capacity": 4194304000},
                '/var/spool/cwl': {'kind': 'tmp', "capacity": 5242880000}
            },
            'state': 'Committed',
            'output_name': 'Output for step test_resource_requirements',
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
            'output_path': '/var/spool/cwl',
            'output_ttl': 7200,
            'container_image': '99999999999999999999999999999993+99',
            'command': ['ls'],
            'cwd': '/var/spool/cwl',
            'scheduling_parameters': {
                'partitions': ['blurb']
            },
            'properties': {},
            'secret_mounts': {}
        }

        actual_body = request_kwargs.get('body')
        self.assertNotEqual(None, actual_body)
        # Only keys present in the submitted body are compared.
        for field in actual_body:
            self.assertEqual(expected_body.get(field), actual_body.get(field))
Exemple #57
0
    def test_initial_work_dir(self, collection_mock, keepdocker):
        """Check the container request produced for an InitialWorkDirRequirement.

        A CommandLineTool whose hints list two Files and two Directories
        stored under a ``keep:`` location is run through ArvadosCommandTool.
        The test then verifies the ``copy`` calls recorded on the mock handed
        to ``CollectionMock`` and the ``body`` passed to
        ``container_requests().create``.
        """
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 0
        runner.secret_store = cwltool.secrets.SecretStore()

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999993+99"}

        sourcemock = mock.MagicMock()

        def get_collection_mock(p):
            # Whatever follows the first "/" is the path inside the
            # collection; with no "/" at all the inner path is empty.
            inner_path = p.partition("/")[2]
            return (sourcemock, inner_path)

        runner.fs_access.get_collection.side_effect = get_collection_mock

        vwdmock = mock.MagicMock()

        def make_collection(*args, **kwargs):
            # Every collection the code under test creates shares vwdmock.
            return CollectionMock(vwdmock, *args, **kwargs)

        collection_mock.side_effect = make_collection

        staged_entries = [
            {
                "class": "File",
                "basename": "foo",
                "location": "keep:99999999999999999999999999999995+99/bar"
            },
            {
                "class": "Directory",
                "basename": "foo2",
                "location": "keep:99999999999999999999999999999995+99"
            },
            {
                "class": "File",
                "basename": "filename",
                "location": "keep:99999999999999999999999999999995+99/baz/filename"
            },
            {
                "class": "Directory",
                "basename": "subdir",
                "location": "keep:99999999999999999999999999999995+99/subdir"
            },
        ]
        tool = cmap({
            "inputs": [],
            "outputs": [],
            "hints": [{
                "class": "InitialWorkDirRequirement",
                "listing": staged_entries
            }],
            "baseCommand": "ls",
            "id": "#",
            "class": "CommandLineTool"
        })

        loadingContext, runtimeContext = self.helper(runner)
        runtimeContext.name = "test_initial_work_dir"

        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
        arvtool.formatgraph = None
        for runnable in arvtool.job({}, mock.MagicMock(), runtimeContext):
            runnable.run(runtimeContext)

        _, create_kwargs = runner.api.container_requests().create.call_args

        # Each listing entry must have been copied from the source collection
        # to its basename; every expectation is checked on its own.
        expected_copies = (
            ('bar', 'foo'),
            ('', 'foo2'),
            ('baz/filename', 'filename'),
            ('subdir', 'subdir'),
        )
        for inner_path, target_name in expected_copies:
            vwdmock.copy.assert_has_calls(
                [mock.call(inner_path, target_name, source_collection=sourcemock)])

        def staged_mount(path):
            # Mount expected for one entry staged into the working directory.
            return {
                'kind': 'collection',
                'path': path,
                'portable_data_hash': '99999999999999999999999999999996+99'
            }

        expected_mounts = {
            '/tmp': {'kind': 'tmp', "capacity": 1073741824},
            '/var/spool/cwl': {'kind': 'tmp', "capacity": 1073741824},
            '/var/spool/cwl/foo': staged_mount('foo'),
            '/var/spool/cwl/foo2': staged_mount('foo2'),
            '/var/spool/cwl/filename': staged_mount('filename'),
            '/var/spool/cwl/subdir': staged_mount('subdir'),
        }
        expected_body = {
            'environment': {
                'HOME': '/var/spool/cwl',
                'TMPDIR': '/tmp'
            },
            'name': 'test_initial_work_dir',
            'runtime_constraints': {
                'vcpus': 1,
                'ram': 1073741824
            },
            'use_existing': True,
            'priority': 500,
            'mounts': expected_mounts,
            'state': 'Committed',
            'output_name': 'Output for step test_initial_work_dir',
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
            'output_path': '/var/spool/cwl',
            'output_ttl': 0,
            'container_image': '99999999999999999999999999999993+99',
            'command': ['ls'],
            'cwd': '/var/spool/cwl',
            'scheduling_parameters': {},
            'properties': {},
            'secret_mounts': {}
        }

        actual_body = create_kwargs.get('body', None)
        self.assertNotEqual(None, actual_body)
        # Only the fields present in the submitted body are compared.
        for field in actual_body:
            self.assertEqual(expected_body.get(field), actual_body.get(field))
Exemple #58
0
    def test_mounts(self, keepdocker):
        """Check the container request produced for a Directory input in keep.

        Runs a CommandLineTool with a single Directory input ``p1`` whose
        location is a ``keep:`` reference and asserts that
        ``container_requests().create`` was called with the expected body,
        including a ``/keep/...`` collection mount.
        """
        arv_docker_clear_cache()

        runner = mock.MagicMock()
        runner.ignore_docker_for_reuse = False
        runner.intermediate_output_ttl = 0
        runner.secret_store = cwltool.secrets.SecretStore()

        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
        runner.api.collections().get().execute.return_value = {
            "portable_data_hash": "99999999999999999999999999999994+99",
            "manifest_text": ". 99999999999999999999999999999994+99 0:0:file1 0:0:file2"}

        # The four results are not used below; the call itself is kept.
        _doc_loader, _avsc_names, _schema_meta, _meta_loader = cwltool.process.get_schema("v1.0")

        directory_input = {"id": "p1",
                           "type": "Directory"}
        tool = cmap({
            "inputs": [directory_input],
            "outputs": [],
            "baseCommand": "ls",
            "arguments": [{"valueFrom": "$(runtime.outdir)"}],
            "id": "#",
            "class": "CommandLineTool"
        })

        loadingContext, runtimeContext = self.helper(runner)
        runtimeContext.name = "test_run_mounts"

        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
        arvtool.formatgraph = None

        directory_listing = [
            {
                "class": "File",
                "location": "keep:99999999999999999999999999999994+44/file1",
            },
            {
                "class": "File",
                "location": "keep:99999999999999999999999999999994+44/file2",
            },
        ]
        job_order = {
            "p1": {
                "class": "Directory",
                "location": "keep:99999999999999999999999999999994+44",
                "listing": directory_listing
            }
        }

        def expected_request_body():
            # Returns a fresh dict on every call, so each loop iteration
            # hands the matcher its own copy.
            mounts = {
                "/keep/99999999999999999999999999999994+44": {
                    "kind": "collection",
                    "portable_data_hash": "99999999999999999999999999999994+44"
                },
                '/tmp': {'kind': 'tmp',
                         "capacity": 1073741824},
                '/var/spool/cwl': {'kind': 'tmp',
                                   "capacity": 1073741824}
            }
            return {
                'environment': {
                    'HOME': '/var/spool/cwl',
                    'TMPDIR': '/tmp'
                },
                'name': 'test_run_mounts',
                'runtime_constraints': {
                    'vcpus': 1,
                    'ram': 1073741824
                },
                'use_existing': True,
                'priority': 500,
                'mounts': mounts,
                'state': 'Committed',
                'output_name': 'Output for step test_run_mounts',
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                'output_path': '/var/spool/cwl',
                'output_ttl': 0,
                'container_image': '99999999999999999999999999999994+99',
                'command': ['ls', '/var/spool/cwl'],
                'cwd': '/var/spool/cwl',
                'scheduling_parameters': {},
                'properties': {},
                'secret_mounts': {}
            }

        for runnable in arvtool.job(job_order, mock.MagicMock(), runtimeContext):
            runnable.run(runtimeContext)
            runner.api.container_requests().create.assert_called_with(
                body=JsonDiffMatcher(expected_request_body()))
Exemple #59
0
def validate_document(document_loader,  # type: Loader
                      workflowobj,  # type: CommentedMap
                      uri,  # type: Text
                      enable_dev=False,  # type: bool
                      strict=True,  # type: bool
                      preprocess_only=False,  # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,  # type: bool
                      overrides=None,  # type: Optional[List[Dict]]
                      metadata=None,  # type: Optional[Dict]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    Steps, in order:

    1. A list ``workflowobj`` is wrapped as ``{"$graph": workflowobj}``.
    2. If the object has a ``cwl:tool`` key it is treated as a job object:
       it is resolved, the tool URI is joined onto ``uri``, any
       ``cwltool#overrides`` entries are appended to ``overrides``, and the
       referenced document is fetched to replace ``workflowobj``.
    3. ``cwlVersion`` is taken from the document, or from ``metadata`` when
       the document lacks it, stripped of its namespace prefix and checked
       against ``ALLUPDATES``.
    4. ``draft-2`` documents are converted with
       ``update._draft2toDraft3dev1``.
    5. A new ``Loader`` is built from the schema loader for that version and
       the document is resolved with it.
    6. Unless ``preprocess_only`` is set, the document is validated against
       the schema and updated when its version is not ``update.LATEST``.

    :param document_loader: loader whose ``idx`` is reused by the new loader.
    :param workflowobj: the parsed document (dict or list).
    :param uri: URI of the document; rewritten when a job object points at
        a tool.
    :param enable_dev: forwarded to ``update.update``.
    :param strict: forwarded to ``schema.validate_doc``.
    :param preprocess_only: return right after resolution, skipping
        validation and update.
    :param fetcher_constructor: forwarded to the loaders and
        ``fetch_document``.
    :param skip_schemas: forwarded to the new ``Loader``.
    :param overrides: list extended in place with overrides found in a job
        object; when omitted, a fresh list is used.
    :param metadata: fallback source for ``cwlVersion``.
    :returns: ``(document_loader, avsc_names, processobj, new_metadata, uri)``.
    :raises ValueError: if ``workflowobj`` is neither a list nor a dict.
    :raises ValidationException: if ``cwlVersion`` is missing or unsupported,
        or the resolved document is not a map or sequence.
    """

    if overrides is None:
        # A job object may contribute overrides below; with the default of
        # None the ``extend`` call used to fail with AttributeError.
        overrides = []

    if isinstance(workflowobj, list):
        workflowobj = cmap({
            "$graph": workflowobj
        }, fn=uri)

    if not isinstance(workflowobj, dict):
        raise ValueError("workflowobj must be a dict, got '%s': %s" % (type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        # The input is a job object pointing at the tool to run.
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri)
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]

        if "http://commonwl.org/cwltool#overrides" in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj["http://commonwl.org/cwltool#overrides"]

        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]
    if "cwlVersion" not in workflowobj:
        if metadata and 'cwlVersion' in metadata:
            workflowobj['cwlVersion'] = metadata['cwlVersion']
        else:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>")

    if not isinstance(workflowobj["cwlVersion"], (str, Text)):
        raise Exception("'cwlVersion' must be a string, got %s" % type(workflowobj["cwlVersion"]))
    # strip out version
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in ALLUPDATES:
        # print out all the Supported Versions of cwlVersion
        versions = sorted(ALLUPDATES)  # ALLUPDATES is a dict
        raise ValidationException("'cwlVersion' not valid. Supported CWL versions are: \n{}".format("\n".join(versions)))

    if workflowobj["cwlVersion"] == "draft-2":
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text]
    # Context, schema graph and cache come from the schema loader; the
    # index comes from the caller's loader.
    document_loader = Loader(sch_document_loader.ctx, schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx, cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor, skip_schemas=skip_schemas)

    _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not new_metadata:
        # No metadata came back from resolution: derive it from the document.
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        new_metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]}, fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri

    schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if new_metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)))

    if jobobj:
        new_metadata[u"cwl:defaults"] = jobobj

    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides

    return document_loader, avsc_names, processobj, new_metadata, uri
Exemple #60
0
    def job(self, joborder, output_callback, **kwargs):
        """Create the job(s) for this workflow.

        When the tool carries the ``http://arvados.org/cwl#RunInSingleContainer``
        requirement, the workflow is wrapped in a synthetic CommandLineTool
        that runs ``cwltool`` on a packed copy of it:

        * dependencies of the job order are uploaded;
        * on the first call (``self.wf_pdh`` is None) the workflow is packed,
          its dependencies are uploaded, and the packed document is stored
          with ``upload_workflow_collection``; the result is remembered in
          ``self.wf_pdh``;
        * ``keep:`` locations in a copy of the job order (and in the packed
          workflow) are rewritten to ``/keep/`` paths;
        * the wrapper tool stages ``workflow.cwl`` and ``cwl.input.yml`` via
          an InitialWorkDirRequirement and is handed to ArvadosCommandTool.

        Otherwise the call is delegated to the superclass.

        :param joborder: the input object for this run.
        :param output_callback: forwarded unchanged to the underlying ``job``.
        :param kwargs: runtime options; ``work_api`` is always set, and
            ``loader`` and ``avsc_names`` are set on the single-container path.
        :returns: whatever the underlying ``job`` call returns.
        :raises WorkflowException: if the tool has no ``id``, or a
            File/Directory object lacks ``location`` or has a location that
            is neither a ``keep:`` reference nor a ``_:`` literal.
        """
        kwargs["work_api"] = self.work_api
        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
        if req:
            with SourceLine(self.tool, None, WorkflowException):
                if "id" not in self.tool:
                    raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
            document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])

            with Perf(metrics, "subworkflow upload_deps"):
                upload_dependencies(self.arvrunner,
                                    os.path.basename(joborder.get("id", "#")),
                                    document_loader,
                                    joborder,
                                    joborder.get("id", "#"),
                                    False)

                if self.wf_pdh is None:
                    # First use: pack the workflow with de-duplicated
                    # requirements and hints, then upload what it references.
                    workflowobj["requirements"] = dedup_reqs(self.requirements)
                    workflowobj["hints"] = dedup_reqs(self.hints)

                    packed = pack(document_loader, workflowobj, uri, self.metadata)

                    upload_dependencies(self.arvrunner,
                                        kwargs.get("name", ""),
                                        document_loader,
                                        packed,
                                        uri,
                                        False)

            with Perf(metrics, "subworkflow adjust"):
                # Work on a copy so the caller's job order keeps its locations.
                joborder_keepmount = copy.deepcopy(joborder)

                def keepmount(obj):
                    """Rewrite one File/Directory object to use a /keep/ path."""
                    with SourceLine(obj, None, WorkflowException):
                        if "location" not in obj:
                            raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
                    with SourceLine(obj, "location", WorkflowException):
                        if obj["location"].startswith("keep:"):
                            obj["location"] = "/keep/" + obj["location"][5:]
                            if "listing" in obj:
                                del obj["listing"]
                        elif obj["location"].startswith("_:"):
                            # Literal object: it has no location to rewrite.
                            del obj["location"]
                        else:
                            raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])

                adjustFileObjs(joborder_keepmount, keepmount)
                adjustDirObjs(joborder_keepmount, keepmount)

                if self.wf_pdh is None:
                    adjustFileObjs(packed, keepmount)
                    adjustDirObjs(packed, keepmount)
                    self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)

            # Serialize the adjusted job order, then escape backslashes and
            # the "$(" / "${" markers.
            # NOTE(review): presumably this keeps the entry text from being
            # evaluated as CWL expressions - confirm.
            # '\\$(' and '\\${' are the valid spellings of the strings the
            # invalid escapes '\$(' and '\${' used to produce.
            input_yml = json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': '))
            input_yml = input_yml.replace("\\", "\\\\").replace('$(', '\\$(').replace('${', '\\${')

            wf_runner = cmap({
                "class": "CommandLineTool",
                "baseCommand": "cwltool",
                "inputs": self.tool["inputs"],
                "outputs": self.tool["outputs"],
                "stdout": "cwl.output.json",
                "requirements": workflowobj["requirements"]+[
                    {
                    "class": "InitialWorkDirRequirement",
                    "listing": [{
                            "entryname": "workflow.cwl",
                            "entry": {
                                "class": "File",
                                "location": "keep:%s/workflow.cwl" % self.wf_pdh
                            }
                        }, {
                            "entryname": "cwl.input.yml",
                            "entry": input_yml
                        }]
                }],
                "hints": workflowobj["hints"],
                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"]
            })
            kwargs["loader"] = self.doc_loader
            kwargs["avsc_names"] = self.doc_schema
            return ArvadosCommandTool(self.arvrunner, wf_runner, **kwargs).job(joborder, output_callback, **kwargs)
        else:
            return super(ArvadosWorkflow, self).job(joborder, output_callback, **kwargs)