Beispiel #1
0
def load_job_order(args,   # type: argparse.Namespace
                   stdin,  # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides,  # type: List[Dict[Text, Any]]
                   tool_file_uri  # type: Text
):
    # type: (...) -> Tuple[Dict[Text, Any], Text, Loader]
    """Load the job order (input object) selected by ``args.job_order``.

    * exactly one argument not starting with ``-``: treated as a job order
      file and loaded with ``loader.resolve_ref``;
    * exactly one argument equal to ``-``: the job order is read from
      ``stdin`` and resolved against the current working directory;
    * anything else: no job order object is loaded here.

    If the loaded object contains the
    ``http://commonwl.org/cwltool#overrides`` key, the resolved overrides
    are appended to ``overrides`` (mutated in place) and the key is removed
    from the job order object.

    Returns a ``(job_order_object, input_basedir, loader)`` tuple;
    ``job_order_object`` may be ``None`` (or empty) when nothing was loaded.
    """
    job_order_object = None
    # Bound up front: the stdin branch below never assigns it, and it is
    # read unconditionally further down.
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # A job order read from stdin has no file of its own, so fall back
        # to the input base directory as the URI for the overrides.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides.extend(resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if not job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
Beispiel #2
0
def resolve_overrides(ov, ov_uri, baseurl):
    # type: (CommentedMap, Text, Text) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return its ``overrides`` entry.

    ``ov`` is resolved against ``baseurl`` with a loader built from
    ``overrides_ctx``; the result must be a ``CommentedMap``.  It is then
    passed through the v1.0 schema loader's ``resolve_all`` with ``ov_uri``
    as the base (the return value of that call is discarded).

    Raises ``Exception`` when the first resolution does not yield a
    ``CommentedMap``.
    """
    overrides_loader = Loader(overrides_ctx)
    resolved, _metadata = overrides_loader.resolve_all(ov, baseurl)
    if isinstance(resolved, CommentedMap):
        schema_loader = get_schema("v1.0")[0]
        schema_loader.resolve_all(resolved, ov_uri)
        return resolved["overrides"]
    raise Exception("Expected CommentedMap, got %s" % type(resolved))
def create_loader(ctx):
    """Build a ``Loader`` whose ``url_fields`` are derived from ``ctx``.

    A key of ``ctx`` becomes a URL field when either

    * the key is not ``"id"`` and its value is the string ``"@id"``, or
    * its value is a dict whose ``"@type"`` entry is ``"@id"``.

    The loader's index also gets an empty ``"cwl:JsonPointer"`` entry.
    """
    def _is_url_field(name):
        value = ctx[name]
        # NOTE(review): the original condition was
        # `c != "id" and (A) or (B)`, which Python groups as
        # `(c != "id" and A) or B`; that grouping is kept here, so the
        # "id" exclusion does not apply to the dict form.  Confirm intent.
        if name != "id" and value == "@id":
            return True
        return isinstance(value, dict) and value.get("@type") == "@id"

    loader = Loader()
    loader.url_fields = [name for name in ctx if _is_url_field(name)]
    loader.idx["cwl:JsonPointer"] = {}
    return loader
Beispiel #4
0
def resolve_overrides(ov,      # type: CommentedMap
                      ov_uri,  # type: Text
                      baseurl  # type: Text
                     ):  # type: (...) -> List[Dict[Text, Any]]
    """Resolve an overrides document and return its cwltool overrides list.

    ``ov`` is resolved against ``baseurl`` using a loader built from
    ``overrides_ctx``.  The resolved document must be a ``CommentedMap``;
    it is then run through the v1.0 schema loader with ``ov_uri`` as base
    (that call's return value is not used) and the value stored under
    ``http://commonwl.org/cwltool#overrides`` is returned.

    Raises ``Exception`` if the resolved document is not a ``CommentedMap``.
    """
    resolved, _metadata = Loader(overrides_ctx).resolve_all(ov, baseurl)
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))

    schema_loader = get_schema("v1.0")[0]
    schema_loader.resolve_all(resolved, ov_uri)

    overrides_key = "http://commonwl.org/cwltool#overrides"
    return resolved[overrides_key]
Beispiel #5
0
def resolve_overrides(
    ov,  # type: CommentedMap
    ov_uri,  # type: Text
    baseurl  # type: Text
):  # type: (...) -> List[Dict[Text, Any]]
    """Return the ``overrides`` entry of ``ov`` after resolving it.

    Steps, in order:

    1. resolve ``ov`` against ``baseurl`` with a ``Loader(overrides_ctx)``;
    2. require the result to be a ``CommentedMap`` (else raise ``Exception``);
    3. pass the result through the v1.0 schema loader's ``resolve_all``
       using ``ov_uri`` as the base (return value discarded);
    4. return ``result["overrides"]``.
    """
    context_loader = Loader(overrides_ctx)
    document, _unused = context_loader.resolve_all(ov, baseurl)
    if isinstance(document, CommentedMap):
        get_schema("v1.0")[0].resolve_all(document, ov_uri)
        return document["overrides"]
    raise Exception("Expected CommentedMap, got %s" % type(document))
def resolve_overrides(
    ov: IdxResultType,
    ov_uri: str,
    baseurl: str,
) -> List[CWLObjectType]:
    """Resolve an overrides document and return its cwltool overrides list.

    ``ov`` is resolved against ``baseurl`` with a loader built from
    ``overrides_ctx`` and must come back as a ``CommentedMap``.  The
    document is then passed to the v1.0 schema loader's ``resolve_all``
    with ``ov_uri`` as base (the return value is ignored), and the entry
    stored under ``http://commonwl.org/cwltool#overrides`` is returned.

    Raises:
        Exception: if the resolved document is not a ``CommentedMap``.
    """
    overrides_loader = Loader(overrides_ctx)
    resolved, _metadata = overrides_loader.resolve_all(ov, baseurl)
    if not isinstance(resolved, CommentedMap):
        raise Exception("Expected CommentedMap, got %s" % type(resolved))

    schema_loader = get_schema("v1.0")[0]
    schema_loader.resolve_all(resolved, ov_uri)

    overrides_entry = resolved["http://commonwl.org/cwltool#overrides"]
    return cast(List[CWLObjectType], overrides_entry)
def test_blank_node_id() -> None:
    """Blank node ids must be passed through, not treated as relative paths.

    Blank nodes (also called anonymous ids) are URIs starting with "_:".
    They are randomly generated placeholders mainly used internally where
    an id is needed but was not given.
    """
    id_context = {"id": "@id"}  # type: ContextType
    loader = Loader({})
    loader.add_context(id_context)

    document = cmap({"id": "_:foo"})
    resolved, _metadata = loader.resolve_all(document, "http://example.com")

    assert {"id": "_:foo"} == resolved
Beispiel #8
0
def resolve_overrides(
        ov,  # type: CommentedMap
        ov_uri,  # type: str
        baseurl,  # type: str
):  # type: (...) -> List[Dict[str, Any]]
    """Resolve an overrides document and return its cwltool overrides list.

    The document ``ov`` is resolved against ``baseurl`` with a loader built
    from ``overrides_ctx``; a non-``CommentedMap`` result raises
    ``Exception``.  The resolved document is then handed to the v1.0 schema
    loader's ``resolve_all`` (base ``ov_uri``, return value unused) before
    the ``http://commonwl.org/cwltool#overrides`` entry is returned.
    """
    resolved = Loader(overrides_ctx).resolve_all(ov, baseurl)[0]
    if isinstance(resolved, CommentedMap):
        schema_loader = get_schema("v1.0")[0]
        schema_loader.resolve_all(resolved, ov_uri)
        overrides_entry = resolved["http://commonwl.org/cwltool#overrides"]
        return cast(List[Dict[str, Any]], overrides_entry)
    raise Exception("Expected CommentedMap, got %s" % type(resolved))
Beispiel #9
0
def load_job_order(
    args,  # type: argparse.Namespace
    stdin,  # type: IO[Any]
    fetcher_constructor,  # Fetcher
    overrides_list,  # type: List[Dict[Text, Any]]
    tool_file_uri  # type: Text
):  # type: (...) -> Tuple[Optional[MutableMapping[Text, Any]], Text, Loader]
    """Load the job order (input object) selected by ``args.job_order``.

    * exactly one argument not starting with ``-``: treated as a job order
      file and loaded with ``loader.resolve_ref``;
    * exactly one argument equal to ``-``: the job order is read from
      ``stdin`` and resolved against the current working directory;
    * anything else: no job order object is loaded here.

    If the loaded object contains the
    ``http://commonwl.org/cwltool#overrides`` key, the resolved overrides
    are appended to ``overrides_list`` (mutated in place) and the key is
    removed from the job order object.

    Logs an error and calls ``sys.exit(1)`` when a job order was loaded but
    is not a ``MutableMapping``.

    Returns a ``(job_order_object, input_basedir, loader)`` tuple, where
    ``job_order_object`` is ``None`` when nothing was loaded.
    """
    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx,
                    fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object,
                                                 file_uri(os.getcwd()) + "/")
    # Any other argument shape leaves job_order_file at its initial None.

    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file,
                                                 checklinks=False)

    if job_order_object is not None and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # A job order read from stdin has no file, so the base directory
        # stands in as the URI for the overrides.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides_list.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    if job_order_object is not None and not isinstance(job_order_object,
                                                       MutableMapping):
        _logger.error(
            'CWL input object at %s is not formatted correctly, it should be a '
            'JSON/YAML dictionary, not %s.\n'
            'Raw input object:\n%s', job_order_file or "stdin",
            type(job_order_object), job_order_object)
        sys.exit(1)
    return (job_order_object, input_basedir, loader)
Beispiel #10
0
def test_scoped_id() -> None:
    """Check how ``id`` and ``location`` fields are expanded by ``resolve_all``.

    Four documents are resolved, in order, with the same loader and the
    base URL ``http://example.com``.  After each resolution the result is
    compared with the expected expansion, converted with ``makerdf`` and
    printed in n3 format.  Only the first document is resolved with the
    default ``checklinks``; the others pass ``checklinks=False``.
    """
    ldr = Loader({})
    ctx = {
        "id": "@id",
        "location": {"@id": "@id", "@type": "@id"},
        "bar": "http://example.com/bar",
        "ex": "http://example.com/",
    }  # type: ContextType
    ldr.add_context(ctx)

    # (document, extra resolve_all keyword arguments, expected result)
    cases = [
        (
            {"id": "foo", "bar": {"id": "baz"}},
            {},
            {
                "id": "http://example.com/#foo",
                "bar": {"id": "http://example.com/#foo/baz"},
            },
        ),
        (
            {"location": "foo", "bar": {"location": "baz"}},
            {"checklinks": False},
            {
                "location": "http://example.com/foo",
                "bar": {"location": "http://example.com/baz"},
            },
        ),
        (
            {"id": "foo", "bar": {"location": "baz"}},
            {"checklinks": False},
            {
                "id": "http://example.com/#foo",
                "bar": {"location": "http://example.com/baz"},
            },
        ),
        (
            {"location": "foo", "bar": {"id": "baz"}},
            {"checklinks": False},
            {
                "location": "http://example.com/foo",
                "bar": {"id": "http://example.com/#baz"},
            },
        ),
    ]

    for document, options, expected in cases:
        ra, _ = ldr.resolve_all(cmap(document), "http://example.com", **options)
        assert expected == ra

        g = makerdf(None, ra, ctx)
        print(g.serialize(format="n3"))
Beispiel #11
0
def test_subscoped_id() -> None:
    """A ``subscope`` entry in the context is inserted into nested ids.

    With ``bar`` declared as ``{"subscope": "bar"}``, the nested id ``baz``
    under ``foo`` is expected to resolve to ``.../#foo/bar/baz``.
    """
    subscope_context = {
        "id": "@id",
        "bar": {"subscope": "bar"},
    }  # type: ContextType
    loader = Loader({})
    loader.add_context(subscope_context)

    document = cmap({"id": "foo", "bar": {"id": "baz"}})
    resolved, _metadata = loader.resolve_all(document, "http://example.com")

    expected = {
        "id": "http://example.com/#foo",
        "bar": {"id": "http://example.com/#foo/bar/baz"},
    }
    assert expected == resolved
Beispiel #12
0
def resolve_tool_uri(
    argsworkflow,  # type: Text
    resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
    fetcher_constructor=None,
    # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
    document_loader=None  # type: Loader
):
    # type: (...) -> Tuple[Text, Text]
    """Turn a tool reference into ``(uri, fileuri)``.

    Resolution order:

    1. ``argsworkflow`` already has an ``http``, ``https`` or ``file``
       scheme: used as-is;
    2. it names an existing local path: converted with ``file_uri``;
    3. a ``resolver`` was supplied: its result is used (a ``Loader`` is
       created on demand when ``document_loader`` is ``None``).

    ``fileuri`` is ``uri`` with any fragment removed.

    Raises ``ValidationException`` when none of the steps produced a URI.
    """
    # Only these schemes count: on Windows, urlsplit misreads a drive
    # letter as a scheme, so arbitrary schemes are not trusted here.
    recognised_schemes = (u'http', u'https', u'file')

    uri = None  # type: Text
    scheme = urllib.parse.urlsplit(argsworkflow).scheme
    if scheme in recognised_schemes:
        uri = argsworkflow
    else:
        local_path = os.path.abspath(argsworkflow)
        if os.path.exists(local_path):
            uri = file_uri(str(local_path))
        elif resolver:
            if document_loader is None:
                document_loader = Loader(
                    jobloaderctx,
                    fetcher_constructor=fetcher_constructor)  # type: ignore
            uri = resolver(document_loader, argsworkflow)

    if uri is None:
        raise ValidationException("Not found: '%s'" % argsworkflow)

    if uri != argsworkflow:
        _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

    fileuri, _fragment = urllib.parse.urldefrag(uri)
    return uri, fileuri
def test_sourceline() -> None:
    """An exception raised inside ``SourceLine`` is re-raised with position info.

    A plain ``Exception("Whoops")`` raised inside the context manager is
    expected to surface as the requested exception class with a message
    ending in ``frag.yml:3:3: Whoops``.
    """
    class TestExp(Exception):
        pass

    loader = Loader({"id": "@id"})
    fragment_path = get_data("tests/frag.yml")
    assert fragment_path
    document, _metadata = loader.resolve_ref(fragment_path)

    expected_suffix = "frag.yml:3:3: Whoops"
    try:
        with SourceLine(document, 1, TestExp, False):
            raise Exception("Whoops")
    except TestExp as e:
        assert str(e).endswith(expected_suffix), e
    except Exception as exc:
        # Any other exception type means SourceLine did not translate it.
        assert False, exc
Beispiel #14
0
def default_loader(fetcher_constructor):
    # type: (Optional[FetcherConstructorType]) -> Loader
    """Return the ``Loader`` for ``fetcher_constructor``, creating it once.

    Loaders are stored in the module-level ``loaders`` mapping keyed by the
    fetcher constructor, so repeated calls with the same constructor return
    the same ``Loader`` instance.
    """
    if fetcher_constructor not in loaders:
        loaders[fetcher_constructor] = Loader(
            jobloaderctx, fetcher_constructor=fetcher_constructor)
    return loaders[fetcher_constructor]
Beispiel #15
0
def default_loader(fetcher_constructor=None, enable_dev=False, doc_cache=True):
    # type: (Optional[FetcherConstructorType], bool, bool) -> Loader
    """Create a ``Loader`` configured with ``jobloaderctx``.

    ``fetcher_constructor`` and ``doc_cache`` are forwarded unchanged.  The
    ``allow_attachments`` callback ignores its argument and always answers
    with the value of ``enable_dev``.
    """
    def _attachments_allowed(r):
        return enable_dev

    loader_options = {
        "fetcher_constructor": fetcher_constructor,
        "allow_attachments": _attachments_allowed,
        "doc_cache": doc_cache,
    }
    return Loader(jobloaderctx, **loader_options)
Beispiel #16
0
def load_job_order(args,                 # type: argparse.Namespace
                   stdin,                # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides_list,       # type: List[Dict[Text, Any]]
                   tool_file_uri         # type: Text
                  ):  # type: (...) -> Tuple[Optional[MutableMapping[Text, Any]], Text, Loader]
    """Load the job order (input object) selected by ``args.job_order``.

    * exactly one argument not starting with ``-``: treated as a job order
      file and loaded with ``loader.resolve_ref``;
    * exactly one argument equal to ``-``: the job order is read from
      ``stdin`` and resolved against the current working directory;
    * anything else: no job order object is loaded here.

    If the loaded object contains the
    ``http://commonwl.org/cwltool#overrides`` key, the resolved overrides
    are appended to ``overrides_list`` (mutated in place) and the key is
    removed from the job order object.

    Logs an error and calls ``sys.exit(1)`` when a job order was loaded but
    is not a ``MutableMapping``.

    Returns a ``(job_order_object, input_basedir, loader)`` tuple, where
    ``job_order_object`` is ``None`` when nothing was loaded.
    """
    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")
    # Any other argument shape leaves job_order_file at its initial None.

    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object is not None and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # A job order read from stdin has no file, so the base directory
        # stands in as the URI for the overrides.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides_list.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    if job_order_object is not None and not isinstance(job_order_object, MutableMapping):
        _logger.error(
            'CWL input object at %s is not formatted correctly, it should be a '
            'JSON/YAML dictionary, not %s.\n'
            'Raw input object:\n%s', job_order_file or "stdin",
            type(job_order_object), job_order_object)
        sys.exit(1)
    return (job_order_object, input_basedir, loader)
def test_Loader_initialisation_for_TMP_env_var(tmp_dir_fixture):
    """A ``Loader`` can be built when HOME is unset and TMP is set.

    The process environment is snapshotted first and restored afterwards so
    the HOME/TMP edits made here do not leak into other tests.
    """
    saved_environ = dict(os.environ)
    try:
        # Ensure HOME is missing.
        os.environ.pop("HOME", None)
        # Ensure TMP is present.
        os.environ["TMP"] = tmp_dir_fixture

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
    """A ``Loader`` can be built when neither HOME nor TMP is set.

    The process environment is snapshotted first and restored afterwards so
    removing HOME/TMP here does not leak into other tests.
    """
    saved_environ = dict(os.environ)
    try:
        # Ensure HOME and TMP are both missing.
        os.environ.pop("HOME", None)
        os.environ.pop("TMP", None)

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
Beispiel #19
0
def test_fetcher() -> None:
    """Exercise a ``Loader`` constructed with ``testFetcher`` as its fetcher.

    ``foo.txt`` must resolve (with and without a ``keep:`` base URL) and be
    reported as existing; resolving ``bar.txt`` must raise ``RuntimeError``
    and it must be reported as missing.
    """
    loader = Loader({}, fetcher_constructor=testFetcher)

    plain_doc = loader.resolve_ref("foo.txt")[0]
    assert {"hello": "foo"} == plain_doc

    keep_doc = loader.resolve_ref("foo.txt", base_url="keep:abc+123")[0]
    assert {"hello": "keepfoo"} == keep_doc

    assert loader.check_exists("foo.txt")

    with pytest.raises(RuntimeError):
        loader.resolve_ref("bar.txt")
    assert not loader.check_exists("bar.txt")
def test_Loader_initialisation_for_HOME_env_var(tmp_dir_fixture):
    """A ``Loader`` can be built when HOME points at a temporary directory.

    The process environment is snapshotted first and restored afterwards so
    the HOME override made here does not leak into other tests.
    """
    import os
    from schema_salad.ref_resolver import Loader
    from requests import Session

    saved_environ = dict(os.environ)
    try:
        # Ensure HOME is set.
        os.environ["HOME"] = tmp_dir_fixture

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
Beispiel #21
0
def load_job_order(
        args,  # type: argparse.Namespace
        stdin,  # type: IO[Any]
        fetcher_constructor,  # Fetcher
        overrides,  # type: List[Dict[Text, Any]]
        tool_file_uri  # type: Text
):
    # type: (...) -> Tuple[Dict[Text, Any], Text, Loader]
    """Load the job order (input object) selected by ``args.job_order``.

    * exactly one argument not starting with ``-``: treated as a job order
      file and loaded with ``loader.resolve_ref``;
    * exactly one argument equal to ``-``: the job order is read from
      ``stdin`` and resolved against the current working directory;
    * anything else: no job order object is loaded here.

    If the loaded object contains the
    ``http://commonwl.org/cwltool#overrides`` key, the resolved overrides
    are appended to ``overrides`` (mutated in place) and the key is removed
    from the job order object.

    Returns a ``(job_order_object, input_basedir, loader)`` tuple;
    ``job_order_object`` may be ``None`` (or empty) when nothing was loaded.
    """
    job_order_object = None
    # Bound up front: the stdin branch below never assigns it, and it is
    # read unconditionally further down.
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx,
                    fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object,
                                                 file_uri(os.getcwd()) + "/")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(
            os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file,
                                                 checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        # A job order read from stdin has no file of its own, so fall back
        # to the input base directory as the URI for the overrides.
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if not job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
Beispiel #22
0
def default_loader(
    fetcher_constructor: Optional[FetcherCallableType] = None,
    enable_dev: bool = False,
    doc_cache: bool = True,
) -> Loader:
    """Create a ``Loader`` configured with ``jobloaderctx``.

    ``fetcher_constructor`` and ``doc_cache`` are forwarded unchanged.  The
    ``allow_attachments`` callback ignores its argument and always answers
    with the value of ``enable_dev``.
    """
    loader = Loader(
        jobloaderctx,
        doc_cache=doc_cache,
        allow_attachments=lambda r: enable_dev,
        fetcher_constructor=fetcher_constructor,
    )
    return loader
Beispiel #23
0
def fetch_document(
    argsworkflow,  # type: Union[Text, dict[Text, Any]]
    resolver=None,  # type: Callable[[Loader, Union[Text, dict[Text, Any]]], Text]
    fetcher_constructor=None
    # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    ``argsworkflow`` is either a string reference or an in-memory dict:

    * string with any URL scheme: used as the URI directly;
    * string naming an existing local path: converted with ``file_uri``;
    * other string: passed to ``resolver`` when one was given;
    * dict: wrapped with ``cmap`` under a synthetic ``#<id>`` URI.

    Returns ``(document_loader, workflowobj, uri)``.

    Raises ``ValidationException`` when a string cannot be turned into a
    URI, or when ``argsworkflow`` is neither a string nor a dict.
    """
    loader_context = {
        "cwl": "https://w3id.org/cwl/cwl#",
        "id": "@id"
    }
    document_loader = Loader(loader_context,
                             fetcher_constructor=fetcher_constructor)

    if isinstance(argsworkflow, string_types):
        uri = None  # type: Text
        # NOTE(review): any non-empty scheme is accepted here, which
        # presumably includes a Windows drive letter parsed as a scheme.
        if urllib.parse.urlsplit(argsworkflow).scheme:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            uri = file_uri(str(os.path.abspath(argsworkflow)))
        elif resolver:
            uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if uri != argsworkflow:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        fileuri = urllib.parse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)  # type: CommentedMap
        return document_loader, workflowobj, uri

    if isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        return document_loader, workflowobj, uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
Beispiel #24
0
def test_schemas() -> None:
    """``$namespaces`` prefixes are expanded when ``$schemas`` is present.

    The ``edam:`` prefixed key and value are expected to come back as full
    ``http://edamontology.org/`` URIs, while ``$schemas`` and
    ``$namespaces`` themselves are left unchanged.
    """
    loader = Loader({})

    path = get_data("tests/EDAM.owl")
    assert path

    document = {
        "$schemas": [file_uri(path)],
        "$namespaces": {"edam": "http://edamontology.org/"},
        "edam:has_format": "edam:format_1915",
    }
    resolved, _metadata = loader.resolve_all(cmap(document), "")

    expected = {
        "$schemas": [file_uri(path)],
        "$namespaces": {"edam": "http://edamontology.org/"},
        "http://edamontology.org/has_format": "http://edamontology.org/format_1915",
    }
    assert expected == resolved
Beispiel #25
0
def fetch_document(argsworkflow):
    # type: (Union[Text, dict[Text, Any]]) -> Tuple[Loader, Dict[Text, Any], Text]
    """Retrieve a CWL document.

    A string with a URL scheme is used as the URI directly; a string
    without one is treated as a local path and prefixed with ``file://``.
    The fragment-less URI is then fetched with a fresh ``Loader``.  A dict
    is returned as-is under a synthetic ``#<id>`` URI.

    Returns ``(document_loader, workflowobj, uri)``.

    Raises ``ValidationException`` for any other argument type.
    """
    document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"})

    if isinstance(argsworkflow, basestring):
        if urlparse.urlsplit(argsworkflow).scheme:
            uri = argsworkflow  # type: Text
        else:
            uri = "file://" + os.path.abspath(argsworkflow)
        fileuri, _fragment = urlparse.urldefrag(uri)
        workflowobj = document_loader.fetch(fileuri)  # type: Dict[Text, Any]
        return document_loader, workflowobj, uri

    if isinstance(argsworkflow, dict):
        return document_loader, argsworkflow, "#" + Text(id(argsworkflow))

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
Beispiel #26
0
    def cwl_dispatch(self, json):
        """Build the CWL context for this task from the incoming job ``json``.

        Loads the workflow named by ``self.dag.default_args["cwl_workflow"]``,
        creates a temporary output directory (prefix ``dag_tmp_``) under
        ``self.tmp_folder``, resolves ``json`` as a job order and stores the
        initialised job order under the ``'promises'`` key.

        Returns the context dict (keys ``"outdir"`` and ``'promises'``), or
        ``None`` when any exception was raised along the way (the exception
        is logged at info level, not propagated).
        """
        try:
            cwlwf, it_is_workflow = load_cwl(
                self.dag.default_args["cwl_workflow"], self.dag.default_args)
            cwl_context = {
                "outdir":
                mkdtemp(dir=get_folder(os.path.abspath(self.tmp_folder)),
                        prefix="dag_tmp_")
            }

            # Job loader context extended with the workflow's own namespaces.
            _jobloaderctx = jobloaderctx.copy()
            _jobloaderctx.update(cwlwf.metadata.get("$namespaces", {}))
            loader = Loader(_jobloaderctx)

            try:
                # Round-trip the job through its serialised form so it is
                # loaded by the YAML round-trip loader before resolution.
                job_order_object = yaml.round_trip_load(
                    io.StringIO(initial_value=dumps(json)))
                job_order_object, _ = loader.resolve_all(
                    job_order_object,
                    file_uri(os.getcwd()) + "/",
                    checklinks=False)
            except Exception as e:
                # Best effort: the failure is only logged and execution
                # continues with whatever job_order_object holds.
                # NOTE(review): if round_trip_load itself failed,
                # job_order_object is unbound and the next statement raises
                # NameError, which the outer handler turns into `None`.
                _logger.error("Job Loader: {}".format(str(e)))

            job_order_object = init_job_order(job_order_object, None, cwlwf,
                                              loader, sys.stdout)

            cwl_context['promises'] = job_order_object

            logging.info('{0}: Final job: \n {1}'.format(
                self.task_id, dumps(cwl_context, indent=4)))

            return cwl_context

        except Exception as e:
            # Any failure above is reported at info level and swallowed.
            _logger.info('Dispatch Exception {0}: \n {1} {2}'.format(
                self.task_id, type(e), e))
            pass
        return None
def test_idmap() -> None:
    """Check id-map expansion (``mapSubject`` / ``mapPredicate``) in ``resolve_all``.

    The ``inputs`` mapping is expected to become a sequence of items that
    carry the original key as a scoped ``id`` and the original value under
    ``a``.  ``outputs`` entries are expected to be scoped under the parent
    id, and unrelated fields are left alone.
    """
    idmap_context = {
        "inputs": {
            "@id": "http://example.com/inputs",
            "mapSubject": "id",
            "mapPredicate": "a",
        },
        "outputs": {
            "@type": "@id",
            "identity": True
        },
        "id": "@id",
    }
    ldr = Loader({})
    ldr.add_context(idmap_context)

    document = cmap({
        "id": "stuff",
        "inputs": {
            "zip": 1,
            "zing": 2
        },
        "outputs": ["out"],
        "other": {
            "n": 9
        },
    })
    ra, _ = ldr.resolve_all(document, "http://example2.com/")
    assert isinstance(ra, CommentedMap)

    assert "http://example2.com/#stuff" == ra["id"]
    for item in ra["inputs"]:
        # The value 2 belonged to "zing"; every other item must be "zip".
        expected_id = (
            "http://example2.com/#stuff/zing"
            if item["a"] == 2
            else "http://example2.com/#stuff/zip"
        )
        assert expected_id == item["id"]
    assert ["http://example2.com/#stuff/out"] == ra["outputs"]
    assert {"n": 9} == ra["other"]
def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
    """A ``Loader`` can be built when neither HOME nor TMP is set.

    The process environment is snapshotted first and restored afterwards so
    removing HOME/TMP here does not leak into other tests.
    """
    import os
    from schema_salad.ref_resolver import Loader
    from requests import Session

    saved_environ = dict(os.environ)
    try:
        # Ensure HOME and TMP are both missing.
        os.environ.pop("HOME", None)
        os.environ.pop("TMP", None)

        loader = Loader(ctx={})
        assert isinstance(loader.session, Session)
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
Beispiel #29
0
def fetch_document(
    argsworkflow,  # type: Union[Text, Dict[Text, Any]]
    resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
    fetcher_constructor=None
    # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    ``argsworkflow`` is either a string reference or an in-memory dict:

    * string with an ``http``, ``https`` or ``file`` scheme: used as-is;
    * string naming an existing local path: converted with ``file_uri``;
    * other string: passed to ``resolver`` when one was given;
    * dict: wrapped with ``cmap`` under a synthetic ``#<id>`` URI.

    Returns ``(document_loader, workflowobj, uri)``.

    Raises ``ValidationException`` when a string cannot be turned into a
    URI, or when ``argsworkflow`` is neither a string nor a dict.
    """
    document_loader = Loader(
        jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if isinstance(argsworkflow, string_types):
        # Only these schemes count: on Windows, urlsplit misreads a drive
        # letter as a scheme, so arbitrary schemes are not trusted here.
        recognised_schemes = (u'http', u'https', u'file')

        uri = None  # type: Text
        if urllib.parse.urlsplit(argsworkflow).scheme in recognised_schemes:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            uri = file_uri(str(os.path.abspath(argsworkflow)))
        elif resolver:
            uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if uri != argsworkflow:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        fileuri = urllib.parse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)  # type: CommentedMap
        return document_loader, workflowobj, uri

    if isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        return document_loader, workflowobj, uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
def expand_cwl(cwl, uri, g):
    """Parse, validate and import one CWL document into the RDF graph ``g``.

    On success a ``"<uri>: imported ok"`` line is written to stderr; any
    exception is caught and reported to stderr as ``"<uri>: <error>"``, so
    this function does not raise for a failing document.
    """
    try:
        loader_context = {
            "cwl": "https://w3id.org/cwl/cwl#",
            "id": "@id"
        }
        document_loader = Loader(loader_context)
        # NOTE(review): yaml.load without an explicit safe loader can build
        # arbitrary objects; confirm `cwl` never comes from untrusted input.
        cwl = yaml.load(cwl)
        validated = validate_document(document_loader, cwl, uri, strict=False)
        document_loader, _avsc_names, processobj, _metadata, uri = validated
        jsonld_context.makerdf(uri, processobj, document_loader.ctx, graph=g)
        sys.stderr.write("\n%s: imported ok\n" % (uri))
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        sys.stderr.write("\n%s: %s\n" % (uri, e))
Beispiel #31
0
def fetch_document(argsworkflow):
    # type: (Union[Text, dict[Text, Any]]) -> Tuple[Loader, Dict[Text, Any], Text]
    """Retrieve a CWL document.

    * string with a URL scheme: used as the URI directly;
    * string without a scheme: treated as a local path and turned into a
      ``file://`` URI built from its absolute path;
    * dict: returned unchanged together with a synthetic ``#<id>`` URI.

    For strings, the document is fetched from the URI with its fragment
    removed.  Returns ``(document_loader, workflowobj, uri)``.

    Raises ``ValidationException`` for any other argument type.
    """
    loader_context = {"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"}
    document_loader = Loader(loader_context)

    is_reference = isinstance(argsworkflow, basestring)
    if not is_reference and not isinstance(argsworkflow, dict):
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    if is_reference:
        has_scheme = bool(urlparse.urlsplit(argsworkflow).scheme)
        uri = argsworkflow if has_scheme else "file://" + os.path.abspath(argsworkflow)  # type: Text
        workflowobj = document_loader.fetch(urlparse.urldefrag(uri)[0])  # type: Dict[Text, Any]
    else:
        workflowobj = argsworkflow
        uri = "#" + Text(id(argsworkflow))

    return document_loader, workflowobj, uri
def test_secondaryFile_dsl_ref() -> None:
    """Check expansion of the ``secondaryFiles`` shorthand.

    A plain pattern is expected to expand with ``required`` set to ``None``;
    a trailing ``?`` is expected to be stripped and to set ``required`` to
    ``False``.  Both the scalar and the list form are covered.
    """
    ldr = Loader({})
    ldr.add_context({"secondaryFiles": {"secondaryFilesDSL": True}})

    # (shorthand value, expected expansion), resolved in this order.
    cases = [
        (".foo", {"pattern": ".foo", "required": None}),
        (".foo?", {"pattern": ".foo", "required": False}),
        ([".foo"], [{"pattern": ".foo", "required": None}]),
        ([".foo?"], [{"pattern": ".foo", "required": False}]),
    ]
    for shorthand, expansion in cases:
        ra, _ = ldr.resolve_all(cmap({"secondaryFiles": shorthand}), "")
        assert {"secondaryFiles": expansion} == ra
Beispiel #33
0
def test_fetch_inject_id():
    """Check which ids end up in the loader index after ``resolve_ref``.

    Three fixture documents are resolved with fresh loaders.  For each one
    the resolved document and the sorted, lower-cased index keys are
    compared against the expected values.
    """
    def lowered_index_keys(loader):
        return sorted(k.lower() for k in loader.idx.keys())

    l1 = Loader({"id": "@id"})
    furi1 = file_uri(get_data("schema_salad/tests/inject-id1.yml")).lower()
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [furi1, furi1 + "#foo"] == lowered_index_keys(l1)

    l2 = Loader({"id": "@id"})
    furi2 = file_uri(get_data("schema_salad/tests/inject-id2.yml")).lower()
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    assert [furi2] == lowered_index_keys(l2)

    l3 = Loader({"id": "@id"})
    furi3 = file_uri(get_data("schema_salad/tests/inject-id3.yml")).lower()
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [furi3, "http://example.com"] == lowered_index_keys(l3)
def fetch_document(argsworkflow,   # type: Union[Text, dict[Text, Any]]
                   resolver=None,  # type: Callable[[Loader, Union[Text, dict[Text, Any]]], Text]
                   fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                   ):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document.

    ``argsworkflow`` is either a string reference or an in-memory dict:

    * string with any URL scheme: used as the URI directly;
    * string naming an existing local path: turned into a ``file://`` URI
      built from its absolute path;
    * other string: passed to ``resolver`` when one was given;
    * dict: wrapped with ``cmap`` under a synthetic ``#<id>`` URI.

    Returns ``(document_loader, workflowobj, uri)``.

    Raises ``ValidationException`` when a string cannot be turned into a
    URI, or when ``argsworkflow`` is neither a string nor a dict.
    """
    loader_context = {"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"}
    document_loader = Loader(loader_context,
                             fetcher_constructor=fetcher_constructor)

    if isinstance(argsworkflow, basestring):
        uri = None  # type: Text
        if urlparse.urlsplit(argsworkflow).scheme:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            uri = "file://" + os.path.abspath(argsworkflow)
        elif resolver:
            uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if uri != argsworkflow:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        fileuri = urlparse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)  # type: CommentedMap
        return document_loader, workflowobj, uri

    if isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
        return document_loader, workflowobj, uri

    raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
def test_attachments():
    """Check how ``allow_attachments`` changes loading of ``multidoc.yml``.

    * no callback: ``baz`` and ``quux`` are expected to hold the full text
      of the source file;
    * callback returning true for this document: the two fields are
      expected to hold the individual attachment texts;
    * callback returning false for this document: same result as with no
      callback.
    """
    def read_source():
        with open(get_data("schema_salad/tests/multidoc.yml"), "rt") as f:
            return f.read()

    furi = file_uri(get_data("schema_salad/tests/multidoc.yml"))

    l1 = Loader({})
    r1, _ = l1.resolve_ref(furi)
    content = read_source()
    assert {"foo": "bar", "baz": content, "quux": content} == r1

    l2 = Loader({}, allow_attachments=lambda x: x["foo"] == "bar")
    r2, _ = l2.resolve_ref(furi)
    expected_attachments = {
        "foo": "bar",
        "baz": "This is the {first attachment}.\n",
        "quux": "This is the [second attachment].",
    }
    assert expected_attachments == r2

    l3 = Loader({}, allow_attachments=lambda x: x["foo"] == "baz")
    r3, _ = l3.resolve_ref(furi)
    content = read_source()
    assert {"foo": "bar", "baz": content, "quux": content} == r3
Beispiel #36
0
    def execute(self, context):
        """Build the job order passed on to the workflow's downstream tasks.

        The job content from ``self.dag.default_args["job_data"]["content"]``
        is initialised with ``init_job_order``.  Every workflow input whose
        last id segment appears in the initialised job is copied into the
        result under its short name, then ``tmp_folder`` (a freshly created
        ``dag_tmp_`` directory) and ``output_folder`` are added.

        Returns ``{"outputs": <job order dict>}``.
        """
        job_content = self.dag.default_args["job_data"]["content"]
        initialized_job_order_object = init_job_order(job_content,
                                                      Namespace(),
                                                      self.dag.cwlwf,
                                                      Loader(jobloaderctx.copy()),
                                                      sys.stdout)

        updated_job_order_object = {}
        for inp in self.dag.cwlwf.tool["inputs"]:
            inp_id = shortname(inp["id"])
            # The initialised job is keyed by the last path segment of the id.
            leaf_name = inp_id.split("/")[-1]
            if leaf_name in initialized_job_order_object:
                updated_job_order_object[inp_id] = initialized_job_order_object[leaf_name]

        tmp_root = job_content.get("tmp_folder", DEFAULT_TMP_FOLDER)
        updated_job_order_object["tmp_folder"] = tempfile.mkdtemp(dir=tmp_root, prefix="dag_tmp_")
        updated_job_order_object["output_folder"] = job_content["output_folder"]

        job_dump = json.dumps(updated_job_order_object, indent=4)
        logging.info("Dispatch job\n{}".format(job_dump))
        return {"outputs": updated_job_order_object}
def test_fetch_inject_id() -> None:
    """Check which ids end up in the loader index after ``resolve_ref``.

    Three fixture documents are resolved with fresh loaders.  Index keys
    are compared as-is when the fixture directory is on a case-sensitive
    file system, and lower-cased otherwise.
    """
    path = get_data("schema_salad/tests/inject-id1.yml")
    assert path
    case_sensitive = is_fs_case_sensitive(os.path.dirname(path))

    def lower(item: str) -> str:
        return item if case_sensitive else item.lower()

    def index_keys(loader: Loader) -> List[str]:
        return sorted(lower(k) for k in loader.idx.keys())

    l1 = Loader({"id": "@id"})
    furi1 = file_uri(path)
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [lower(furi1), lower(furi1 + "#foo")] == index_keys(l1)

    l2 = Loader({"id": "@id"})
    path2 = get_data("schema_salad/tests/inject-id2.yml")
    assert path2
    furi2 = file_uri(path2)
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    assert [lower(furi2)] == index_keys(l2)

    l3 = Loader({"id": "@id"})
    path3 = get_data("schema_salad/tests/inject-id3.yml")
    assert path3
    furi3 = file_uri(path3)
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [lower(furi3), "http://example.com"] == index_keys(l3)
Beispiel #38
0
def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False,
                   stdout=sys.stdout, make_fs_access=None):
    # type: (argparse.Namespace, Process, IO[Any], bool, bool, IO[Any], Type[StdFsAccess]) -> Union[int, Tuple[Dict[Text, Any], Text]]
    """Assemble the job order (input object) for the tool ``t``.

    The job order comes from a single job-order file argument, from ``stdin``
    when the sole argument is ``-``, or from command line options parsed by a
    parser generated for the tool.  Tool input defaults fill in missing keys.

    :param args: parsed command line; reads ``job_order``, ``basedir``,
        ``conformance_test``, ``debug``, ``tool_help`` and ``workflow``.
    :param t: the loaded tool; ``t.tool["inputs"]`` and ``t.metadata`` are read.
    :param stdin: stream the job order is parsed from when ``-`` is given.
    :param print_input_deps: print the job order's dependencies and return 0.
    :param relative_deps: forwarded to ``printdeps``.
    :param stdout: stream handed to ``printdeps``.
    :param make_fs_access: called with the input base directory; it is invoked
        whenever a job order is returned, so the ``None`` default only works
        for calls that return early.
    :return: ``(job_order_object, input_basedir)`` on success, or an int exit
        code: 1 on a load error or missing input object, 0 after printing tool
        help or input dependencies.
    """

    job_order_object = None
    # Bound up front: the stdin branch assigns neither name, yet both are read
    # further down when stdin yields an empty document.
    job_order_file = None
    toolparser = None

    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {
            u"path": {u"@type": u"@id"},
            u"location": {u"@type": u"@id"},
            u"format": {u"@type": u"@id"},
            u"id": u"@id"}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        # NOTE(review): yaml.load without an explicit safe loader may construct
        # arbitrary objects, and stdin is external input -- consider a safe loader.
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)
            return 1
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[Text, Text]
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns a (document, metadata) pair; only the
                    # document can be updated with the command line values.
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            _logger.debug(u"Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    # Fill in declared defaults for inputs the job order does not mention.
    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        if toolparser:
            # Single parenthesised argument: prints identically on Python 2 and 3.
            print(u"\nOptions for %s " % args.workflow)
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required, use --help for details")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=u"file://%s/" % input_basedir)
        return 0

    def pathToLoc(p):
        # Move a "path" entry to "location" unless a location is already set.
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    adjustDirObjs(job_order_object, pathToLoc)
    adjustFileObjs(job_order_object, pathToLoc)
    normalizeFilesDirs(job_order_object)
    adjustDirObjs(job_order_object, cast(Callable[..., Any],
        functools.partial(getListing, make_fs_access(input_basedir))))

    # These keys are bookkeeping, not tool inputs.
    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]

    return (job_order_object, input_basedir)
Beispiel #39
0
def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False,
                   stdout=sys.stdout, make_fs_access=None, fetcher_constructor=None):
    # type: (argparse.Namespace, Process, IO[Any], bool, bool, IO[Any], Callable[[Text], StdFsAccess], Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]) -> Union[int, Tuple[Dict[Text, Any], Text]]
    """Assemble the job order (input object) for the tool ``t``.

    The job order comes from a single job-order file argument, from ``stdin``
    when the sole argument is ``-``, or from command line options parsed by a
    parser generated for the tool.  Tool input defaults fill in missing keys.

    :param args: parsed command line; reads ``job_order``, ``basedir``,
        ``debug``, ``tool_help`` and ``workflow``.
    :param t: the loaded tool; ``t.tool["inputs"]`` and ``t.metadata`` are read.
    :param stdin: stream the job order is parsed from when ``-`` is given.
    :param print_input_deps: print the job order's dependencies and return 0.
    :param relative_deps: forwarded to ``printdeps``.
    :param stdout: stream handed to ``printdeps``.
    :param make_fs_access: accepted but not used by this function.
    :param fetcher_constructor: forwarded to the ``Loader`` constructor.
    :return: ``(job_order_object, input_basedir)`` on success, or an int exit
        code: 1 on a load error or missing input object, 0 after printing tool
        help or input dependencies.
    """

    job_order_object = None
    # Bound up front: the stdin branch assigns neither name, yet both are read
    # further down when stdin yields an empty document.
    job_order_file = None
    toolparser = None

    _jobloaderctx = jobloaderctx.copy()
    _jobloaderctx.update(t.metadata.get("$namespaces", {}))
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)  # type: ignore
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)
            return 1
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[Text, Text]
        records = []  # type: List[Text]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap, records)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            # Fold options whose name starts with a record name into one
            # nested dict per record, keyed by the remainder of the name.
            for record_name in records:
                record = {}
                # .items() exists on Python 2 and 3; the snapshot dict lets
                # cmd_line be modified while its entries are walked.
                record_items = {
                    k: v for k, v in cmd_line.items()
                    if k.startswith(record_name)}
                for key, value in record_items.items():
                    record[key[len(record_name) + 1:]] = value
                    del cmd_line[key]
                cmd_line[str(record_name)] = record

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(
                        os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns a (document, metadata) pair; only the
                    # document can be updated with the command line values.
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            del cmd_line["job_order"]

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    # Fill in declared defaults for inputs the job order does not mention.
    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        if toolparser:
            print(u"\nOptions for {} ".format(args.workflow))
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required, use --help for details")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=file_uri(input_basedir + "/"))
        return 0

    def pathToLoc(p):
        # Move a "path" entry to "location" unless a location is already set.
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    visit_class(job_order_object, ("File", "Directory"), pathToLoc)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    # These keys are bookkeeping, not tool inputs.
    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]

    return (job_order_object, input_basedir)
def validate_document(document_loader,   # type: Loader
                      workflowobj,       # type: CommentedMap
                      uri,               # type: Text
                      enable_dev=False,  # type: bool
                      strict=True,       # type: bool
                      preprocess_only=False,    # type: bool
                      fetcher_constructor=None  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    A list is wrapped as ``{"$graph": list}``.  When the object carries a
    ``cwl:tool`` key it is treated as a job object: it is resolved, the tool
    it points at is fetched and validated instead, and the job object is
    attached to the returned metadata as ``cwl:defaults``.

    :param document_loader: loader whose ``idx`` is reused by the
        version-specific loader built here.
    :param workflowobj: the parsed document (dict or list); modified in place.
    :param uri: URI the document was loaded from.
    :param enable_dev: forwarded to ``update.update``.
    :param strict: forwarded to ``schema.validate_doc``.
    :param preprocess_only: return right after resolving, without validating
        or updating.
    :param fetcher_constructor: forwarded to ``fetch_document`` and ``Loader``.
    :return: ``(document_loader, avsc_names, processobj, metadata, uri)``.
    :raises ValueError: if ``workflowobj`` is neither a list nor a dict.
    :raises ValidationException: if the resolved document has the wrong shape.
    """

    if isinstance(workflowobj, list):
        workflowobj = {
            "$graph": workflowobj
        }

    if not isinstance(workflowobj, dict):
        raise ValueError("workflowjobj must be a dict")

    jobobj = None
    if "cwl:tool" in workflowobj:
        jobobj, _ = document_loader.resolve_all(workflowobj, uri)
        uri = urlparse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urlparse.urldefrag(uri)[0]

    if "cwlVersion" in workflowobj:
        if not isinstance(workflowobj["cwlVersion"], (str, Text)):
            raise Exception("'cwlVersion' must be a string, got %s" % type(workflowobj["cwlVersion"]))
        # Strip the namespace prefix so only the bare version name remains.
        workflowobj["cwlVersion"] = re.sub(
            r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
            workflowobj["cwlVersion"])
    else:
        # Logger.warn is a deprecated alias; Logger.warning is the supported name.
        _logger.warning("No cwlVersion found, treating this file as draft-2.")
        workflowobj["cwlVersion"] = "draft-2"

    if workflowobj["cwlVersion"] == "draft-2":
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    # get_schema may hand back an exception object in place of the names.
    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, unicode]
    # Schema context, graph and cache for this version; index of the caller's loader.
    document_loader = Loader(sch_document_loader.ctx, schemagraph=sch_document_loader.graph,
                  idx=document_loader.idx, cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not metadata:
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        # No separate metadata came back: derive it from the document itself.
        metadata = cast(CommentedMap, cmap({"$namespaces": processobj.get("$namespaces", {}),
                         "$schemas": processobj.get("$schemas", []),
                         "cwlVersion": processobj["cwlVersion"]},
                        fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, metadata, uri

    schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, metadata)))

    if jobobj:
        metadata[u"cwl:defaults"] = jobobj

    return document_loader, avsc_names, processobj, metadata, uri
Beispiel #41
0
def validate_document(document_loader,           # type: Loader
                      workflowobj,               # type: CommentedMap
                      uri,                       # type: Text
                      overrides,                 # type: List[Dict]
                      metadata,                  # type: Dict[Text, Any]
                      enable_dev=False,          # type: bool
                      strict=True,               # type: bool
                      preprocess_only=False,     # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,         # type: bool
                      do_validate=True           # type: bool
                     ):
    # type: (...) -> Tuple[Loader, schema.Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    A sequence is wrapped as ``{"$graph": sequence}``.  An object carrying a
    ``cwl:tool`` key is treated as a job object: the tool it references is
    fetched and validated in its place, overrides found in the job object are
    appended to ``overrides``, and the job object is returned in the metadata
    under ``cwl:defaults``.

    Returns ``(document_loader, avsc_names, processobj, new_metadata, uri)``.
    Raises ``ValueError`` for a non-mapping document and
    ``ValidationException`` for a missing, non-string or unsupported
    ``cwlVersion`` or a resolved document of the wrong shape.
    """

    if isinstance(workflowobj, MutableSequence):
        workflowobj = cmap({"$graph": workflowobj}, fn=uri)

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    tool_key = "https://w3id.org/cwl/cwl#tool"
    overrides_key = "http://commonwl.org/cwltool#overrides"

    jobobj = None
    if "cwl:tool" in workflowobj:
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri, checklinks=do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj[tool_key])
        del cast(dict, jobobj)[tool_key]

        if isinstance(jobobj, CommentedMap) and overrides_key in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj[overrides_key]

        # From here on the referenced tool document is what gets validated.
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]
    if "cwlVersion" not in workflowobj:
        # Fall back to the version recorded in the caller's metadata.
        if 'cwlVersion' not in metadata:
            raise ValidationException(
                "No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>.\n"
                "Note: if this is a CWL draft-2 (pre v1.0) document then it "
                "will need to be upgraded first.")
        workflowobj['cwlVersion'] = metadata['cwlVersion']

    if not isinstance(workflowobj["cwlVersion"], string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(workflowobj["cwlVersion"])))
    # Drop the namespace prefix, leaving the bare version name.
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in list(ALLUPDATES):
        # Report every supported version, marking the dev ones.
        supported = sorted(
            known + " (with --enable-dev flag only)" if "dev" in known else known
            for known in list(ALLUPDATES))
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(supported)))

    sch_document_loader, avsc_names = process.get_schema(
        workflowobj["cwlVersion"])[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    document_loader = Loader(
        sch_document_loader.ctx,
        schemagraph=sch_document_loader.graph,
        idx=document_loader.idx,
        cache=sch_document_loader.cache,
        fetcher_constructor=fetcher_constructor,
        skip_schemas=skip_schemas)

    _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not new_metadata and isinstance(processobj, CommentedMap):
        # No separate metadata came back: derive it from the document itself.
        derived = {
            "$namespaces": processobj.get("$namespaces", {}),
            "$schemas": processobj.get("$schemas", []),
            "cwlVersion": processobj["cwlVersion"],
        }
        new_metadata = cast(CommentedMap, cmap(derived, fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri

    if do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if new_metadata.get("cwlVersion") != update.LATEST:
        updated = update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)
        processobj = cast(CommentedMap, cmap(updated))

    if jobobj is not None:
        new_metadata[u"cwl:defaults"] = jobobj

    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides

    return document_loader, avsc_names, processobj, new_metadata, uri
Beispiel #42
0
def load_job_order(args, t, parser, stdin=None):
    """Assemble the job order (input object) for the tool ``t``.

    The job order comes from a single job-order file argument, from ``stdin``
    when the sole argument is ``-``, or from command line options parsed by a
    parser generated for the tool.  Tool input defaults fill in missing keys.

    :param args: parsed command line; reads ``job_order``, ``basedir``,
        ``conformance_test``, ``debug``, ``tool_help`` and ``workflow``.
    :param t: the loaded tool; ``t.tool["inputs"]`` and ``t.metadata`` are read.
    :param parser: main argument parser; its help is printed when no input
        object could be built.
    :param stdin: stream the job order is parsed from when ``-`` is given;
        defaults to ``sys.stdin``.
    :return: ``(job_order_object, input_basedir)`` on success, or an int exit
        code: 1 on a load error or missing input object, 0 after printing
        tool help.
    """
    import sys

    # The body used to read an undefined name ``stdin``; it is now an optional
    # parameter so existing three-argument callers keep working.
    if stdin is None:
        stdin = sys.stdin

    job_order_object = None
    # Bound up front: the stdin branch assigns neither name, yet both are read
    # further down when stdin yields an empty document.
    job_order_file = None
    toolparser = None

    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        # NOTE(review): yaml.load without an explicit safe loader may construct
        # arbitrary objects, and stdin is external input -- consider a safe loader.
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(e, exc_info=(e if args.debug else False))
            return 1
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns a (document, metadata) pair; only the
                    # document can be updated with the command line values.
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(e, exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {}

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            _logger.debug("Parsed job order from command line: %s", job_order_object)
        else:
            job_order_object = None

    # Fill in declared defaults for inputs the job order does not mention.
    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        parser.print_help()
        if toolparser:
            # Single parenthesised argument: prints identically on Python 2 and 3.
            print("\nOptions for %s " % args.workflow)
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    return (job_order_object, input_basedir)
Beispiel #43
0
def load_overrides(ov, base_url):  # type: (Text, Text) -> List[Dict[Text, Any]]
    """Fetch the overrides document at ``ov`` and resolve it against ``base_url``."""
    fetched = Loader(overrides_ctx).fetch(ov)
    return resolve_overrides(fetched, ov, base_url)
Beispiel #44
0
def resolve_and_validate_document(loadingContext,
                      workflowobj,
                      uri,
                      preprocess_only=False,     # type: bool
                      skip_schemas=None,         # type: bool
                     ):
    # type: (...) -> Tuple[LoadingContext, Text]
    """Validate a CWL document.

    Works on a copy of ``loadingContext`` and returns that copy together with
    the (possibly redirected) document URI.  An object carrying a ``cwl:tool``
    key is treated as a job object: the tool it references is fetched and
    validated in its place and the job object is stored as ``jobdefaults``.

    Raises ``ValueError`` for a non-mapping document and
    ``ValidationException`` for a missing, non-string or unsupported
    ``cwlVersion``, for ``cwl:requirements`` under an unsupported version, or
    for a resolved document or metadata of the wrong type.
    """

    loadingContext = loadingContext.copy()

    if not isinstance(workflowobj, MutableMapping):
        raise ValueError("workflowjobj must be a dict, got '{}': {}".format(
            type(workflowobj), workflowobj))

    tool_key = "https://w3id.org/cwl/cwl#tool"
    overrides_key = "http://commonwl.org/cwltool#overrides"
    requirements_key = "https://w3id.org/cwl/cwl#requirements"

    jobobj = None
    if "cwl:tool" in workflowobj:
        jobobj, _ = loadingContext.loader.resolve_all(
            workflowobj, uri, checklinks=loadingContext.do_validate)
        uri = urllib.parse.urljoin(uri, workflowobj[tool_key])
        del cast(dict, jobobj)[tool_key]

        # From here on the referenced tool document is what gets validated.
        workflowobj = fetch_document(uri, loadingContext)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]

    cwlVersion = workflowobj.get("cwlVersion")
    if not cwlVersion:
        # Not declared on this object: look at the top of the containing file.
        cwlVersion = fetch_document(fileuri, loadingContext)[1].get("cwlVersion")
    if not cwlVersion:
        raise ValidationException(
            "No cwlVersion found. "
            "Use the following syntax in your CWL document to declare "
            "the version: cwlVersion: <version>.\n"
            "Note: if this is a CWL draft-2 (pre v1.0) document then it "
            "will need to be upgraded first.")

    if not isinstance(cwlVersion, string_types):
        with SourceLine(workflowobj, "cwlVersion", ValidationException):
            raise ValidationException("'cwlVersion' must be a string, "
                                      "got {}".format(
                                          type(cwlVersion)))
    # Drop the namespace prefix, leaving the bare version name.
    cwlVersion = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        cwlVersion)
    if cwlVersion not in list(ALLUPDATES):
        # Report every supported version, marking the dev ones.
        supported = sorted(
            known + " (with --enable-dev flag only)" if "dev" in known else known
            for known in list(ALLUPDATES))
        raise ValidationException(
            "The CWL reference runner no longer supports pre CWL v1.0 "
            "documents. Supported versions are: "
            "\n{}".format("\n".join(supported)))

    job_is_map = isinstance(jobobj, CommentedMap)

    if job_is_map and overrides_key in jobobj:
        loadingContext.overrides_list.extend(resolve_overrides(jobobj, uri, uri))
        del jobobj[overrides_key]

    if job_is_map and requirements_key in jobobj:
        if cwlVersion != "v1.1.0-dev1":
            raise ValidationException(
                    "`cwl:requirements` in the input object is not part of CWL "
                    "v1.0. You can adjust to use `cwltool:overrides` instead; or you "
                    "can set the cwlVersion to v1.1.0-dev1 or greater and re-run with "
                    "--enable-dev.")
        # Requirements given in the input object become an override on the tool.
        loadingContext.overrides_list.append({
            "overrideTarget": uri,
            "requirements": jobobj[requirements_key],
        })
        del jobobj[requirements_key]

    sch_document_loader, avsc_names = process.get_schema(cwlVersion)[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text, None]
    document_loader = Loader(
        sch_document_loader.ctx,
        schemagraph=sch_document_loader.graph,
        idx=loadingContext.loader.idx,
        cache=sch_document_loader.cache,
        fetcher_constructor=loadingContext.fetcher_constructor,
        skip_schemas=skip_schemas)

    if cwlVersion == "v1.0":
        _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, metadata = document_loader.resolve_all(
        workflowobj, fileuri, checklinks=loadingContext.do_validate)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a CommentedMap or CommentedSeq.")
    if not isinstance(metadata, CommentedMap):
        raise ValidationException("metadata must be a CommentedMap, was %s" % type(metadata))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return loadingContext, uri

    if loadingContext.do_validate:
        schema.validate_doc(avsc_names, processobj, document_loader, loadingContext.strict)

    if loadingContext.do_update:
        updated = update.update(
            processobj, document_loader, fileuri, loadingContext.enable_dev, metadata)
        processobj = cast(CommentedMap, cmap(updated))
        # Re-register the updated objects in the loader index under their ids.
        if isinstance(processobj, MutableMapping):
            document_loader.idx[processobj["id"]] = processobj
        elif isinstance(processobj, MutableSequence):
            document_loader.idx[metadata["id"]] = metadata
            for entry in processobj:
                document_loader.idx[entry["id"]] = entry

    if jobobj is not None:
        loadingContext.jobdefaults = jobobj

    loadingContext.loader = document_loader
    loadingContext.avsc_names = avsc_names
    loadingContext.metadata = metadata

    return loadingContext, uri
Beispiel #45
0
def load_job_order(args, t, parser, stdin, print_input_deps=False, relative_deps=False, stdout=sys.stdout):
    # type: (argparse.Namespace, Process, argparse.ArgumentParser, IO[Any], bool, bool, IO[Any]) -> Union[int,Tuple[Dict[str,Any],str]]
    """Assemble the job order (input object) for the tool ``t``.

    The job order comes from a single job-order file argument, from ``stdin``
    when the sole argument is ``-``, or from command line options parsed by a
    parser generated for the tool.  Tool input defaults fill in missing keys.

    :param args: parsed command line; reads ``job_order``, ``basedir``,
        ``conformance_test``, ``debug``, ``tool_help`` and ``workflow``.
    :param t: the loaded tool; ``t.tool["inputs"]`` and ``t.metadata`` are read.
    :param parser: main argument parser; its help is printed when no input
        object could be built.
    :param stdin: stream the job order is parsed from when ``-`` is given.
    :param print_input_deps: print the job order's dependencies and return 0.
    :param relative_deps: forwarded to ``printdeps``.
    :param stdout: stream handed to ``printdeps``.
    :return: ``(job_order_object, input_basedir)`` on success, or an int exit
        code: 1 on a load error or missing input object, 0 after printing tool
        help or input dependencies.
    """

    job_order_object = None
    # Bound up front: the stdin branch assigns neither name, yet both are read
    # further down when stdin yields an empty document.
    job_order_file = None
    toolparser = None

    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {
                "path": {"@type": "@id"},
                "format": {"@type": "@id"},
                "id": "@id"}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        # NOTE(review): yaml.load without an explicit safe loader may construct
        # arbitrary objects, and stdin is external input -- consider a safe loader.
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(str(e), exc_info=(e if args.debug else False))
            return 1
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[str,str]
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns a (document, metadata) pair; only the
                    # document can be updated with the command line values.
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(str(e), exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            _logger.debug(u"Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    # Fill in declared defaults for inputs the job order does not mention.
    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        parser.print_help()
        if toolparser:
            # Single parenthesised argument: prints identically on Python 2 and 3.
            print(u"\nOptions for %s " % args.workflow)
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps,
                  basedir=u"file://%s/" % input_basedir)
        return 0

    # These keys are bookkeeping, not tool inputs.
    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]

    return (job_order_object, input_basedir)
def main(args=None, executor=single_job_executor, makeTool=workflow.defaultMakeTool, parser=None):
    """Command line entry point: load a tool, build its job order and run it.

    :param args: argument list; defaults to ``sys.argv[1:]``.
    :param executor: callable that runs the tool with the job order; its
        result is written to stdout as JSON.
    :param makeTool: tool factory forwarded to ``load_tool`` and ``executor``.
    :param parser: argument parser; defaults to ``arg_parser()``.
    :return: process exit code -- 0 on success (or after printing the version,
        RDF, dot output or tool help), 1 on any reported error, or the int
        returned by ``load_tool``.
    """
    if args is None:
        args = sys.argv[1:]

    if parser is None:
        parser = arg_parser()

    args = parser.parse_args(args)

    if args.quiet:
        _logger.setLevel(logging.WARN)
    if args.debug:
        _logger.setLevel(logging.DEBUG)

    pkg = pkg_resources.require("cwltool")
    if pkg:
        if args.version:
            # Single parenthesised argument: prints identically on Python 2 and 3.
            print("%s %s" % (sys.argv[0], pkg[0].version))
            return 0
        else:
            _logger.info("%s %s", sys.argv[0], pkg[0].version)

    if not args.workflow:
        parser.print_help()
        _logger.error("")
        _logger.error("CWL document required")
        return 1

    t = load_tool(args.workflow, args.update, args.strict, makeTool, args.debug)

    # load_tool signals failure by returning an exit code instead of a tool.
    if isinstance(t, int):
        return t

    # NOTE(review): `processobj` and `ctx` are not bound anywhere in this
    # function; unless they are module globals, both print options below raise
    # NameError -- confirm where they are meant to come from.
    if args.print_rdf:
        printrdf(args.workflow, processobj, ctx, args.rdf_serializer)
        return 0

    if args.print_dot:
        printdot(args.workflow, processobj, ctx, args.rdf_serializer)
        return 0

    if args.tmp_outdir_prefix != 'tmp':
        # Use user defined temp directory (if it exists)
        args.tmp_outdir_prefix = os.path.abspath(args.tmp_outdir_prefix)
        if not os.path.exists(args.tmp_outdir_prefix):
            # The run is aborted here, so the message no longer claims a fallback.
            _logger.error("Intermediate output directory prefix doesn't exist.")
            return 1

    if args.tmpdir_prefix != 'tmp':
        # Use user defined prefix (if the folder exists)
        args.tmpdir_prefix = os.path.abspath(args.tmpdir_prefix)
        if not os.path.exists(args.tmpdir_prefix):
            _logger.error("Temporary directory prefix doesn't exist.")
            return 1

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    else:
        job_order_file = None

    if args.conformance_test:
        loader = Loader({})
    else:
        loader = Loader({"id": "@id", "path": {"@type": "@id"}})

    if job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(e, exc_info=(e if args.debug else False))
            return 1
        toolparser = None
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    # resolve_ref returns a (document, metadata) pair; only the
                    # document can be updated with the command line values.
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(e, exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {}

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})
            _logger.debug("Parsed job order from command line: %s", job_order_object)
        else:
            job_order_object = None

    if not job_order_object:
        parser.print_help()
        if toolparser:
            print("\nOptions for %s " % args.workflow)
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    try:
        out = executor(t, job_order_object,
                       input_basedir, args,
                       conformance_test=args.conformance_test,
                       dry_run=args.dry_run,
                       outdir=args.outdir,
                       tmp_outdir_prefix=args.tmp_outdir_prefix,
                       use_container=args.use_container,
                       preserve_environment=args.preserve_environment,
                       pull_image=args.enable_pull,
                       rm_container=args.rm_container,
                       tmpdir_prefix=args.tmpdir_prefix,
                       rm_tmpdir=args.rm_tmpdir,
                       makeTool=makeTool,
                       move_outputs=args.move_outputs
                       )
        # This is the workflow output, it needs to be written
        sys.stdout.write(json.dumps(out, indent=4))
    except (validate.ValidationException) as e:
        _logger.error("Input object failed validation:\n%s", e, exc_info=(e if args.debug else False))
        return 1
    except workflow.WorkflowException as e:
        _logger.error("Workflow error:\n  %s", e, exc_info=(e if args.debug else False))
        return 1

    return 0
Beispiel #47
0
def validate_document(document_loader,  # type: Loader
                      workflowobj,  # type: CommentedMap
                      uri,  # type: Text
                      enable_dev=False,  # type: bool
                      strict=True,  # type: bool
                      preprocess_only=False,  # type: bool
                      fetcher_constructor=None,  # type: FetcherConstructorType
                      skip_schemas=None,  # type: bool
                      overrides=None,  # type: Optional[List[Dict]]
                      metadata=None,  # type: Optional[Dict]
                      ):
    # type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
    """Validate a CWL document.

    The document is normalised (a bare list is wrapped in ``$graph``, the
    ``cwlVersion`` prefix is stripped, draft-2 documents are converted),
    resolved with a loader built from the schema of its ``cwlVersion``,
    validated against that schema and, when its version is not
    ``update.LATEST``, passed through ``update.update``.

    If ``workflowobj`` contains a ``cwl:tool`` key it is treated as a job
    document: the tool it points at is fetched and validated instead, and the
    job document is stored in the returned metadata under ``cwl:defaults``.

    ``workflowobj`` is modified in place (``cwlVersion`` and ``id`` are
    assigned on it).

    :param document_loader: loader whose ``idx`` is reused by the loader
        created here.
    :param workflowobj: the document; a dict, or a list that is wrapped in
        ``{"$graph": ...}``.
    :param uri: URI of the document.
    :param enable_dev: forwarded to ``update.update``.
    :param strict: forwarded to ``schema.validate_doc``.
    :param preprocess_only: when true, return right after resolution, before
        schema validation and updating.
    :param fetcher_constructor: forwarded to the loaders created here and to
        ``fetch_document``.
    :param skip_schemas: forwarded to the ``Loader`` created here.
    :param overrides: list that overrides found in a job document are appended
        to; when ``None`` a fresh list is used.
    :param metadata: optional dict consulted for ``cwlVersion`` when the
        document does not declare one.
    :returns: ``(document_loader, avsc_names, processobj, new_metadata, uri)``.
    :raises ValueError: if ``workflowobj`` is neither a dict nor a list.
    :raises ValidationException: if ``cwlVersion`` is missing or unsupported,
        or the resolved document has an unexpected shape.
    :raises Exception: if ``cwlVersion`` is not a string, or when
        ``process.get_schema`` hands back an exception object.
    """

    if overrides is None:
        # The default must not break ``overrides.extend`` below when a job
        # document carries overrides but the caller supplied no list.
        overrides = []

    if isinstance(workflowobj, list):
        workflowobj = cmap({
            "$graph": workflowobj
        }, fn=uri)

    if not isinstance(workflowobj, dict):
        raise ValueError("workflowobj must be a dict, got '%s': %s" % (type(workflowobj), workflowobj))

    jobobj = None
    if "cwl:tool" in workflowobj:
        job_loader = default_loader(fetcher_constructor)  # type: ignore
        jobobj, _ = job_loader.resolve_all(workflowobj, uri)
        # NOTE(review): the expanded key is read from workflowobj, which
        # presumably relies on resolve_all rewriting the document in place
        # -- confirm against the loader implementation.
        uri = urllib.parse.urljoin(uri, workflowobj["https://w3id.org/cwl/cwl#tool"])
        del cast(dict, jobobj)["https://w3id.org/cwl/cwl#tool"]

        if "http://commonwl.org/cwltool#overrides" in jobobj:
            overrides.extend(resolve_overrides(jobobj, uri, uri))
            del jobobj["http://commonwl.org/cwltool#overrides"]

        # From here on the document being validated is the referenced tool.
        workflowobj = fetch_document(uri, fetcher_constructor=fetcher_constructor)[1]

    fileuri = urllib.parse.urldefrag(uri)[0]
    if "cwlVersion" not in workflowobj:
        if metadata and 'cwlVersion' in metadata:
            workflowobj['cwlVersion'] = metadata['cwlVersion']
        else:
            raise ValidationException("No cwlVersion found. "
                "Use the following syntax in your CWL document to declare "
                "the version: cwlVersion: <version>")

    if not isinstance(workflowobj["cwlVersion"], (str, Text)):
        raise Exception("'cwlVersion' must be a string, got %s" % type(workflowobj["cwlVersion"]))
    # strip out version
    workflowobj["cwlVersion"] = re.sub(
        r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "",
        workflowobj["cwlVersion"])
    if workflowobj["cwlVersion"] not in ALLUPDATES:
        # print out all the Supported Versions of cwlVersion
        versions = sorted(ALLUPDATES)  # ALLUPDATES is a dict
        raise ValidationException("'cwlVersion' not valid. Supported CWL versions are: \n{}".format("\n".join(versions)))

    if workflowobj["cwlVersion"] == "draft-2":
        workflowobj = cast(CommentedMap, cmap(update._draft2toDraft3dev1(
            workflowobj, document_loader, uri, update_steps=False)))
        if "@graph" in workflowobj:
            workflowobj["$graph"] = workflowobj["@graph"]
            del workflowobj["@graph"]

    (sch_document_loader, avsc_names) = \
        process.get_schema(workflowobj["cwlVersion"])[:2]

    if isinstance(avsc_names, Exception):
        raise avsc_names

    processobj = None  # type: Union[CommentedMap, CommentedSeq, Text]
    # Build a loader from the schema loader's context, graph and cache while
    # keeping the index of the loader passed in by the caller.
    document_loader = Loader(sch_document_loader.ctx, schemagraph=sch_document_loader.graph,
                             idx=document_loader.idx, cache=sch_document_loader.cache,
                             fetcher_constructor=fetcher_constructor, skip_schemas=skip_schemas)

    _add_blank_ids(workflowobj)

    workflowobj["id"] = fileuri
    processobj, new_metadata = document_loader.resolve_all(workflowobj, fileuri)
    if not isinstance(processobj, (CommentedMap, CommentedSeq)):
        raise ValidationException("Workflow must be a dict or list.")

    if not new_metadata:
        # No metadata came back from resolution: synthesise it from the
        # resolved document itself.
        if not isinstance(processobj, dict):
            raise ValidationException("Draft-2 workflows must be a dict.")
        new_metadata = cast(CommentedMap, cmap(
            {"$namespaces": processobj.get("$namespaces", {}),
             "$schemas": processobj.get("$schemas", []),
             "cwlVersion": processobj["cwlVersion"]}, fn=fileuri))

    _convert_stdstreams_to_files(workflowobj)

    if preprocess_only:
        return document_loader, avsc_names, processobj, new_metadata, uri

    schema.validate_doc(avsc_names, processobj, document_loader, strict)

    if new_metadata.get("cwlVersion") != update.LATEST:
        processobj = cast(CommentedMap, cmap(update.update(
            processobj, document_loader, fileuri, enable_dev, new_metadata)))

    if jobobj:
        new_metadata[u"cwl:defaults"] = jobobj

    if overrides:
        new_metadata[u"cwltool:overrides"] = overrides

    return document_loader, avsc_names, processobj, new_metadata, uri