Example #1
def load_job_order(args,   # type: argparse.Namespace
                   stdin,  # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides,  # type: List[Dict[Text, Any]]
                   tool_file_uri  # type: Text
):
    # type: (...) -> Tuple[Optional[Dict[Text, Any]], Text, Loader]

    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        overrides.extend(resolve_overrides(job_order_object, file_uri(job_order_file or input_basedir), tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if not job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
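The base-URI handling above leans on schema-salad's file_uri/uri_file_path pair. A minimal sketch of the round trip (the path is hypothetical):

from schema_salad.ref_resolver import file_uri, uri_file_path

uri = file_uri("/tmp/inputs/job.yml")        # "file:///tmp/inputs/job.yml"
assert uri_file_path(uri) == "/tmp/inputs/job.yml"
# Loaders expect directory base URIs to end in "/", hence the
# file_uri(os.getcwd()) + "/" idiom used for stdin input above.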
Example #2
def revmap_file(builder, outdir, f):
    # type: (Builder, Text, Dict[Text, Any]) -> Union[Dict[Text, Any], None]

    """Remap a file from internal path to external path.

    For Docker, this maps from the path inside the container to the path
    outside the container. Recognizes files in the pathmapper or remaps
    internal output directories to the external directory.
    """

    split = urllib.parse.urlsplit(outdir)
    if not split.scheme:
        outdir = file_uri(str(outdir))

    # builder.outdir is the inner (container/compute node) output directory
    # outdir is the outer (host/storage system) output directory

    if "location" in f and "path" not in f:
        if f["location"].startswith("file://"):
            f["path"] = convert_pathsep_to_unix(uri_file_path(f["location"]))
        else:
            return f

    if "path" in f:
        path = f["path"]
        uripath = file_uri(path)
        del f["path"]

        if "basename" not in f:
            f["basename"] = os.path.basename(path)

        assert builder.pathmapper is not None
        revmap_f = builder.pathmapper.reversemap(path)

        if revmap_f and not builder.pathmapper.mapper(revmap_f[0]).type.startswith("Writable"):
            f["location"] = revmap_f[1]
        elif uripath == outdir or uripath.startswith(outdir+os.sep):
            f["location"] = file_uri(path)
        elif path == builder.outdir or path.startswith(builder.outdir+os.sep):
            f["location"] = builder.fs_access.join(outdir, path[len(builder.outdir) + 1:])
        elif not os.path.isabs(path):
            f["location"] = builder.fs_access.join(outdir, path)
        else:
            raise WorkflowException(u"Output file path %s must be within designated output directory (%s) or an input "
                                    u"file pass through." % (path, builder.outdir))
        return f

    raise WorkflowException(u"Output File object is missing both 'location' "
                            "and 'path' fields: %s" % f)
Example #3
def resolve_tool_uri(argsworkflow,  # type: Text
                     resolver=None,  # type: ResolverType
                     fetcher_constructor=None,  # type: FetcherConstructorType
                     document_loader=None  # type: Loader
                    ):  # type: (...) -> Tuple[Text, Text]

    uri = None  # type: Optional[Text]
    split = urllib.parse.urlsplit(argsworkflow)
    # On Windows, urlsplit misjudges a drive letter as the URI scheme, so only accept known schemes here
    if split.scheme and split.scheme in [u'http', u'https', u'file']:
        uri = argsworkflow
    elif os.path.exists(os.path.abspath(argsworkflow)):
        uri = file_uri(str(os.path.abspath(argsworkflow)))
    elif resolver is not None:
        if document_loader is None:
            document_loader = default_loader(fetcher_constructor)  # type: ignore
        uri = resolver(document_loader, argsworkflow)

    if uri is None:
        raise ValidationException("Not found: '%s'" % argsworkflow)

    if argsworkflow != uri:
        _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

    fileuri = urllib.parse.urldefrag(uri)[0]
    return uri, fileuri
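The scheme whitelist matters because urlsplit happily reports a Windows drive letter as a scheme, so a bare truthiness check would misroute local paths:

import urllib.parse

print(urllib.parse.urlsplit("C:/work/wf.cwl").scheme)              # "c" -- not a real scheme
print(urllib.parse.urlsplit("https://example.com/wf.cwl").scheme)  # "https"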
Example #4
def resolve_local(document_loader, uri):
    if uri.startswith("/"):
        return None
    shares = [os.environ.get("XDG_DATA_HOME", os.path.join(os.path.expanduser('~'), ".local", "share"))]
    shares.extend(os.environ.get("XDG_DATA_DIRS", "/usr/local/share/:/usr/share/").split(":"))
    shares = [os.path.join(s, "commonwl", uri) for s in shares]
    shares.insert(0, os.path.join(os.getcwd(), uri))

    _logger.debug("Search path is %s", shares)

    for s in shares:
        if os.path.exists(s):
            return file_uri(s)
        if os.path.exists("%s.cwl" % s):
            return file_uri(s)
    return None
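With the default XDG environment, the search order for a hypothetical resolve_local(None, "trim.cwl") works out to:

# $PWD/trim.cwl
# ~/.local/share/commonwl/trim.cwl
# /usr/local/share/commonwl/trim.cwl
# /usr/share/commonwl/trim.cwl
# ...each also retried with a ".cwl" suffix appended.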
Example #5
def load_job_order(args,                 # type: argparse.Namespace
                   stdin,                # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides_list,       # type: List[Dict[Text, Any]]
                   tool_file_uri         # type: Text
                  ):  # type: (...) -> Tuple[Optional[MutableMapping[Text, Any]], Text, Loader]

    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object is not None:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file is not None:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object is not None and "http://commonwl.org/cwltool#overrides" in job_order_object:
        ov_uri = file_uri(job_order_file or input_basedir)
        overrides_list.extend(
            resolve_overrides(job_order_object, ov_uri, tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if job_order_object is None:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    if job_order_object is not None and not isinstance(job_order_object, MutableMapping):
        _logger.error(
            'CWL input object at %s is not formatted correctly, it should be a '
            'JSON/YAML dictionary, not %s.\n'
            'Raw input object:\n%s', job_order_file or "stdin",
            type(job_order_object), job_order_object)
        sys.exit(1)
    return (job_order_object, input_basedir, loader)
Example #6
 def __call__(self, parser, namespace, values, option_string=None):
     # type: (argparse.ArgumentParser, argparse.Namespace, Union[AnyStr, Sequence[Any], None], AnyStr) -> None
     g = getattr(namespace,
                 self.dest  # type: ignore
                 )
     if not g:
         g = []
         setattr(namespace,
                 self.dest,  # type: ignore
                 g)
     g.append(
         {"class": self.objclass,
          "location": file_uri(str(os.path.abspath(cast(AnyStr, values))))})
Example #7
def test_load_cwlschema() -> None:
    path = get_data("tests/test_schema/CommonWorkflowLanguage.yml")
    assert path
    doc = cg_metaschema.load_document(
        file_uri(path),
        "",
        cg_metaschema.LoadingOptions(),
    )
    path2 = get_data("tests/cwl-pre.yml")
    assert path2
    with open(path2) as f:
        pre = json.load(f)
    saved = [d.save(relative_uris=False) for d in doc]
    assert saved == JsonDiffMatcher(pre)
Example #8
def test_load_by_yaml_metaschema(metaschema_pre: Any) -> None:
    path = get_data("metaschema/metaschema.yml")
    assert path
    with open(path) as path_handle:
        yaml = YAML()
        yaml.preserve_quotes = True  # type: ignore
        yaml_doc = yaml.load(path_handle)
    doc = cg_metaschema.load_document_by_yaml(
        yaml_doc,
        file_uri(path),
        None,
    )
    saved = [d.save(relative_uris=False) for d in doc]
    assert saved == JsonDiffMatcher(metaschema_pre)
Example #9
def load_job(workflow, job, cwl_args=None, cwd=None):
    """
    Tries to load json object from "job". If failed, assumes that
    "job" has been already parsed into Object. Inits loaded "job_data"
    based on the "workflow" (mostly for setting defaults from the workflow
    inputs; never fails). "cwl_args" can be used to update parameters for
    loading and runtime contexts.

    If "job" was file, resolves relative paths based on the job file location.
    If "job" was already parsed into Object, resolves relative paths based on
    "cwd". If "cwd" was None uses "inputs_folder" value from "cwl_args" or
    its default value returned from "get_default_cwl_args" function.

    Checking links after relative paths are resolved is disabled (checklinks
    is set to False in both places). This will prevent rasing an exception by
    schema salad in those cases when an input file will be created from the
    provided content during workflow execution.
    
    Always returns CommentedMap
    """

    cwl_args = {} if cwl_args is None else cwl_args

    default_cwl_args = get_default_cwl_args(cwl_args)
    cwd = default_cwl_args["inputs_folder"] if cwd is None else cwd

    loading_context = setup_loadingContext(
        LoadingContext(default_cwl_args), RuntimeContext(default_cwl_args),
        argparse.Namespace(**default_cwl_args))

    job_copy = deepcopy(job)

    try:
        job_data, _ = loading_context.loader.resolve_ref(job_copy,
                                                         checklinks=False)
    except (FileNotFoundError, SchemaSaladException) as err:
        job_data = load_yaml(json.dumps(job_copy))
        job_data["id"] = file_uri(cwd) + "/"
        job_data, metadata = loading_context.loader.resolve_all(
            job_data, job_data["id"], checklinks=False)

    initialized_job_data = init_job_order(
        job_order_object=job_data,
        args=argparse.Namespace(**default_cwl_args),
        process=slow_cwl_load(workflow=workflow, cwl_args=default_cwl_args),
        loader=loading_context.loader,
        stdout=os.devnull)

    return initialized_job_data
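A hypothetical pair of calls, assuming the surrounding helpers are importable and the paths exist:

# Job given as a file: relative input paths resolve against its directory.
job_data = load_job(workflow="workflow.cwl", job="job.yml")

# Job already parsed into a dict: relative paths resolve against "cwd".
job_data = load_job(workflow="workflow.cwl",
                    job={"reads": {"class": "File", "location": "reads.fastq"}},
                    cwd="/data/run1")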
Example #10
def test_schemas() -> None:
    loader = Loader({})

    path = get_data("tests/EDAM.owl")
    assert path
    ra, _ = loader.resolve_all(
        cmap({
            "$schemas": [file_uri(path)],
            "$namespaces": {
                "edam": "http://edamontology.org/"
            },
            "edam:has_format": "edam:format_1915",
        }),
        "",
    )

    assert {
        "$schemas": [file_uri(path)],
        "$namespaces": {
            "edam": "http://edamontology.org/"
        },
        "http://edamontology.org/has_format":
        "http://edamontology.org/format_1915",
    } == ra
Example #11
def load_job_order(args,                 # type: argparse.Namespace
                   stdin,                # type: IO[Any]
                   fetcher_constructor,  # Fetcher
                   overrides_list,       # type: List[Dict[Text, Any]]
                   tool_file_uri         # type: Text
                  ):  # type: (...) -> Tuple[Optional[MutableMapping[Text, Any]], Text, Loader]

    job_order_object = None
    job_order_file = None

    _jobloaderctx = jobloaderctx.copy()
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir \
            else os.path.abspath(os.path.dirname(job_order_file))
        job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)

    if job_order_object and "http://commonwl.org/cwltool#overrides" in job_order_object:
        overrides_list.extend(
            resolve_overrides(job_order_object, file_uri(job_order_file or input_basedir), tool_file_uri))
        del job_order_object["http://commonwl.org/cwltool#overrides"]

    if not job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()

    return (job_order_object, input_basedir, loader)
Example #12
def test_fetch_inject_id() -> None:
    path = get_data("schema_salad/tests/inject-id1.yml")
    assert path
    if is_fs_case_sensitive(os.path.dirname(path)):

        def lower(item: str) -> str:
            return item

    else:

        def lower(item: str) -> str:
            return item.lower()

    l1 = Loader({"id": "@id"})
    furi1 = file_uri(path)
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [lower(furi1), lower(furi1 + "#foo")
            ] == sorted(list(lower(k) for k in l1.idx.keys()))

    l2 = Loader({"id": "@id"})
    path2 = get_data("schema_salad/tests/inject-id2.yml")
    assert path2
    furi2 = file_uri(path2)
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    assert [lower(furi2)] == sorted(list(lower(k) for k in l2.idx.keys()))

    l3 = Loader({"id": "@id"})
    path3 = get_data("schema_salad/tests/inject-id3.yml")
    assert path3
    furi3 = file_uri(path3)
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [lower(furi3), "http://example.com"
            ] == sorted(list(lower(k) for k in l3.idx.keys()))
Example #13
 def __call__(
     self,
     parser: argparse.ArgumentParser,
     namespace: argparse.Namespace,
     values: Union[AnyStr, Sequence[Any], None],
     option_string: Optional[str] = None,
 ) -> None:
     setattr(
         namespace,
         self.dest,
         {
             "class": self.objclass,
             "location": file_uri(str(os.path.abspath(cast(AnyStr, values)))),
         },
     )
Example #14
def test_fetch_inject_id():
    lower = lambda s: s.lower()
    if is_fs_case_sensitive(
            os.path.dirname(get_data("schema_salad/tests/inject-id1.yml"))):
        lower = lambda a: a
    l1 = Loader({"id": "@id"})
    furi1 = file_uri(get_data("schema_salad/tests/inject-id1.yml"))
    r1, _ = l1.resolve_ref(furi1)
    assert {"id": furi1 + "#foo", "bar": "baz"} == r1
    assert [lower(furi1), lower(furi1 + "#foo")
            ] == sorted(list(lower(k) for k in l1.idx.keys()))

    l2 = Loader({"id": "@id"})
    furi2 = file_uri(get_data("schema_salad/tests/inject-id2.yml"))
    r2, _ = l2.resolve_ref(furi2)
    assert {"id": furi2, "bar": "baz"} == r2
    assert [lower(furi2)] == sorted(list(lower(k) for k in l2.idx.keys()))

    l3 = Loader({"id": "@id"})
    furi3 = file_uri(get_data("schema_salad/tests/inject-id3.yml"))
    r3, _ = l3.resolve_ref(furi3)
    assert {"id": "http://example.com", "bar": "baz"} == r3
    assert [lower(furi3), "http://example.com"
            ] == sorted(list(lower(k) for k in l3.idx.keys()))
Example #15
def test_load_pt() -> None:
    path = get_data("tests/pt.yml")
    assert path
    doc = cg_metaschema.load_document(
        file_uri(path), "", cg_metaschema.LoadingOptions()
    )
    assert [
        "https://w3id.org/cwl/salad#null",
        "http://www.w3.org/2001/XMLSchema#boolean",
        "http://www.w3.org/2001/XMLSchema#int",
        "http://www.w3.org/2001/XMLSchema#long",
        "http://www.w3.org/2001/XMLSchema#float",
        "http://www.w3.org/2001/XMLSchema#double",
        "http://www.w3.org/2001/XMLSchema#string",
    ] == doc.symbols
Example #16
def _to_cwl_tool_object(tool_path=None, tool_object=None, cwl_tool_object=None, raw_process_reference=None, strict_cwl_validation=True, tool_directory=None, uuid=None):
    if uuid is None:
        uuid = str(uuid4())
    schema_loader = _schema_loader(strict_cwl_validation)
    if raw_process_reference is None and tool_path is not None:
        assert cwl_tool_object is None
        assert tool_object is None

        raw_process_reference = schema_loader.raw_process_reference(tool_path)
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    elif tool_object is not None:
        assert raw_process_reference is None
        assert cwl_tool_object is None

        # Allow loading tools from YAML...
        from ruamel import yaml as ryaml
        import json
        as_str = json.dumps(tool_object)
        tool_object = ryaml.round_trip_load(as_str)
        from schema_salad import sourceline
        from schema_salad.ref_resolver import file_uri
        path = tool_directory
        if path is None:
            path = os.getcwd()
        uri = file_uri(path) + "/"
        sourceline.add_lc_filename(tool_object, uri)
        # tool_object, _ = schema_loader.raw_document_loader.resolve_all(tool_object, uri, checklinks=False)
        raw_process_reference = schema_loader.raw_process_reference_for_object(
            tool_object,
            uri=uri
        )
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    else:
        cwl_tool = cwl_tool_object

    if isinstance(cwl_tool, int):
        raise Exception("Failed to load tool.")

    raw_tool = cwl_tool.tool
    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
    # between Galaxy and cwltool.
    _hack_cwl_requirements(cwl_tool)
    check_requirements(raw_tool)
    return _cwl_tool_object_to_proxy(cwl_tool, uuid, raw_process_reference=raw_process_reference, tool_path=tool_path)
Example #17
 def __call__(
         self,
         parser,  # type: argparse.ArgumentParser
         namespace,  # type: argparse.Namespace
         values,  # type: Union[AnyStr, Sequence[Any], None]
         option_string=None,  # type: Optional[str]
 ):  # type: (...) -> None
     setattr(
         namespace,
         self.dest,
         {
             "class": self.objclass,
             "location": file_uri(str(os.path.abspath(cast(AnyStr,
                                                           values)))),
         },
     )
Example #18
def test_mixin() -> None:
    base_url = file_uri(os.path.join(os.getcwd(), "tests"))
    ldr = Loader({})
    path = get_data("tests/mixin.yml")
    assert path
    ra = ldr.resolve_ref(cmap({"$mixin": path, "one": "five"}), base_url=base_url)
    assert {"id": "four", "one": "five"} == ra[0]
    ldr = Loader({"id": "@id"})

    ra = ldr.resolve_all(
        cmap([{"id": "a", "m": {"$mixin": path}}, {"id": "b", "m": {"$mixin": path}}]),
        base_url=base_url,
    )
    assert [
        {"id": base_url + "#a", "m": {"id": base_url + "#a/four", "one": "two"}},
        {"id": base_url + "#b", "m": {"id": base_url + "#b/four", "one": "two"}},
    ] == ra[0]
Example #19
 def __call__(
     self,
     parser,  # type: argparse.ArgumentParser
     namespace,  # type: argparse.Namespace
     values,  # type: Union[AnyStr, Sequence[Any], None]
     option_string=None  # type: Optional[Text]
 ):  # type: (...) -> None
     g = getattr(namespace, self.dest)
     if not g:
         g = []
         setattr(namespace, self.dest, g)
     g.append({
         "class":
         self.objclass,
         "location":
         file_uri(str(os.path.abspath(cast(AnyStr, values))))
     })
Example #20
def test_include() -> None:
    doc = {"name": "hello", "doc": [{"$include": "hello.txt"}], "type": "documentation"}
    path = get_data("tests/_")
    assert path
    rf = cg_metaschema.Documentation.fromDoc(
        doc,
        "http://example.com/",
        cg_metaschema.LoadingOptions(fileuri=file_uri(path)),
    )
    assert "http://example.com/#hello" == rf.name
    assert ["hello world!\n"] == rf.doc
    assert "documentation" == rf.type
    assert {
        "name": "http://example.com/#hello",
        "doc": ["hello world!\n"],
        "type": "documentation",
    } == rf.save()
Example #21
def revmap_file(builder, outdir, f):
    # type: (Builder, Text, Dict[Text, Any]) -> Union[Dict[Text, Any], None]
    """Remap a file from internal path to external path.

    For Docker, this maps from the path inside the container to the path
    outside the container. Recognizes files in the pathmapper or remaps
    internal output directories to the external directory.
    """

    split = urllib.parse.urlsplit(outdir)
    if not split.scheme:
        outdir = file_uri(str(outdir))

    if "location" in f:
        if f["location"].startswith("file://"):
            path = uri_file_path(f["location"])
            revmap_f = builder.pathmapper.reversemap(path)
            if revmap_f:
                f["location"] = revmap_f[1]
            elif path == builder.outdir:
                f["location"] = outdir
            elif path.startswith(builder.outdir):
                f["location"] = builder.fs_access.join(
                    outdir, path[len(builder.outdir) + 1:])
        return f

    if "path" in f:
        path = f["path"]
        del f["path"]
        revmap_f = builder.pathmapper.reversemap(path)
        if revmap_f:
            f["location"] = revmap_f[1]
            return f
        elif path.startswith(builder.outdir):
            f["location"] = builder.fs_access.join(
                outdir, path[len(builder.outdir) + 1:])
            return f
        else:
            raise WorkflowException(
                u"Output file path %s must be within designated output directory (%s) or an input "
                u"file pass through." % (path, builder.outdir))

    raise WorkflowException(
        u"Output File object is missing both `location` and `path` fields: %s"
        % f)
Example #22
 def __call__(self, parser, namespace, values, option_string=None):
     # type: (argparse.ArgumentParser, argparse.Namespace, Union[AnyStr, Sequence[Any], None], AnyStr) -> None
     g = getattr(
         namespace,
         self.dest  # type: ignore
     )
     if not g:
         g = []
         setattr(
             namespace,
             self.dest,  # type: ignore
             g)
     g.append({
         "class":
         self.objclass,
         "location":
         file_uri(str(os.path.abspath(cast(AnyStr, values))))
     })
Example #23
def test_import():
    doc = {"type": "record", "fields": [{"$import": "hellofield.yml"}]}
    lead = file_uri(os.path.normpath(get_data("tests")))
    rs = cg_metaschema.RecordSchema.fromDoc(
        doc, "http://example.com/",
        cg_metaschema.LoadingOptions(fileuri=lead + "/_"))
    assert "record" == rs.type
    assert lead + "/hellofield.yml#hello" == rs.fields[0].name
    assert "hello world!\n" == rs.fields[0].doc
    assert "string" == rs.fields[0].type
    assert {
        "type":
        "record",
        "fields": [{
            "name": lead + "/hellofield.yml#hello",
            "doc": "hello world!\n",
            "type": "string",
        }],
    } == rs.save()
Example #24
    def parse_listing(self, listing, inputs):
        for item in listing:
            if "contents" in item:
                loc = self.fs_access.join(self.tmpdir, item["basename"])
                with self.fs_access.open(loc, "wb") as gen:
                    gen.write(item["contents"])
            else:
                loc = item["location"]

            parameter = tes.Input(
                name=item["basename"],
                description="InitialWorkDirRequirement:cwl_input:%s" %
                (item["basename"]),
                url=file_uri(loc),
                path=self.fs_access.join(self.docker_workdir,
                                         item["basename"]),
                type=item["class"].upper())
            inputs.append(parameter)

        return inputs
Example #25
 def test_import(self):
     doc = {"type": "record", "fields": [{"$import": "hellofield.yml"}]}
     lead = file_uri(os.path.normpath(get_data("tests")))
     rs = cg_metaschema.RecordSchema(
         doc, "http://example.com/",
         cg_metaschema.LoadingOptions(fileuri=lead + "/_"))
     self.assertEqual("record", rs.type)
     self.assertEqual(lead + "/hellofield.yml#hello", rs.fields[0].name)
     self.assertEqual("hello world!\n", rs.fields[0].doc)
     self.assertEqual("string", rs.fields[0].type)
     self.assertEqual(
         {
             "type":
             "record",
             "fields": [{
                 "name": lead + "/hellofield.yml#hello",
                 "doc": "hello world!\n",
                 "type": "string"
             }]
         }, rs.save())
Example #26
def revmap_file(builder, outdir, f):
    # type: (Builder, Text, Dict[Text, Any]) -> Union[Dict[Text, Any], None]

    """Remap a file from internal path to external path.

    For Docker, this maps from the path inside the container to the path
    outside the container. Recognizes files in the pathmapper or remaps
    internal output directories to the external directory.
    """

    split = urllib.parse.urlsplit(outdir)
    if not split.scheme:
        outdir = file_uri(str(outdir))

    if "location" in f:
        if f["location"].startswith("file://"):
            path = uri_file_path(f["location"])
            revmap_f = builder.pathmapper.reversemap(path)
            if revmap_f:
                f["location"] = revmap_f[1]
            elif path == builder.outdir:
                f["location"] = outdir
            elif path.startswith(builder.outdir):
                f["location"] = builder.fs_access.join(outdir, path[len(builder.outdir) + 1:])
        return f

    if "path" in f:
        path = f["path"]
        del f["path"]
        revmap_f = builder.pathmapper.reversemap(path)
        if revmap_f:
            f["location"] = revmap_f[1]
            return f
        elif path.startswith(builder.outdir):
            f["location"] = builder.fs_access.join(outdir, path[len(builder.outdir) + 1:])
            return f
        else:
            raise WorkflowException(u"Output file path %s must be within designated output directory (%s) or an input "
                                    u"file pass through." % (path, builder.outdir))

    raise WorkflowException(u"Output File object is missing both `location` and `path` fields: %s" % f)
Example #27
def fetch_document(
    argsworkflow,  # type: Union[Text, Dict[Text, Any]]
    resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
    fetcher_constructor=None
    # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document."""

    document_loader = Loader({
        "cwl": "https://w3id.org/cwl/cwl#",
        "id": "@id"
    },
                             fetcher_constructor=fetcher_constructor)

    uri = None  # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        split = urllib.parse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            uri = file_uri(str(os.path.abspath(argsworkflow)))
        elif resolver:
            uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        fileuri = urllib.parse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
Example #28
 def test_include(self):
     doc = {
         "name": "hello",
         "doc": [{
             "$include": "hello.txt"
         }],
         "type": "documentation"
     }
     rf = cg_metaschema.Documentation(
         doc, "http://example.com/",
         cg_metaschema.LoadingOptions(
             fileuri=file_uri(get_data("tests/_"))))
     self.assertEqual("http://example.com/#hello", rf.name)
     self.assertEqual(["hello world!\n"], rf.doc)
     self.assertEqual("documentation", rf.type)
     self.assertEqual(
         {
             "name": "http://example.com/#hello",
             "doc": ["hello world!\n"],
             "type": "documentation"
         }, rf.save())
Example #29
    def cwl_dispatch(self, json):
        try:
            cwlwf, it_is_workflow = load_cwl(
                self.dag.default_args["cwl_workflow"], self.dag.default_args)
            cwl_context = {
                "outdir":
                mkdtemp(dir=get_folder(os.path.abspath(self.tmp_folder)),
                        prefix="dag_tmp_")
            }

            _jobloaderctx = jobloaderctx.copy()
            _jobloaderctx.update(cwlwf.metadata.get("$namespaces", {}))
            loader = Loader(_jobloaderctx)

            try:
                job_order_object = yaml.round_trip_load(
                    io.StringIO(initial_value=dumps(json)))
                job_order_object, _ = loader.resolve_all(
                    job_order_object,
                    file_uri(os.getcwd()) + "/",
                    checklinks=False)
            except Exception as e:
                _logger.error("Job Loader: {}".format(str(e)))

            job_order_object = init_job_order(job_order_object, None, cwlwf,
                                              loader, sys.stdout)

            cwl_context['promises'] = job_order_object

            logging.info('{0}: Final job: \n {1}'.format(
                self.task_id, dumps(cwl_context, indent=4)))

            return cwl_context

        except Exception as e:
            _logger.info('Dispatch Exception {0}: \n {1} {2}'.format(
                self.task_id, type(e), e))
            pass
        return None
Example #30
def fetch_document(
    argsworkflow,  # type: Union[Text, Dict[Text, Any]]
    resolver=None,  # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
    fetcher_constructor=None
    # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
):
    # type: (...) -> Tuple[Loader, CommentedMap, Text]
    """Retrieve a CWL document."""

    document_loader = Loader(
        jobloaderctx, fetcher_constructor=fetcher_constructor)  # type: ignore

    uri = None  # type: Text
    workflowobj = None  # type: CommentedMap
    if isinstance(argsworkflow, string_types):
        split = urllib.parse.urlsplit(argsworkflow)
        # On Windows, urlsplit misjudges a drive letter as the URI scheme, so only accept known schemes here
        if split.scheme and split.scheme in [u'http', u'https', u'file']:
            uri = argsworkflow
        elif os.path.exists(os.path.abspath(argsworkflow)):
            uri = file_uri(str(os.path.abspath(argsworkflow)))
        elif resolver:
            uri = resolver(document_loader, argsworkflow)

        if uri is None:
            raise ValidationException("Not found: '%s'" % argsworkflow)

        if argsworkflow != uri:
            _logger.info("Resolved '%s' to '%s'", argsworkflow, uri)

        fileuri = urllib.parse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        uri = "#" + Text(id(argsworkflow))
        workflowobj = cast(CommentedMap, cmap(argsworkflow, fn=uri))
    else:
        raise ValidationException("Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
Example #31
def test_attachments():
    furi = file_uri(get_data("schema_salad/tests/multidoc.yml"))

    l1 = Loader({})
    r1, _ = l1.resolve_ref(furi)
    with open(get_data("schema_salad/tests/multidoc.yml"), "rt") as f:
        content = f.read()
        assert {"foo": "bar", "baz": content, "quux": content} == r1

    l2 = Loader({}, allow_attachments=lambda x: x["foo"] == "bar")
    r2, _ = l2.resolve_ref(furi)
    assert {
        "foo": "bar",
        "baz": "This is the {first attachment}.\n",
        "quux": "This is the [second attachment].",
    } == r2

    l3 = Loader({}, allow_attachments=lambda x: x["foo"] == "baz")
    r3, _ = l3.resolve_ref(furi)
    with open(get_data("schema_salad/tests/multidoc.yml"), "rt") as f:
        content = f.read()
        assert {"foo": "bar", "baz": content, "quux": content} == r3
Example #32
def to_cwl_tool_object(tool_path=None, tool_object=None, persisted_tool=None, strict_cwl_validation=True):
    schema_loader = _schema_loader(strict_cwl_validation)
    if tool_path is not None:
        cwl_tool = schema_loader.tool(
            path=tool_path
        )
    elif tool_object is not None:
        # Allow loading tools from YAML...
        from ruamel import yaml as ryaml
        import json
        as_str = json.dumps(tool_object)
        tool_object = ryaml.round_trip_load(as_str)
        from schema_salad import sourceline
        from schema_salad.ref_resolver import file_uri
        uri = file_uri(os.getcwd()) + "/"
        sourceline.add_lc_filename(tool_object, uri)
        tool_object, _ = schema_loader.raw_document_loader.resolve_all(tool_object, uri)
        raw_process_reference = schema_loader.raw_process_reference_for_object(
            tool_object,
            uri=uri
        )
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    else:
        cwl_tool = ToolProxy.from_persistent_representation(persisted_tool)

    if isinstance(cwl_tool, int):
        raise Exception("Failed to load tool.")

    raw_tool = cwl_tool.tool
    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
    # between Galaxy and cwltool.
    _hack_cwl_requirements(cwl_tool)
    check_requirements(raw_tool)
    return cwl_tool_object_to_proxy(cwl_tool, tool_path=tool_path)
Example #33
def normalize_spec(schema):

    # Get the metaschema to validate the schema
    metaschema_names, metaschema_doc, metaschema_loader = sc.get_metaschema()

    # Load schema document and resolve refs

    # if not (urllib.parse.urlparse(schema_uri)[0] and urllib.parse.urlparse(schema_uri)[0] in [u'http', u'https', u'file']):
    schema_uri = file_uri(os.path.abspath(schema))
    schema_raw_doc = metaschema_loader.fetch(schema_uri)

    try:
        schema_doc, schema_metadata = metaschema_loader.resolve_all(
            schema_raw_doc, schema_uri)
    except validate.ValidationException as e:
        _logger.error("Schema `%s` failed link checking:\n%s", schema, e)
        _logger.debug("Index is %s", list(metaschema_loader.idx.keys()))
        _logger.debug("Vocabulary is %s", list(metaschema_loader.vocab.keys()))
        return 1
    except RuntimeError as e:
        _logger.error("Schema `%s` read error:\n%s", schema, e)
        return 1

    return schema_doc
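A hypothetical invocation. Note the function returns the resolved document on success but the int 1 on failure, so callers should check the type:

schema_doc = normalize_spec("myschema.yml")  # hypothetical schema file
if isinstance(schema_doc, int):
    raise SystemExit("schema failed to load")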
Example #34
def to_cwl_tool_object(tool_path=None, tool_object=None, persisted_tool=None, strict_cwl_validation=True):
    schema_loader = _schema_loader(strict_cwl_validation)
    if tool_path is not None:
        cwl_tool = schema_loader.tool(
            path=tool_path
        )
    elif tool_object is not None:
        # Allow loading tools from YAML...
        from ruamel import yaml as ryaml
        import json
        as_str = json.dumps(tool_object)
        tool_object = ryaml.round_trip_load(as_str)
        from schema_salad import sourceline
        from schema_salad.ref_resolver import file_uri
        uri = file_uri(os.getcwd()) + "/"
        sourceline.add_lc_filename(tool_object, uri)
        tool_object, _ = schema_loader.raw_document_loader.resolve_all(tool_object, uri)
        raw_process_reference = schema_loader.raw_process_reference_for_object(
            tool_object,
            uri=uri
        )
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    else:
        cwl_tool = ToolProxy.from_persistent_representation(persisted_tool)

    if isinstance(cwl_tool, int):
        raise Exception("Failed to load tool.")

    raw_tool = cwl_tool.tool
    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
    # between Galaxy and cwltool.
    _hack_cwl_requirements(cwl_tool)
    check_requirements(raw_tool)
    return cwl_tool_object_to_proxy(cwl_tool, tool_path=tool_path)
Example #35
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          #
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler is not None:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitely provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        # Configure logging
        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            # Silence STDERR, not an eventual provenance log file
            stderr_handler.setLevel(logging.WARN)
        if runtimeContext.debug:
            # Increase to debug for both stderr and provenance log file
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        formatter = None  # type: Optional[logging.Formatter]
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)
        ##

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback is not None:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1
            ro = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
                full_name=args.cwl_full_name)
            runtimeContext.research_obj = ro
            log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
            prov_log_handler = logging.StreamHandler(log_file_io)

            class ProvLogFormatter(logging.Formatter):
                """Enforce ISO8601 with both T and Z."""
                def __init__(self):  # type: () -> None
                    super(ProvLogFormatter, self).__init__(
                        "[%(asctime)sZ] %(message)s")

                def formatTime(self, record, datefmt=None):
                    # type: (logging.LogRecord, str) -> str
                    record_time = time.gmtime(record.created)
                    formatted_time = time.strftime("%Y-%m-%dT%H:%M:%S", record_time)
                    with_msecs = "%s,%03d" % (formatted_time, record.msecs)
                    return with_msecs
            prov_log_handler.setFormatter(ProvLogFormatter())
            _logger.addHandler(prov_log_handler)
            _logger.debug(u"[provenance] Logging to %s", log_file_io)
            if argsl is not None:
                # Log cwltool command line options to provenance file
                _logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl))
            _logger.debug(u"[cwltool] Arguments: %s", args)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.loader = default_loader(loadingContext.fetcher_constructor)
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)
        loadingContext.do_update = not (args.pack or args.print_subgraph)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            if args.print_deps and loadingContext.loader:
                printdeps(workflowobj, loadingContext.loader, stdout,
                          args.relative_deps, uri)
                return 0

            loadingContext, uri \
                = resolve_and_validate_document(loadingContext, workflowobj, uri,
                                    preprocess_only=(args.print_pre or args.pack),
                                    skip_schemas=args.skip_schemas)
            
            if loadingContext.loader is None:
                raise Exception("Impossible code path.")
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext.loader, processobj, uri, metadata))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext.loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                def my_represent_none(self, data):  # pylint: disable=unused-argument
                    """Force clean representation of 'null'."""
                    return self.represent_scalar(u'tag:yaml.org,2002:null', u'null')
                yaml.RoundTripRepresenter.add_representer(type(None), my_represent_none)
                yaml.round_trip_dump(
                    generate_input_template(tool), sys.stdout,
                    default_flow_style=False, indent=4, block_seq_indent=2)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write("  "+"\n  ".join([shortname(t["id"]) for t in tool.tool[f]])+"\n")
                return 0

            if args.target:
                if isinstance(tool, Workflow):
                    url = urllib.parse.urlparse(tool.tool["id"])
                    if url.fragment:
                        extracted = get_subgraph([tool.tool["id"] + "/" + r for r in args.target], tool)
                    else:
                        extracted = get_subgraph([loadingContext.loader.fetcher.urljoin(tool.tool["id"], "#" + r)
                                                 for r in args.target],
                                                 tool)
                else:
                    _logger.error("Can only use --target on Workflows")
                    return 1
                loadingContext.loader.idx[extracted["id"]] = extracted
                tool = make_tool(extracted["id"],
                                 loadingContext)

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(json_dumps(tool.tool, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                    else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers)

            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(
                        out, None, True)

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(Text(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                u"Unhandled error%s:\n  %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and workflowobj and loadingContext:
            research_obj = runtimeContext.research_obj
            if loadingContext.loader is not None:
                research_obj.generate_snapshot(prov_deps(
                    workflowobj, loadingContext.loader, uri))
            else:
                _logger.warning("Unable to generate provenance snapshot "
                    " due to missing loadingContext.loader.")
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug(u"[provenance] Closing provenance log file %s",
                    prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
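The entry point above is also importable; a minimal programmatic sketch (the workflow and job paths are hypothetical):

import sys
from cwltool.main import main

sys.exit(main(["--outdir", "out", "workflow.cwl", "job.yml"]))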
Example #36
0
def init_job_order(
    job_order_object,  # type: Optional[MutableMapping[Text, Any]]
    args,  # type: argparse.Namespace
    t,  # type: Process
    loader,  # type: Loader
    stdout,  # type: Union[TextIO, StreamWriter]
    print_input_deps=False,  # type: bool
    provArgs=None,  # type: ResearchObject
    relative_deps=False,  # type: bool
    make_fs_access=None,  # type: Callable[[Text], StdFsAccess]
    input_basedir="",  # type: Text
    secret_store=None  # type: SecretStore
):  # type: (...) -> Tuple[MutableMapping[Text, Any], Optional[MutableMapping[Text, Any]]]
    secrets_req, _ = t.get_requirement("http://commonwl.org/cwltool#Secrets")
    if not job_order_object:
        namemap = {}  # type: Dict[Text, Text]
        records = []  # type: List[Text]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap, records)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                exit(0)
            cmd_line = vars(toolparser.parse_args(args.job_order))
            for record_name in records:
                record = {}
                record_items = {
                    k: v
                    for k, v in six.iteritems(cmd_line)
                    if k.startswith(record_name)
                }
                for key, value in six.iteritems(record_items):
                    record[key[len(record_name) + 1:]] = value
                    del cmd_line[key]
                cmd_line[str(record_name)] = record

            if cmd_line["job_order"]:
                try:
                    job_order_object = cast(
                        MutableMapping,
                        loader.resolve_ref(cmd_line["job_order"])[0])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    exit(1)
            else:
                job_order_object = {"id": args.workflow}

            del cmd_line["job_order"]

            job_order_object.update(
                {namemap[k]: v
                 for k, v in cmd_line.items()})

            if secret_store and secrets_req:
                secret_store.store(
                    [shortname(sc) for sc in secrets_req["secrets"]],
                    job_order_object)

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Parsed job order from command line: %s",
                              json_dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"])
                                 not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object:
        if len(t.tool["inputs"]) > 0:
            if toolparser:
                print(u"\nOptions for {} ".format(args.workflow))
                toolparser.print_help()
            _logger.error("")
            _logger.error("Input object required, use --help for details")
            exit(1)
        else:
            job_order_object = {}
    if provArgs:
        jobobj_for_prov = copy.deepcopy(job_order_object)
        input_for_prov = printdeps(jobobj_for_prov,
                                   loader,
                                   stdout,
                                   relative_deps,
                                   "",
                                   provArgs,
                                   basedir=file_uri(str(input_basedir) + "/"))

    if print_input_deps:
        printdeps(job_order_object,
                  loader,
                  stdout,
                  relative_deps,
                  "",
                  basedir=file_uri(str(input_basedir) + "/"))
        exit(0)

    def pathToLoc(p):
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    ns = {}  # type: Dict[Text, Union[Dict[Any, Any], Text, Iterable[Text]]]
    ns.update(t.metadata.get("$namespaces", {}))
    ld = Loader(ns)

    def expand_formats(p):
        if "format" in p:
            p["format"] = ld.expand_url(p["format"], "")

    visit_class(job_order_object, ("File", "Directory"), pathToLoc)
    visit_class(job_order_object, ("File", ), add_sizes)
    visit_class(job_order_object, ("File", ), expand_formats)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    if secret_store and secrets_req:
        secret_store.store([shortname(sc) for sc in secrets_req["secrets"]],
                           job_order_object)

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    if provArgs:
        return (job_order_object, input_for_prov[1])
    return (job_order_object, None)
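
The for inp in t.tool["inputs"] loop above merges tool-declared defaults into the job order without overwriting user-supplied values. A self-contained sketch of just that step, with a simplified stand-in for cwltool's shortname():

import re

def shortname(inputid):
    # Simplified stand-in: keep only the fragment after the last '#' or '/'.
    return re.split(r"[#/]", inputid)[-1]

def fill_defaults(tool_inputs, job_order_object):
    for inp in tool_inputs:
        if "default" in inp and (not job_order_object or
                                 shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]
    return job_order_object

inputs = [{"id": "file:///tool.cwl#threads", "default": 4},
          {"id": "file:///tool.cwl#name"}]
print(fill_defaults(inputs, {"name": "sample"}))  # {'name': 'sample', 'threads': 4}
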
Example #37
0
def main(
    argsl=None,  # type: List[str]
    args=None,  # type: argparse.Namespace
    job_order_object=None,  # type: MutableMapping[Text, Any]
    stdin=sys.stdin,  # type: IO[Any]
    stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
    stderr=sys.stderr,  # type: IO[Any]
    versionfunc=versionstring,  # type: Callable[[], Text]
    logger_handler=None,  #
    custom_schema_callback=None,  # type: Callable[[], None]
    executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
    loadingContext=None,  # type: LoadingContext
    runtimeContext=None  # type: RuntimeContext
):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if six.PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = codecs.getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    input_for_prov = None
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker container is used if not
        # explicitly provided by the user
        if onWindows() and not runtimeContext.default_container:
            # This Docker image is a minimal Alpine image with bash installed
            # (size 6 MB). Source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in six.iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            _logger.setLevel(logging.WARN)
        if runtimeContext.debug:
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supportedCWLversions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error(
                    "CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                              res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        # Call functions from provenance.py if the provenance flag is enabled.
        if args.provenance:
            if not args.compute_checksum:
                _logger.error(
                    "--provenance incompatible with --no-compute-checksum")
                return 1

            runtimeContext.research_obj = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix,
                # Optionals, might be None
                orcid=args.orcid,
                full_name=args.cwl_full_name)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver,
                                             tool_resolver)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow,
            resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(
                    load_overrides(file_uri(os.path.abspath(args.overrides)),
                                   tool_file_uri))

            document_loader, workflowobj, uri = fetch_document(
                uri,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout,
                          args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=loadingContext.enable_dev,
                                    strict=loadingContext.strict,
                                    preprocess_only=(args.print_pre or args.pack),
                                    fetcher_constructor=loadingContext.fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=loadingContext.overrides_list,
                                    do_validate=loadingContext.do_validate)
            if args.pack:
                stdout.write(
                    print_pack(document_loader, processobj, uri, metadata))
                return 0
            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at the same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(document_loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4))
                return 0

            loadingContext.overrides_list.extend(
                metadata.get("cwltool:overrides", []))

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             loadingContext)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool),
                               sys.stdout,
                               default_flow_style=False,
                               indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.print_rdf:
                stdout.write(
                    printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(
                    runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                setattr(
                    runtimeContext, dirprefix,
                    os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(
                        os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(
                            os.path.dirname(getattr(runtimeContext,
                                                    dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store,
                                                 SecretStore())
        try:
            initialized_job_order_object, input_for_prov = init_job_order(
                job_order_object,
                args,
                tool,
                jobloader,
                stdout,
                print_input_deps=args.print_input_deps,
                provArgs=runtimeContext.research_obj,
                relative_deps=args.relative_deps,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()
        assert executor is not None

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args,
                                "beta_dependency_resolvers_configuration",
                                None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies",
                                             None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(
                    args)

            runtimeContext.find_default_container = functools.partial(
                find_default_container,
                default_container=runtimeContext.default_container,
                use_biocontainers=args.beta_use_biocontainers)
            runtimeContext.make_fs_access = getdefault(
                runtimeContext.make_fs_access, StdFsAccess)
            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ),
                            MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(
                        json_dumps(
                            out,
                            indent=4,  # type: ignore
                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(u"Workflow or tool uses unsupported feature:\n%s",
                          exc,
                          exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(u"Workflow error%s:\n%s",
                          try_again_msg,
                          strip_dup_lineno(six.text_type(exc)),
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(u"Unhandled error%s:\n  %s",
                          try_again_msg,
                          exc,
                          exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and args.rm_tmpdir and workflowobj:
            # Add all related CWL files to the RO
            prov_dependencies = printdeps(workflowobj, document_loader, stdout,
                                          args.relative_deps, uri,
                                          runtimeContext.research_obj)
            prov_dep = prov_dependencies[1]
            assert prov_dep
            runtimeContext.research_obj.generate_snapshot(prov_dep)
            # For input file dependencies
            if input_for_prov:
                runtimeContext.research_obj.generate_snapshot(input_for_prov)
            # NOTE: keep these commented-out lines to evaluate tests later
            # if job_order_object:
            #     runtimeContext.research_obj.generate_snapshot(job_order_object)

            runtimeContext.research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
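
The output post-processing above hinges on visit_class, which applies a callback to every mapping whose "class" key matches. A minimal re-implementation of that traversal, paired with loc_to_path, written from scratch here to show the shape of the walk (the simplified path conversion skips the percent-decoding that uri_file_path performs):

def visit_class(rec, cls, op):
    # Apply op to every dict whose "class" is in cls, walking nested
    # dicts and lists parent-first.
    if isinstance(rec, dict):
        if rec.get("class") in cls:
            op(rec)
        for value in rec.values():
            visit_class(value, cls, op)
    elif isinstance(rec, list):
        for item in rec:
            visit_class(item, cls, op)

def loc_to_path(obj):
    # Drop derived name fields, then recompute a local path for file:// URIs.
    for field in ("path", "nameext", "nameroot", "dirname"):
        obj.pop(field, None)
    if obj["location"].startswith("file://"):
        obj["path"] = obj["location"][len("file://"):]

out = {"result": {"class": "File",
                  "location": "file:///tmp/out.txt",
                  "nameext": ".txt"}}
visit_class(out, ("File", "Directory"), loc_to_path)
print(out["result"])  # nameext removed, path added
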
Example #38
0
def _check_adjust(f):
    f["location"] = file_uri(pm.mapper(f["location"])[1])
    if "contents" in f:
        del f["contents"]
    return f
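
_check_adjust is a closure over an enclosing path mapper pm: the entry returned by pm.mapper() exposes the target path at index 1 (as cwltool's PathMapper entries do), and inline "contents" are dropped once the file is staged on disk. A self-contained sketch with a toy mapper and a simplified file_uri (the real helper also percent-encodes):

class ToyPathMapper(object):
    def __init__(self, table):
        self._table = table  # location -> (resolved, target)

    def mapper(self, location):
        return self._table[location]

def file_uri(path):
    return "file://" + path  # simplified, no percent-encoding

pm = ToyPathMapper({"file:///inputs/a.txt": ("/inputs/a.txt", "/stage/a.txt")})

def _check_adjust(f):
    f["location"] = file_uri(pm.mapper(f["location"])[1])
    if "contents" in f:
        del f["contents"]  # the literal is redundant once staged on disk
    return f

print(_check_adjust({"class": "File",
                     "location": "file:///inputs/a.txt",
                     "contents": "hello"}))
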
Example #40
0
def main(argsl=None,  # type: List[str]
         args=None,  # type: argparse.Namespace
         executor=single_job_executor,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         makeTool=workflow.defaultMakeTool,  # type: Callable[..., Process]
         selectResources=None,  # type: Callable[[Dict[Text, int]], Dict[Text, int]]
         stdin=sys.stdin,  # type: IO[Any]
         stdout=sys.stdout,  # type: IO[Any]
         stderr=sys.stderr,  # type: IO[Any]
         versionfunc=versionstring,  # type: Callable[[], Text]
         job_order_object=None,  # type: MutableMapping[Text, Any]
         make_fs_access=StdFsAccess,  # type: Callable[[Text], StdFsAccess]
         fetcher_constructor=None,  # type: FetcherConstructorType
         resolver=tool_resolver,
         logger_handler=None,
         custom_schema_callback=None  # type: Callable[[], None]
         ):
    # type: (...) -> int

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        # If on Windows platform, a default Docker container is used if not
        # explicitly provided by the user
        if onWindows() and not args.default_container:
            # This Docker image is a minimal Alpine image with bash installed
            # (size 6 MB). Source: https://github.com/frol/docker-alpine-bash
            args.default_container = windows_default_container_id

        # If the caller provided custom arguments, not every expected option
        # may be set, so fill in no-op defaults to avoid crashing when
        # dereferencing them in args.
        for k, v in six.iteritems({
                'print_deps': False,
                'print_pre': False,
                'print_rdf': False,
                'print_dot': False,
                'relative_deps': False,
                'tmp_outdir_prefix': 'tmp',
                'tmpdir_prefix': 'tmp',
                'print_input_deps': False,
                'cachedir': None,
                'quiet': False,
                'debug': False,
                'js_console': False,
                'version': False,
                'enable_dev': False,
                'enable_ext': False,
                'strict': True,
                'skip_schemas': False,
                'rdf_serializer': None,
                'basedir': None,
                'tool_help': False,
                'workflow': None,
                'job_order': None,
                'pack': False,
                'on_error': 'continue',
                'relax_path_checks': False,
                'validate': False,
                'enable_ga4gh_tool_registry': False,
                'ga4gh_tool_registries': [],
                'find_default_container': None,
                'make_template': False,
                'overrides': None}):
            if not hasattr(args, k):
                setattr(args, k, v)

        if args.quiet:
            _logger.setLevel(logging.WARN)
        if args.debug:
            _logger.setLevel(logging.DEBUG)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supportedCWLversions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")

        uri, tool_file_uri = resolve_tool_uri(args.workflow,
                                              resolver=resolver,
                                              fetcher_constructor=fetcher_constructor)

        overrides = []  # type: List[Dict[Text, Any]]

        try:
            job_order_object, input_basedir, jobloader = load_job_order(args,
                                                                        stdin,
                                                                        fetcher_constructor,
                                                                        overrides,
                                                                        tool_file_uri)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)
            return 1

        if args.overrides:
            overrides.extend(load_overrides(file_uri(os.path.abspath(args.overrides)), tool_file_uri))

        try:
            document_loader, workflowobj, uri = fetch_document(uri, resolver=resolver,
                                                               fetcher_constructor=fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=args.enable_dev, strict=args.strict,
                                    preprocess_only=args.print_pre or args.pack,
                                    fetcher_constructor=fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=overrides)

            if args.print_pre:
                stdout.write(json.dumps(processobj, indent=4))
                return 0

            overrides.extend(metadata.get("cwltool:overrides", []))

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            make_tool_kwds = vars(args)

            job_script_provider = None  # type: Callable[[Any, List[str]], Text]
            if conf_file or use_conda_dependencies:
                dependencies_configuration = DependenciesConfiguration(args)  # type: DependenciesConfiguration
                make_tool_kwds["job_script_provider"] = dependencies_configuration

            make_tool_kwds["find_default_container"] = functools.partial(find_default_container, args)
            make_tool_kwds["overrides"] = overrides

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             makeTool, make_tool_kwds)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool), sys.stdout,
                               default_flow_style=False, indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.pack:
                stdout.write(print_pack(document_loader, processobj, uri, metadata))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s",
                ", try again with --debug for more information.\nThe error was: "
                "%s" % exc if not args.debug else ".  The error was:",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp':
                sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else ""
                setattr(args, dirprefix,
                        os.path.abspath(getattr(args, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(args, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(args, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                setattr(args, 'move_outputs', "copy")
            setattr(args, "tmp_outdir_prefix", args.cachedir)

        try:
            job_order_object = init_job_order(job_order_object, args, tool,
                                              print_input_deps=args.print_input_deps,
                                              relative_deps=args.relative_deps,
                                              stdout=stdout,
                                              make_fs_access=make_fs_access,
                                              loader=jobloader,
                                              input_basedir=input_basedir)
        except SystemExit as e:
            return e.code

        if isinstance(job_order_object, int):
            return job_order_object

        try:
            setattr(args, 'basedir', input_basedir)
            del args.workflow
            del args.job_order
            (out, status) = executor(tool, job_order_object,
                                     makeTool=makeTool,
                                     select_resources=selectResources,
                                     make_fs_access=make_fs_access,
                                     **vars(args))

            # This is the workflow output; it needs to be written
            if out is not None:

                def locToPath(p):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in p:
                            del p[field]
                    if p["location"].startswith("file://"):
                        p["path"] = uri_file_path(p["location"])

                visit_class(out, ("File", "Directory"), locToPath)

                # Unsetting the Generation from final output object
                visit_class(out, ("File",), MutationManager().unset_generation)

                if isinstance(out, six.string_types):
                    stdout.write(out)
                else:
                    stdout.write(json.dumps(out, indent=4))
                stdout.write("\n")
                stdout.flush()

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            else:
                _logger.info(u"Final process status is %s", status)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error, try again with --debug for more "
                "information:\n%s", strip_dup_lineno(six.text_type(exc)), exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error, try again with --debug for more information:\n"
                "  %s", exc, exc_info=args.debug)
            return 1

    finally:
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
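
The six.iteritems(...) loop near the top of this main() back-fills a caller-supplied argparse.Namespace with no-op defaults so that later attribute access never raises AttributeError. The same trick in isolation (the defaults dict here is a small hypothetical subset):

import argparse

defaults = {"debug": False, "quiet": False, "tmpdir_prefix": "tmp"}

args = argparse.Namespace(workflow="echo.cwl")  # caller parsed its own args
for key, val in defaults.items():
    if not hasattr(args, key):
        setattr(args, key, val)

print(args.debug, args.tmpdir_prefix)  # False tmp
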
Example #41
0
def init_job_order(job_order_object,  # type: MutableMapping[Text, Any]
                   args,  # type: argparse.Namespace
                   t,     # type: Process
                   print_input_deps=False,  # type: bool
                   relative_deps=False,     # type: bool
                   stdout=sys.stdout,       # type: IO[Any]
                   make_fs_access=None,     # type: Callable[[Text], StdFsAccess]
                   loader=None,             # type: Loader
                   input_basedir=""         # type: Text
):
    # type: (...) -> Union[int, MutableMapping[Text, Any]]

    if not job_order_object:
        namemap = {}  # type: Dict[Text, Text]
        records = []  # type: List[Text]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap, records)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                exit(0)
            cmd_line = vars(toolparser.parse_args(args.job_order))
            for record_name in records:
                record = {}
                record_items = {
                    k: v for k, v in six.iteritems(cmd_line)
                    if k.startswith(record_name)}
                for key, value in six.iteritems(record_items):
                    record[key[len(record_name) + 1:]] = value
                    del cmd_line[key]
                cmd_line[str(record_name)] = record

            if cmd_line["job_order"]:
                try:
                    job_order_object = cast(MutableMapping, loader.resolve_ref(cmd_line["job_order"])[0])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            del cmd_line["job_order"]

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        if toolparser:
            print(u"\nOptions for {} ".format(args.workflow))
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required, use --help for details")
        exit(1)

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=file_uri(str(input_basedir) + "/"))
        exit(0)

    def pathToLoc(p):
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    def addSizes(p):
        if 'location' in p:
            try:
                p["size"] = os.stat(p["location"][7:]).st_size  # strip off file://
            except OSError:
                pass
        elif 'contents' in p:
            p["size"] = len(p['contents'])
        else:
            return  # best effort

    ns = {}  # type: Dict[Text, Union[Dict[Any, Any], Text, Iterable[Text]]]
    ns.update(t.metadata.get("$namespaces", {}))
    ld = Loader(ns)

    def expand_formats(p):
        if "format" in p:
            p["format"] = ld.expand_url(p["format"], "")

    visit_class(job_order_object, ("File", "Directory"), pathToLoc)
    visit_class(job_order_object, ("File",), addSizes)
    visit_class(job_order_object, ("File",), expand_formats)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]

    return job_order_object
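
addSizes above is deliberately best-effort: stat the local file behind a file:// location, fall back to the length of inline contents, and otherwise leave the object untouched. A runnable version of the same logic:

import os
import tempfile

def add_sizes(p):
    if "location" in p:
        try:
            # strip the 7-character "file://" prefix before stat
            p["size"] = os.stat(p["location"][7:]).st_size
        except OSError:
            pass  # remote or missing file: leave size unset
    elif "contents" in p:
        p["size"] = len(p["contents"])

with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"12345")
f = {"class": "File", "location": "file://" + tmp.name}
add_sizes(f)
print(f["size"])  # 5
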
Example #42
0
def tool_resolver(document_loader, uri):
    for r in [resolve_local, resolve_ga4gh_tool]:
        ret = r(document_loader, uri)
        if ret is not None:
            return ret
    return file_uri(os.path.abspath(uri), split_frag=True)
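
tool_resolver is a small chain of responsibility: each resolver may claim the URI, the first non-None answer wins, and an absolute file URI is the fallback. The same shape with toy resolvers (the registry mapping is hypothetical):

import os

def resolve_local(_loader, uri):
    # Claim the URI only if it names an existing local file.
    return "file://" + os.path.abspath(uri) if os.path.exists(uri) else None

def resolve_registry(_loader, uri):
    registry = {"echo": "https://example.org/tools/echo.cwl"}  # hypothetical
    return registry.get(uri)

def toy_tool_resolver(document_loader, uri):
    for r in [resolve_local, resolve_registry]:
        ret = r(document_loader, uri)
        if ret is not None:
            return ret
    return "file://" + os.path.abspath(uri)

print(toy_tool_resolver(None, "echo"))  # resolved via the toy registry
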
Example #43
0
def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False,
                   stdout=sys.stdout, make_fs_access=None, fetcher_constructor=None):
    # type: (argparse.Namespace, Process, IO[Any], bool, bool, IO[Any], Callable[[Text], StdFsAccess], Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]) -> Union[int, Tuple[Dict[Text, Any], Text]]

    job_order_object = None

    _jobloaderctx = jobloaderctx.copy()
    _jobloaderctx.update(t.metadata.get("$namespaces", {}))
    loader = Loader(_jobloaderctx, fetcher_constructor=fetcher_constructor)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.round_trip_load(stdin)  # type: ignore
        job_order_object, _ = loader.resolve_all(job_order_object, file_uri(os.getcwd()) + "/")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)
            return 1
        toolparser = None
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}  # type: Dict[Text, Text]
        records = []  # type: List[Text]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap, records)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))
            for record_name in records:
                record = {}
                record_items = {
                    k: v for k, v in six.iteritems(cmd_line)
                    if k.startswith(record_name)}
                for key, value in six.iteritems(record_items):
                    record[key[len(record_name) + 1:]] = value
                    del cmd_line[key]
                cmd_line[str(record_name)] = record

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(
                        os.path.dirname(cmd_line["job_order"]))
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            del cmd_line["job_order"]

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        if toolparser:
            print(u"\nOptions for {} ".format(args.workflow))
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required, use --help for details")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=file_uri(input_basedir + "/"))
        return 0

    def pathToLoc(p):
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    visit_class(job_order_object, ("File", "Directory"), pathToLoc)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]

    return (job_order_object, input_basedir)
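
load_job_order treats a single "-" argument as "read the job order from stdin" and any other single argument as a file to resolve; otherwise the job order is assembled from the remaining command-line flags. The dispatch reduced to its essentials, using json instead of ruamel.yaml to stay dependency-free:

import json
import sys

def pick_job_order_source(job_order_args, stdin=sys.stdin):
    if len(job_order_args) == 1 and job_order_args[0] == "-":
        return json.load(stdin), None   # object parsed from stdin
    if len(job_order_args) == 1 and not job_order_args[0].startswith("-"):
        return None, job_order_args[0]  # a file to resolve later
    return None, None                   # build from remaining CLI flags

obj, path = pick_job_order_source(["job.json"])
print(obj, path)  # None job.json
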
Example #44
0
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          #
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler is not None:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker container is used if not
        # explicitly provided by the user
        if onWindows() and not runtimeContext.default_container:
            # This Docker image is a minimal Alpine image with bash installed
            # (size 6 MB). Source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        # Configure logging
        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            # Silence STDERR, but not any provenance log file
            stderr_handler.setLevel(logging.WARN)
        if runtimeContext.debug:
            # Increase to debug for both stderr and provenance log file
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        formatter = None  # type: Optional[logging.Formatter]
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)
        ##

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback is not None:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1
            ro = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
                full_name=args.cwl_full_name)
            runtimeContext.research_obj = ro
            log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
            prov_log_handler = logging.StreamHandler(log_file_io)

            class ProvLogFormatter(logging.Formatter):
                """Enforce ISO8601 with both T and Z."""
                def __init__(self):  # type: () -> None
                    super(ProvLogFormatter, self).__init__(
                        "[%(asctime)sZ] %(message)s")

                def formatTime(self, record, datefmt=None):
                    # type: (logging.LogRecord, str) -> str
                    record_time = time.gmtime(record.created)
                    formatted_time = time.strftime("%Y-%m-%dT%H:%M:%S", record_time)
                    with_msecs = "%s,%03d" % (formatted_time, record.msecs)
                    return with_msecs
            prov_log_handler.setFormatter(ProvLogFormatter())
            _logger.addHandler(prov_log_handler)
            _logger.debug(u"[provenance] Logging to %s", log_file_io)
            if argsl is not None:
                # Log cwltool command line options to provenance file
                _logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl))
            _logger.debug(u"[cwltool] Arguments: %s", args)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.loader = default_loader(loadingContext.fetcher_constructor)
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)
        loadingContext.do_update = not (args.pack or args.print_subgraph)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            assert loadingContext.loader is not None

            if args.print_deps:
                printdeps(workflowobj, loadingContext.loader, stdout,
                           args.relative_deps, uri)
                return 0

            loadingContext, uri \
                = resolve_and_validate_document(loadingContext, workflowobj, uri,
                                    preprocess_only=(args.print_pre or args.pack),
                                    skip_schemas=args.skip_schemas)
            assert loadingContext.loader is not None
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext.loader, processobj, uri, metadata))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at the same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext.loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                def my_represent_none(self, data):  # pylint: disable=unused-argument
                    """Force clean representation of 'null'."""
                    return self.represent_scalar(u'tag:yaml.org,2002:null', u'null')
                yaml.RoundTripRepresenter.add_representer(type(None), my_represent_none)
                yaml.round_trip_dump(
                    generate_input_template(tool), sys.stdout,
                    default_flow_style=False, indent=4, block_seq_indent=2)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write("  "+"\n  ".join([shortname(t["id"]) for t in tool.tool[f]])+"\n")
                return 0

            if args.target:
                if isinstance(tool, Workflow):
                    url = urllib.parse.urlparse(tool.tool["id"])
                    if url.fragment:
                        extracted = get_subgraph([tool.tool["id"] + "/" + r for r in args.target], tool)
                    else:
                        extracted = get_subgraph([loadingContext.loader.fetcher.urljoin(tool.tool["id"], "#" + r)
                                                 for r in args.target],
                                                 tool)
                else:
                    _logger.error("Can only use --target on Workflows")
                    return 1
                loadingContext.loader.idx[extracted["id"]] = extracted
                tool = make_tool(extracted["id"],
                                 loadingContext)

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(json_dumps(tool.tool, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                    else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()
        assert executor is not None

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers)

            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(
                        out, None, True)

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Remove the MutationManager generation markers from the final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(Text(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                u"Unhandled error%s:\n  %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and workflowobj:
            research_obj = runtimeContext.research_obj
            assert loadingContext is not None
            assert loadingContext.loader is not None
            prov_dependencies = prov_deps(workflowobj, loadingContext.loader, uri)
            research_obj.generate_snapshot(prov_dependencies)
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourselves to the RO
                _logger.debug(u"[provenance] Closing provenance log file %s",
                    prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
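
A note on the pattern above: the function returns an integer exit status rather than calling sys.exit() itself. A minimal wrapper sketch, assuming the enclosing function is the cwltool-style main(argsl, ...) -> int whose full signature appears in Example #45 below:

import sys

# Hypothetical console entry point; forwards main()'s return value to the shell.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))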
Example #45
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, codecs.StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          # type: logging.Handler
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if six.PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = codecs.getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # Pre-declared so the finally block can reference it
    workflowobj = None
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # On Windows, a default Docker container is used if the user did not
        # explicitly provide one.
        if onWindows() and not runtimeContext.default_container:
            # This Docker image is a minimal Alpine image with bash installed
            # (about 6 MB). Source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in six.iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            _logger.setLevel(logging.WARN)
        if runtimeContext.debug:
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        # Set up the research object from provenance.py if the --provenance flag is enabled.
        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1

            runtimeContext.research_obj = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix,
                # Optionals, might be None
                orcid=args.orcid,
                full_name=args.cwl_full_name)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            document_loader, workflowobj, uri = fetch_document(
                uri, resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=loadingContext.enable_dev,
                                    strict=loadingContext.strict,
                                    preprocess_only=(args.print_pre or args.pack),
                                    fetcher_constructor=loadingContext.fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=loadingContext.overrides_list,
                                    do_validate=loadingContext.do_validate)
            if args.pack:
                stdout.write(print_pack(document_loader, processobj, uri, metadata))
                return 0
            if args.provenance and runtimeContext.research_obj:
                # Cannot be combined with args.pack at the same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(document_loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4))
                return 0

            loadingContext.overrides_list.extend(metadata.get("cwltool:overrides", []))

            tool = make_tool(document_loader, avsc_names,
                             metadata, uri, loadingContext)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool), sys.stdout,
                               default_flow_style=False, indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # On macOS, TMPDIR must point to a location under one of the volumes
        # shared with Docker for Mac.
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()
        assert executor is not None

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)

            runtimeContext.find_default_container = functools.partial(
                find_default_container,
                default_container=runtimeContext.default_container,
                use_biocontainers=args.beta_use_biocontainers)
            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:
                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Remove the MutationManager generation markers from the final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(six.text_type(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error%s:\n  %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and args.rm_tmpdir and workflowobj:
            # Add all related CWL files to the research object
            prov_dependencies = printdeps(
                workflowobj, document_loader, stdout, args.relative_deps, uri,
                runtimeContext.research_obj)
            prov_dep = prov_dependencies[1]
            assert prov_dep
            runtimeContext.research_obj.generate_snapshot(prov_dep)

            runtimeContext.research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
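
A minimal usage sketch for the main() above, assuming Python 3 and a hypothetical workflow file my-workflow.cwl; with --validate it loads and checks the document without running it:

import io

out = io.StringIO()  # capture anything main() writes to its stdout
exit_code = main(argsl=["--validate", "my-workflow.cwl"], stdout=out)
# exit_code is 0 when the document loads and validates cleanly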
Example #46
def init_job_order(job_order_object,        # type: Optional[MutableMapping[Text, Any]]
                   args,                    # type: argparse.Namespace
                   t,                       # type: Process
                   loader,                  # type: Loader
                   stdout,                  # type: Union[TextIO, StreamWriter]
                   print_input_deps=False,  # type: bool
                   relative_deps=False,     # type: bool
                   make_fs_access=StdFsAccess,  # type: Callable[[Text], StdFsAccess]
                   input_basedir="",        # type: Text
                   secret_store=None        # type: SecretStore
                  ):  # type: (...) -> MutableMapping[Text, Any]
    secrets_req, _ = t.get_requirement("http://commonwl.org/cwltool#Secrets")
    if not job_order_object:
        namemap = {}  # type: Dict[Text, Text]
        records = []  # type: List[Text]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap, records)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                exit(0)
            cmd_line = vars(toolparser.parse_args(args.job_order))
            for record_name in records:
                record = {}
                record_items = {
                    k: v for k, v in six.iteritems(cmd_line)
                    if k.startswith(record_name)}
                for key, value in six.iteritems(record_items):
                    record[key[len(record_name) + 1:]] = value
                    del cmd_line[key]
                cmd_line[str(record_name)] = record

            if cmd_line["job_order"]:
                try:
                    job_order_object = cast(
                        MutableMapping, loader.resolve_ref(cmd_line["job_order"])[0])
                except Exception as e:
                    _logger.error(Text(e), exc_info=args.debug)
                    exit(1)
            else:
                job_order_object = {"id": args.workflow}

            del cmd_line["job_order"]

            job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

            if secret_store and secrets_req:
                secret_store.store(
                    [shortname(sc) for sc in secrets_req["secrets"]], job_order_object)

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Parsed job order from command line: %s",
                              json_dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    for inp in t.tool["inputs"]:
        if "default" in inp and (
                not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object:
        if len(t.tool["inputs"]) > 0:
            if toolparser:
                print(u"\nOptions for {} ".format(args.workflow))
                toolparser.print_help()
            _logger.error("")
            _logger.error("Input object required, use --help for details")
            exit(1)
        else:
            job_order_object = {}

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps, "",
                  basedir=file_uri(str(input_basedir) + "/"))
        exit(0)

    def path_to_loc(p):
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    ns = {}  # type: Dict[Text, Union[Dict[Any, Any], Text, Iterable[Text]]]
    ns.update(t.metadata.get("$namespaces", {}))
    ld = Loader(ns)

    def expand_formats(p):
        if "format" in p:
            p["format"] = ld.expand_url(p["format"], "")

    visit_class(job_order_object, ("File", "Directory"), path_to_loc)
    visit_class(job_order_object, ("File",), functools.partial(add_sizes, make_fs_access(input_basedir)))
    visit_class(job_order_object, ("File",), expand_formats)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    if secret_store and secrets_req:
        secret_store.store(
            [shortname(sc) for sc in secrets_req["secrets"]], job_order_object)

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    return job_order_object
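
One step of init_job_order() worth isolating: tool input defaults are merged in only for keys the job order does not already supply. A standalone sketch of that step, using illustrative identifiers (shortname is the same helper used above):

tool_inputs = [
    {"id": "file:///wf.cwl#main/threads", "default": 4},
    {"id": "file:///wf.cwl#main/input_file"},   # no default
]
job_order = {"input_file": "data.txt"}          # user-supplied value wins

for inp in tool_inputs:
    key = shortname(inp["id"])                  # e.g. "threads"
    if "default" in inp and key not in job_order:
        job_order[key] = inp["default"]
# job_order == {"input_file": "data.txt", "threads": 4}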
Example #47
def _check_adjust(a_file):
    # "pm" is a PathMapper provided by the enclosing scope in the original
    # source; map the internal location to its external file:// URI.
    a_file["location"] = file_uri(pm.mapper(a_file["location"])[1])
    if "contents" in a_file:
        del a_file["contents"]
    return a_file
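
_check_adjust closes over a PathMapper named pm; a sketch of how such a helper is typically applied, reusing the visit_class tree walker from the earlier examples (outputs is a hypothetical output object):

# Rewrite every File/Directory location in an output object to its mapped
# external file:// URI.
visit_class(outputs, ("File", "Directory"), _check_adjust)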
Example #48
def glob(self, pattern):  # type: (Text) -> List[Text]
    # Expand the pattern under the base directory; "glob" here resolves to
    # the stdlib module, since a method name does not shadow globals.
    return [file_uri(str(self._abs(l))) for l in glob.glob(self._abs(pattern))]
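
A usage sketch, assuming this method sits on a StdFsAccess-style class constructed with a base directory (paths here are illustrative):

fs = StdFsAccess("/data/run1")    # hypothetical base directory
uris = fs.glob("*.txt")           # e.g. ["file:///data/run1/sample.txt"]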