Exemple #1
0
def pack(document_loader,  # type: Loader
         processobj,       # type: Union[Dict[Text, Any], List[Dict[Text, Any]]]
         uri,              # type: Text
         metadata,         # type: Dict[Text, Text]
         rewrite_out=None  # type: Dict[Text, Text]
        ):  # type: (...) -> Dict[Text, Any]
    """Bundle a process document and the documents it runs into one ``$graph``.

    The loader is wrapped in a ``SubLoader`` whose ``idx`` is reset to an
    empty dict, and the given process object(s) are registered in it under
    their ``"id"``.  Starting from ``uri``, ``find_run`` collects the set of
    run references; every id gathered through ``find_ids`` is then assigned a
    new fragment-style name (``"#main"`` for the main document).  Each
    resolved run whose ``class`` is ``Workflow``, ``CommandLineTool`` or
    ``ExpressionTool`` is deep-copied into ``$graph`` under its new id, and
    ``replace_refs`` / ``import_embed`` are applied to the result.

    Args:
        document_loader: Loader used to resolve references.
        processobj: A single process mapping or a list of them; every
            mapping must carry an ``"id"`` key.
        uri: URI of the main document.  When ``processobj`` is a list and
            ``uri`` has no fragment, it is replaced by the id of an entry
            ending in ``"#main"`` (if any).
        metadata: Must contain ``"cwlVersion"``; ``"$namespaces"`` is read
            when present.
        rewrite_out: Optional dict that is used (and mutated in place) as the
            mapping from original ids to rewritten ids.

    Returns:
        A dict with ``"$graph"`` and ``"cwlVersion"``, plus ``"$schemas"``
        (sorted, so that the output is reproducible) when any were found.
        ``$namespaces`` is attached to the first ``$graph`` entry when
        namespaces exist and at least one entry was packed.
    """
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, dict):
        document_loader.idx[processobj["id"]] = CommentedMap(iteritems(processobj))
    elif isinstance(processobj, list):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # Without an explicit fragment in ``uri``, an entry whose id ends
            # in "#main" becomes the main document.
            if not frag and po["id"].endswith("#main"):
                uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(iteritems(po))

    def loadref(base, uri):
        # type: (Optional[Text], Text) -> Union[Dict, List, Text, None]
        # Only the resolved object is needed; the accompanying metadata is dropped.
        return document_loader.resolve_ref(uri, base_url=base)[0]

    ids = set()  # type: Set[Text]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[Text]
    # A caller-supplied dict is filled in place so the caller can inspect the
    # id mapping afterwards.
    rewrite = {} if rewrite_out is None else rewrite_out  # type: Dict[Text, Text]

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (Text, Text) -> None
        """Record the packed name for id ``r`` in ``rewrite``."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # ``r`` lives underneath the main document.
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri)+1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/"+r[len(mainuri)+1:], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                # Same file as the main document, different fragment.
                rewrite[r] = "#" + uniquename(frag, names)
            elif path not in rewrite:
                # Different file: only the file path itself gets an entry.
                rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted so that name assignment does not depend on set iteration order.
    for r in sorted(ids):
        rewrite_id(r, uri)

    packed = {"$graph": [], "cwlVersion": metadata["cwlVersion"]
              }  # type: Dict[Text, Any]
    namespaces = metadata.get('$namespaces', None)

    schemas = set()  # type: Set[Text]
    for r in sorted(runs):
        # Use a dedicated name instead of rebinding the ``metadata`` parameter.
        dcr, run_metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, dict):
            continue
        for doc in (dcr, run_metadata):
            if "$schemas" in doc:
                schemas.update(doc["$schemas"])
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        dc = cast(Dict[Text, Any], copy.deepcopy(dcr))
        dc["id"] = rewrite[r]
        # These keys are removed from each packed entry.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        # Sorted: set iteration order of strings varies between interpreter
        # runs, which would make the packed output non-reproducible.
        packed["$schemas"] = sorted(schemas)

    # Snapshot the keys: the dict may be handed to replace_refs while looping.
    for r in list(rewrite.keys()):
        v = rewrite[r]
        stem = r + "/" if "#" in r else r + "#"
        replace_refs(packed, rewrite, stem, v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' inside $graph when there is a single item
        # because we're printing contents inside '$graph' rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
    # Guard against an empty $graph (no run had a packable class); without the
    # guard this raised IndexError only when namespaces happened to be present.
    if namespaces and len(packed["$graph"]) > 0:
        packed["$graph"][0]["$namespaces"] = dict(cast(Dict, namespaces))

    return packed
Exemple #2
0
def pack(
    loadingContext: LoadingContext,
    uri: str,
    rewrite_out: Optional[Dict[str, str]] = None,
    loader: Optional[Loader] = None,
) -> CWLObjectType:
    """Reload the document at ``uri`` and pack it with its runs into one ``$graph``.

    The document is fetched again through a copy of ``loadingContext`` with
    updating disabled and an empty loader index (see the comment below).  Run
    references are collected with ``find_run``; each referenced document is
    itself fetched and validated, and its ``cwlVersion`` recorded.  The packed
    document's ``cwlVersion`` is the entry of ``ORDERED_VERSIONS`` with the
    highest index among the versions found, and every packed run is passed
    through ``update`` with that version.

    Ids found under the main object's ``"inputs"`` (plus ids found in every
    run) and ids found under its ``"outputs"`` are renamed with two separate
    name sets and two separate rewrite maps.

    Args:
        loadingContext: Context to copy; the copy gets ``do_update = False``,
            a fresh ``SubLoader`` and empty metadata.
        uri: URI of the document to pack.
        rewrite_out: Optional dict that is updated with the input rewrite map
            and then the output rewrite map before returning.
        loader: Optional loader to wrap; falls back to
            ``loadingContext.loader`` and then to ``Loader({})``.

    Returns:
        A ``CommentedMap`` with ``"$graph"`` and ``"cwlVersion"``, plus
        ``"$schemas"`` (sorted for reproducible output) and ``"$namespaces"``
        when present.

    Raises:
        Exception: if the loading context has no loader after validation.
    """
    # The workflow document we have in memory right now may have been
    # updated to the internal CWL version.  We need to reload the
    # document to go back to its original version.
    #
    # What's going on here is that the updater replaces the
    # documents/fragments in the index with updated ones, the
    # index is also used as a cache, so we need to go through the
    # loading process with an empty index and updating turned off
    # so we have the original un-updated documents.
    #
    loadingContext = loadingContext.copy()
    document_loader = SubLoader(loader or loadingContext.loader or Loader({}))
    loadingContext.do_update = False
    loadingContext.loader = document_loader
    loadingContext.loader.idx = {}
    loadingContext.metadata = {}
    loadingContext, docobj, uri = fetch_document(uri, loadingContext)
    # The second return value is not needed here.
    loadingContext, _ = resolve_and_validate_document(
        loadingContext, docobj, uri, preprocess_only=True
    )
    if loadingContext.loader is None:
        raise Exception("loadingContext.loader cannot be none")
    processobj, metadata = loadingContext.loader.resolve_ref(uri)
    document_loader = loadingContext.loader

    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(processobj.items())
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # Without an explicit fragment in ``uri``, an entry whose id ends
            # in "#main" becomes the main document.
            if not frag and po["id"].endswith("#main"):
                uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(po.items())
        document_loader.idx[metadata["id"]] = CommentedMap(metadata.items())

    found_versions = {
        cast(str, loadingContext.metadata["cwlVersion"])
    }  # type: Set[str]

    def loadref(base: Optional[str], lr_uri: str) -> ResolveType:
        """Fetch and validate ``lr_uri``, record its cwlVersion, return the object."""
        lr_loadingContext = loadingContext.copy()
        lr_loadingContext.metadata = {}
        lr_loadingContext, lr_workflowobj, lr_uri = fetch_document(
            lr_uri, lr_loadingContext
        )
        lr_loadingContext, lr_uri = resolve_and_validate_document(
            lr_loadingContext, lr_workflowobj, lr_uri
        )
        found_versions.add(cast(str, lr_loadingContext.metadata["cwlVersion"]))
        if lr_loadingContext.loader is None:
            raise Exception("loader should not be None")
        return lr_loadingContext.loader.resolve_ref(lr_uri, base_url=base)[0]

    input_ids: Set[str] = set()
    output_ids: Set[str] = set()

    if isinstance(processobj, MutableSequence):
        mainobj = processobj[0]
    else:
        mainobj = processobj
    find_ids(cast(Dict[str, Any], mainobj)["inputs"], input_ids)
    find_ids(cast(Dict[str, Any], mainobj)["outputs"], output_ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    # Figure out the highest version, everything needs to be updated
    # to it.  ``found_versions`` always holds at least the main document's
    # version, so max() never sees an empty sequence.
    highest = max(ORDERED_VERSIONS.index(fv) for fv in found_versions)
    update_to_version = ORDERED_VERSIONS[highest]

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], input_ids)

    input_names: Set[str] = set()
    output_names: Set[str] = set()

    rewrite_inputs: Dict[str, str] = {}
    rewrite_outputs: Dict[str, str] = {}

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(
        r: str, mainuri: str, rewrite: Dict[str, str], names: Set[str]
    ) -> None:
        """Record the packed name for id ``r`` in ``rewrite``."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # ``r`` lives underneath the main document.
            if r[len(mainuri) :].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri) + 1 :], names)
            else:
                rewrite[r] = "#" + uniquename("main/" + r[len(mainuri) + 1 :], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                # Same file as the main document, different fragment.
                rewrite[r] = "#" + uniquename(frag, names)
            elif path not in rewrite:
                # Different file: only the file path itself gets an entry.
                rewrite[path] = "#" + uniquename(shortname(path), names)

    def apply_rewrites(target: Any, rewrite: Dict[str, str]) -> None:
        """Run ``replace_refs`` over ``target`` once per entry of ``rewrite``."""
        # Snapshot the keys: the dict is handed to replace_refs while looping.
        for r in list(rewrite.keys()):
            v = rewrite[r]
            stem = r + "/" if "#" in r else r + "#"
            replace_refs(target, rewrite, stem, v + "/")

    # Sorted so that name assignment does not depend on set iteration order.
    for r in sorted(input_ids):
        rewrite_id(r, uri, rewrite_inputs, input_names)
    for r in sorted(output_ids):
        rewrite_id(r, uri, rewrite_outputs, output_names)

    packed = CommentedMap(
        (("$graph", CommentedSeq()), ("cwlVersion", update_to_version))
    )
    namespaces = metadata.get("$namespaces", None)

    schemas: Set[str] = set()
    if "$schemas" in metadata:
        schemas.update(metadata["$schemas"])
    for r in sorted(runs):
        # Use a dedicated name instead of rebinding the outer ``metadata``.
        dcr, run_metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue

        dcr = update(
            dcr,
            document_loader,
            r,
            loadingContext.enable_dev,
            run_metadata,
            update_to_version,
        )

        if ORIGINAL_CWLVERSION in run_metadata:
            del run_metadata[ORIGINAL_CWLVERSION]
        if ORIGINAL_CWLVERSION in dcr:
            del dcr[ORIGINAL_CWLVERSION]

        if "$schemas" in run_metadata:
            schemas.update(run_metadata["$schemas"])
        if dcr.get("class") not in (
            "Workflow",
            "CommandLineTool",
            "ExpressionTool",
            "Operation",
        ):
            continue
        dc = cast(Dict[str, Any], copy.deepcopy(dcr))
        dc["id"] = rewrite_inputs[r]
        # These keys are removed from each packed entry.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        # Sorted: set iteration order of strings varies between interpreter
        # runs, which would make the packed output non-reproducible.
        packed["$schemas"] = sorted(schemas)
    if namespaces:
        packed["$namespaces"] = namespaces

    # The main entry's outputs are detached while the input rewrite map is
    # applied, then rewritten on their own with the output rewrite map.
    save_outputs = packed["$graph"][0].pop("outputs")
    apply_rewrites(packed, rewrite_inputs)

    import_embed(packed, set())

    packed["$graph"][0]["outputs"] = save_outputs
    apply_rewrites(packed["$graph"][0]["outputs"], rewrite_outputs)

    # again, to process the outputSource references
    apply_rewrites(packed, rewrite_inputs)

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion', '$schemas', and '$namespaces' inside '$graph'
        # when there is only a single item because main.print_pack() will print
        # the contents inside '$graph' rather than whole dict in this case
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = sorted(schemas)
        if namespaces:
            packed["$graph"][0]["$namespaces"] = namespaces

    if rewrite_out is not None:
        rewrite_out.update(rewrite_inputs)
        rewrite_out.update(rewrite_outputs)

    return packed
Exemple #3
0
def pack(
    loadingContext: LoadingContext,
    uri,  # type: str
    rewrite_out=None,  # type: Optional[Dict[str, str]]
    loader=None,  # type: Optional[Loader]
):  # type: (...) -> Dict[str, Any]
    """Reload the document at ``uri`` and pack it with its runs into one ``$graph``.

    The document is fetched again through a copy of ``loadingContext`` with
    updating disabled and an empty loader index (see the comment below).  Run
    references are collected with ``find_run`` starting from ``uri``; every
    id gathered through ``find_ids`` is assigned a new fragment-style name
    (``"#main"`` for the main document).  Each resolved run whose ``class``
    is ``Workflow``, ``CommandLineTool`` or ``ExpressionTool`` is deep-copied
    into ``$graph`` under its new id.

    Args:
        loadingContext: Context to copy; the copy gets ``do_update = False``,
            a fresh ``SubLoader`` and empty metadata.
        uri: URI of the document to pack.
        rewrite_out: Optional dict that is used (and mutated in place) as the
            mapping from original ids to rewritten ids.
        loader: Optional loader to wrap; falls back to
            ``loadingContext.loader`` and then to ``Loader({})``.

    Returns:
        A ``CommentedMap`` with ``"$graph"`` and ``"cwlVersion"``, plus
        ``"$schemas"`` (sorted for reproducible output) when any were found.
        ``$namespaces`` is attached to the first ``$graph`` entry when
        namespaces exist and at least one entry was packed.

    Raises:
        Exception: if the loading context has no loader after validation.
    """
    # The workflow document we have in memory right now may have been
    # updated to the internal CWL version.  We need to reload the
    # document to go back to its original version.
    #
    # What's going on here is that the updater replaces the
    # documents/fragments in the index with updated ones, the
    # index is also used as a cache, so we need to go through the
    # loading process with an empty index and updating turned off
    # so we have the original un-updated documents.
    #
    loadingContext = loadingContext.copy()
    document_loader = SubLoader(loader or loadingContext.loader or Loader({}))
    loadingContext.do_update = False
    loadingContext.loader = document_loader
    loadingContext.loader.idx = {}
    loadingContext.metadata = {}
    loadingContext, docobj, uri = fetch_document(uri, loadingContext)
    # The second return value is not needed here.
    loadingContext, _ = resolve_and_validate_document(
        loadingContext, docobj, uri, preprocess_only=True
    )
    if loadingContext.loader is None:
        raise Exception("loadingContext.loader cannot be none")
    processobj, metadata = loadingContext.loader.resolve_ref(uri)
    document_loader = loadingContext.loader

    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(processobj.items())
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # Without an explicit fragment in ``uri``, an entry whose id ends
            # in "#main" becomes the main document.
            if not frag and po["id"].endswith("#main"):
                uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(po.items())
        document_loader.idx[metadata["id"]] = CommentedMap(metadata.items())

    def loadref(base, uri):
        # type: (Optional[str], str) -> ResolveType
        # Only the resolved object is needed; the accompanying metadata is dropped.
        return document_loader.resolve_ref(uri, base_url=base)[0]

    ids = set()  # type: Set[str]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[str]
    # A caller-supplied dict is filled in place so the caller can inspect the
    # id mapping afterwards.
    rewrite = {} if rewrite_out is None else rewrite_out  # type: Dict[str, str]

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (str, str) -> None
        """Record the packed name for id ``r`` in ``rewrite``."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # ``r`` lives underneath the main document.
            if r[len(mainuri) :].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri) + 1 :], names)
            else:
                rewrite[r] = "#" + uniquename("main/" + r[len(mainuri) + 1 :], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                # Same file as the main document, different fragment.
                rewrite[r] = "#" + uniquename(frag, names)
            elif path not in rewrite:
                # Different file: only the file path itself gets an entry.
                rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted so that name assignment does not depend on set iteration order.
    for r in sorted(ids):
        rewrite_id(r, uri)

    packed = CommentedMap(
        (("$graph", CommentedSeq()), ("cwlVersion", metadata["cwlVersion"]))
    )
    namespaces = metadata.get("$namespaces", None)

    schemas = set()  # type: Set[str]
    if "$schemas" in metadata:
        schemas.update(metadata["$schemas"])
    for r in sorted(runs):
        # Use a dedicated name instead of rebinding the outer ``metadata``.
        dcr, run_metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        if "$schemas" in run_metadata:
            schemas.update(run_metadata["$schemas"])
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        dc = cast(Dict[str, Any], copy.deepcopy(dcr))
        dc["id"] = rewrite[r]
        # These keys are removed from each packed entry.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        # Sorted: set iteration order of strings varies between interpreter
        # runs, which would make the packed output non-reproducible.
        packed["$schemas"] = sorted(schemas)

    # Snapshot the keys: the dict is handed to replace_refs while looping.
    for r in list(rewrite.keys()):
        v = rewrite[r]
        stem = r + "/" if "#" in r else r + "#"
        replace_refs(packed, rewrite, stem, v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = sorted(schemas)
    # always include $namespaces in the #main -- but only when something was
    # packed; without the length check an empty $graph raised IndexError here
    # only when namespaces happened to be present.
    if namespaces and len(packed["$graph"]) > 0:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed
Exemple #4
0
def pack(
        document_loader: Loader,
        processobj,  # type: Union[Dict[str, Any], List[Dict[str, Any]]]
        uri,  # type: str
        metadata,  # type: Dict[str, str]
        rewrite_out=None,  # type: Optional[Dict[str, str]]
):  # type: (...) -> Dict[str, Any]
    """Bundle a process document and the documents it runs into one ``$graph``.

    The loader is wrapped in a ``SubLoader`` whose ``idx`` is reset to an
    empty dict, and the given process object(s) are registered in it under
    their ``"id"`` (for a list, ``metadata`` is registered under
    ``metadata["id"]`` as well).  Starting from ``uri``, ``find_run``
    collects the set of run references; every id gathered through
    ``find_ids`` is assigned a new fragment-style name (``"#main"`` for the
    main document).  Each resolved run whose ``class`` is ``Workflow``,
    ``CommandLineTool`` or ``ExpressionTool`` is deep-copied into ``$graph``
    under its new id.

    Args:
        document_loader: Loader used to resolve references.
        processobj: A single process mapping or a sequence of them; every
            mapping must carry an ``"id"`` key.
        uri: URI of the main document.  When ``processobj`` is a sequence and
            ``uri`` has no fragment, it is replaced by the id of an entry
            ending in ``"#main"`` (if any).
        metadata: Must contain ``"cwlVersion"``; ``"$namespaces"`` and
            ``"$schemas"`` are read when present.
        rewrite_out: Optional dict that is used (and mutated in place) as the
            mapping from original ids to rewritten ids.

    Returns:
        A ``CommentedMap`` with ``"$graph"`` and ``"cwlVersion"``, plus
        ``"$schemas"`` (sorted for reproducible output) when any were found.
        ``$namespaces`` is attached to the first ``$graph`` entry when
        namespaces exist and at least one entry was packed.
    """
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(
            processobj.items())
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # Without an explicit fragment in ``uri``, an entry whose id ends
            # in "#main" becomes the main document.
            if not frag and po["id"].endswith("#main"):
                uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(po.items())
        document_loader.idx[metadata["id"]] = CommentedMap(metadata.items())

    def loadref(base, uri):
        # type: (Optional[str], str) -> Union[Dict[str, Any], List[Dict[str, Any]], str, None]
        # Only the resolved object is needed; the accompanying metadata is dropped.
        return document_loader.resolve_ref(uri, base_url=base)[0]

    ids = set()  # type: Set[str]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[str]
    # A caller-supplied dict is filled in place so the caller can inspect the
    # id mapping afterwards.
    rewrite = {} if rewrite_out is None else rewrite_out  # type: Dict[str, str]

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (str, str) -> None
        """Record the packed name for id ``r`` in ``rewrite``."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            # ``r`` lives underneath the main document.
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri) + 1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/" + r[len(mainuri) + 1:],
                                              names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                # Same file as the main document, different fragment.
                rewrite[r] = "#" + uniquename(frag, names)
            elif path not in rewrite:
                # Different file: only the file path itself gets an entry.
                rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted so that name assignment does not depend on set iteration order.
    for r in sorted(ids):
        rewrite_id(r, uri)

    packed = CommentedMap(
        (("$graph", CommentedSeq()), ("cwlVersion", metadata["cwlVersion"])))
    namespaces = metadata.get("$namespaces", None)

    schemas = set()  # type: Set[str]
    if "$schemas" in metadata:
        schemas.update(metadata["$schemas"])
    for r in sorted(runs):
        # Use dedicated names instead of rebinding (and re-typing) the
        # ``metadata`` parameter.
        dcr, resolved_metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        run_metadata = cast(Dict[str, Any], resolved_metadata)
        if "$schemas" in run_metadata:
            schemas.update(run_metadata["$schemas"])
        if dcr.get("class") not in ("Workflow", "CommandLineTool",
                                    "ExpressionTool"):
            continue
        dc = cast(Dict[str, Any], copy.deepcopy(dcr))
        dc["id"] = rewrite[r]
        # These keys are removed from each packed entry.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        # Sorted: set iteration order of strings varies between interpreter
        # runs, which would make the packed output non-reproducible.
        packed["$schemas"] = sorted(schemas)

    # Snapshot the keys: the dict is handed to replace_refs while looping.
    for r in list(rewrite.keys()):
        v = rewrite[r]
        stem = r + "/" if "#" in r else r + "#"
        replace_refs(packed, rewrite, stem, v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = sorted(schemas)
    # always include $namespaces in the #main -- but only when something was
    # packed; without the length check an empty $graph raised IndexError here
    # only when namespaces happened to be present.
    if namespaces and len(packed["$graph"]) > 0:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed