def pack(document_loader,  # type: Loader
         processobj,       # type: Union[Dict[Text, Any], List[Dict[Text, Any]]]
         uri,              # type: Text
         metadata,         # type: Dict[Text, Text]
         rewrite_out=None  # type: Dict[Text, Text]
         ):
    # type: (...) -> Dict[Text, Any]
    """Combine a process document and every process it references into one
    document with a ``$graph`` list, rewriting all ids into a ``#main``-rooted
    fragment namespace.

    If ``rewrite_out`` is given, it is used as (and mutated to hold) the
    original-id -> packed-id mapping; otherwise a private dict is used.
    Returns the packed document.
    """
    # Work on a sub-loader with a fresh index so we only see the documents
    # explicitly registered below, not whatever the caller's loader cached.
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, dict):
        document_loader.idx[processobj["id"]] = CommentedMap(iteritems(processobj))
    elif isinstance(processobj, list):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # With no fragment on the requested uri, treat the "#main"
            # entry of a $graph document as the entry point.
            if not frag:
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(iteritems(po))

    # Resolver callback handed to find_run() for chasing "run" references.
    def loadref(base, uri):
        # type: (Optional[Text], Text) -> Union[Dict, List, Text, None]
        return document_loader.resolve_ref(uri, base_url=base)[0]

    # Collect every id in the main document, then every id in each process
    # reachable through "run" references.
    ids = set()  # type: Set[Text]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[Text]
    if rewrite_out is None:
        rewrite = {}  # type: Dict[Text, Text]
    else:
        rewrite = rewrite_out

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (Text, Text) -> None
        """Record the packed name for id ``r``: the main uri becomes "#main",
        ids under it become "#main/..." fragments (uniquified against
        ``names``), and ids in other files get a shortname-based fragment."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri)+1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/"+r[len(mainuri)+1:], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sort for a deterministic rewrite table (uniquename() is stateful).
    sortedids = sorted(ids)

    for r in sortedids:
        rewrite_id(r, uri)

    packed = {"$graph": [],
              "cwlVersion": metadata["cwlVersion"]
              }  # type: Dict[Text, Any]

    namespaces = metadata.get('$namespaces', None)
    schemas = set()  # type: Set[Text]
    for r in sorted(runs):
        # NOTE: ``metadata`` is deliberately rebound here to the metadata of
        # each resolved run document; the parameter value is no longer needed.
        dcr, metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, dict):
            continue
        # Accumulate $schemas from both the document and its metadata.
        for doc in (dcr, metadata):
            if "$schemas" in doc:
                for s in doc["$schemas"]:
                    schemas.add(s)
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        # Deep-copy so the rewrite below can't corrupt the loader's index.
        dc = cast(Dict[Text, Any], copy.deepcopy(dcr))
        v = rewrite[r]
        dc["id"] = v
        # Strip per-document fields that become top-level in the packed doc.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        packed["$schemas"] = list(schemas)

    # Rewrite every reference to its packed "#..." form.  Whole-file
    # references get a trailing "#", fragment references a trailing "/".
    for r in list(rewrite.keys()):
        v = rewrite[r]
        replace_refs(packed, rewrite, r + "/" if "#" in r else r + "#", v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' inside $graph when there is a single item
        # because we're printing contents inside '$graph' rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if namespaces:
            packed["$graph"][0]["$namespaces"] = dict(cast(Dict, namespaces))

    return packed
def pack(
    loadingContext: LoadingContext,
    uri: str,
    rewrite_out: Optional[Dict[str, str]] = None,
    loader: Optional[Loader] = None,
) -> CWLObjectType:
    """Pack the process at ``uri`` and everything it runs into one document.

    The result has a ``$graph`` list of processes, ids rewritten under
    "#main", inputs/outputs renamed via separate rewrite tables, all
    documents upgraded (via ``update``) to the highest CWL version found,
    and ``$schemas``/``$namespaces`` hoisted to the top level.  If
    ``rewrite_out`` is supplied it receives the combined id-rewrite map.
    """
    # The workflow document we have in memory right now may have been
    # updated to the internal CWL version.  We need to reload the
    # document to go back to its original version.
    #
    # What's going on here is that the updater replaces the
    # documents/fragments in the index with updated ones, the
    # index is also used as a cache, so we need to go through the
    # loading process with an empty index and updating turned off
    # so we have the original un-updated documents.
    #
    loadingContext = loadingContext.copy()
    document_loader = SubLoader(loader or loadingContext.loader or Loader({}))
    loadingContext.do_update = False
    loadingContext.loader = document_loader
    loadingContext.loader.idx = {}
    loadingContext.metadata = {}
    loadingContext, docobj, uri = fetch_document(uri, loadingContext)
    loadingContext, fileuri = resolve_and_validate_document(
        loadingContext, docobj, uri, preprocess_only=True
    )
    if loadingContext.loader is None:
        raise Exception("loadingContext.loader cannot be none")
    processobj, metadata = loadingContext.loader.resolve_ref(uri)
    document_loader = loadingContext.loader

    # Register the main document (or each $graph entry) in the fresh index.
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(processobj.items())
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # With no fragment requested, "#main" becomes the entry point.
            if not frag:
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(po.items())
        document_loader.idx[metadata["id"]] = CommentedMap(metadata.items())

    # Track every cwlVersion encountered so everything can be upgraded to
    # the newest one below.
    found_versions = {
        cast(str, loadingContext.metadata["cwlVersion"])
    }  # type: Set[str]

    # Resolver for find_run(); runs each referenced document through the
    # full fetch/validate pipeline and records its cwlVersion.
    def loadref(base: Optional[str], lr_uri: str) -> ResolveType:
        lr_loadingContext = loadingContext.copy()
        lr_loadingContext.metadata = {}
        lr_loadingContext, lr_workflowobj, lr_uri = fetch_document(
            lr_uri, lr_loadingContext
        )
        lr_loadingContext, lr_uri = resolve_and_validate_document(
            lr_loadingContext, lr_workflowobj, lr_uri
        )
        found_versions.add(cast(str, lr_loadingContext.metadata["cwlVersion"]))
        if lr_loadingContext.loader is None:
            raise Exception("loader should not be None")
        return lr_loadingContext.loader.resolve_ref(lr_uri, base_url=base)[0]

    # Inputs and outputs are renamed through separate tables so output
    # references can be fixed up after import_embed (see below).
    input_ids: Set[str] = set()
    output_ids: Set[str] = set()

    if isinstance(processobj, MutableSequence):
        mainobj = processobj[0]
    else:
        mainobj = processobj
    find_ids(cast(Dict[str, Any], mainobj)["inputs"], input_ids)
    find_ids(cast(Dict[str, Any], mainobj)["outputs"], output_ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    # Figure out the highest version, everything needs to be updated
    # to it.
    m = 0
    for fv in found_versions:
        m = max(m, ORDERED_VERSIONS.index(fv))
    update_to_version = ORDERED_VERSIONS[m]

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], input_ids)

    input_names: Set[str] = set()
    output_names: Set[str] = set()

    rewrite_inputs: Dict[str, str] = {}
    rewrite_outputs: Dict[str, str] = {}

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(
        r: str, mainuri: str, rewrite: Dict[str, str], names: Set[str]
    ) -> None:
        """Record the packed name for id ``r`` in ``rewrite``: the main uri
        maps to "#main", ids under it to uniquified "#main/..." fragments,
        and ids from other files to shortname-based fragments."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            if r[len(mainuri) :].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri) + 1 :], names)
            else:
                rewrite[r] = "#" + uniquename("main/" + r[len(mainuri) + 1 :], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted for deterministic uniquename() results.
    sorted_input_ids = sorted(input_ids)
    sorted_output_ids = sorted(output_ids)

    for r in sorted_input_ids:
        rewrite_id(r, uri, rewrite_inputs, input_names)

    for r in sorted_output_ids:
        rewrite_id(r, uri, rewrite_outputs, output_names)

    packed = CommentedMap(
        (("$graph", CommentedSeq()), ("cwlVersion", update_to_version))
    )
    namespaces = metadata.get("$namespaces", None)

    schemas: Set[str] = set()
    if "$schemas" in metadata:
        for each_schema in metadata["$schemas"]:
            schemas.add(each_schema)

    for r in sorted(runs):
        # NOTE: ``metadata`` is rebound here to each run's own metadata.
        dcr, metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue

        # Upgrade this document to the common target version.
        dcr = update(
            dcr,
            document_loader,
            r,
            loadingContext.enable_dev,
            metadata,
            update_to_version,
        )

        # Drop the updater's bookkeeping key from both maps.
        if ORIGINAL_CWLVERSION in metadata:
            del metadata[ORIGINAL_CWLVERSION]
        if ORIGINAL_CWLVERSION in dcr:
            del dcr[ORIGINAL_CWLVERSION]

        if "$schemas" in metadata:
            for s in metadata["$schemas"]:
                schemas.add(s)
        if dcr.get("class") not in (
            "Workflow",
            "CommandLineTool",
            "ExpressionTool",
            "Operation",
        ):
            continue
        # Deep-copy so rewriting doesn't corrupt the loader's index/cache.
        dc = cast(Dict[str, Any], copy.deepcopy(dcr))
        v = rewrite_inputs[r]
        dc["id"] = v
        # These become top-level fields of the packed document.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        packed["$schemas"] = list(schemas)
    if namespaces:
        packed["$namespaces"] = namespaces

    # Temporarily remove the main process's outputs so the input rewrite
    # pass and import_embed don't touch them; they get their own rewrite
    # table applied afterwards.
    save_outputs = packed["$graph"][0].pop("outputs")
    for r in list(rewrite_inputs.keys()):
        v = rewrite_inputs[r]
        replace_refs(packed, rewrite_inputs, r + "/" if "#" in r else r + "#", v + "/")

    import_embed(packed, set())

    packed["$graph"][0]["outputs"] = save_outputs
    for r in list(rewrite_outputs.keys()):
        v = rewrite_outputs[r]
        replace_refs(
            packed["$graph"][0]["outputs"],
            rewrite_outputs,
            r + "/" if "#" in r else r + "#",
            v + "/",
        )

    for r in list(
        rewrite_inputs.keys()
    ):  # again, to process the outputSource references
        v = rewrite_inputs[r]
        replace_refs(packed, rewrite_inputs, r + "/" if "#" in r else r + "#", v + "/")

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion', '$schemas', and '$namespaces' inside '$graph'
        # when there is only a single item because main.print_pack() will print
        # the contents inside '$graph' rather than whole dict in this case
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = list(schemas)
        if namespaces:
            packed["$graph"][0]["$namespaces"] = namespaces

    # Expose the combined rewrite map to the caller on request.
    if rewrite_out is not None:
        rewrite_out.update(rewrite_inputs)
        rewrite_out.update(rewrite_outputs)

    return packed
def pack(
    loadingContext: LoadingContext,
    uri,  # type: str
    rewrite_out=None,  # type: Optional[Dict[str, str]]
    loader=None,  # type: Optional[Loader]
):  # type: (...) -> Dict[str, Any]
    """Pack the process at ``uri`` and everything it runs into one document
    with a ``$graph`` list, rewriting all ids into a ``#main``-rooted
    fragment namespace.

    If ``rewrite_out`` is given it is used as (and mutated to hold) the
    original-id -> packed-id mapping.  Returns the packed document.
    """
    # The workflow document we have in memory right now may have been
    # updated to the internal CWL version.  We need to reload the
    # document to go back to its original version.
    #
    # What's going on here is that the updater replaces the
    # documents/fragments in the index with updated ones, the
    # index is also used as a cache, so we need to go through the
    # loading process with an empty index and updating turned off
    # so we have the original un-updated documents.
    #
    loadingContext = loadingContext.copy()
    document_loader = SubLoader(loader or loadingContext.loader or Loader({}))
    loadingContext.do_update = False
    loadingContext.loader = document_loader
    loadingContext.loader.idx = {}
    loadingContext.metadata = {}
    loadingContext, docobj, uri = fetch_document(uri, loadingContext)
    loadingContext, fileuri = resolve_and_validate_document(
        loadingContext, docobj, uri, preprocess_only=True
    )
    if loadingContext.loader is None:
        raise Exception("loadingContext.loader cannot be none")
    processobj, metadata = loadingContext.loader.resolve_ref(uri)
    document_loader = loadingContext.loader

    # Register the main document (or each $graph entry) in the fresh index.
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(processobj.items())
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # With no fragment requested, "#main" becomes the entry point.
            if not frag:
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(po.items())
        document_loader.idx[metadata["id"]] = CommentedMap(metadata.items())

    # Resolver callback handed to find_run() for chasing "run" references.
    def loadref(base, uri):
        # type: (Optional[str], str) -> ResolveType
        return document_loader.resolve_ref(uri, base_url=base)[0]

    # Collect every id in the main document, then every id in each process
    # reachable through "run" references.
    ids = set()  # type: Set[str]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[str]
    if rewrite_out is None:
        rewrite = {}  # type: Dict[str, str]
    else:
        rewrite = rewrite_out

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (str, str) -> None
        """Record the packed name for id ``r``: the main uri becomes "#main",
        ids under it become uniquified "#main/..." fragments, and ids in
        other files get shortname-based fragments."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            if r[len(mainuri) :].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri) + 1 :], names)
            else:
                rewrite[r] = "#" + uniquename("main/" + r[len(mainuri) + 1 :], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted for deterministic uniquename() results.
    sortedids = sorted(ids)

    for r in sortedids:
        rewrite_id(r, uri)

    packed = CommentedMap(
        (("$graph", CommentedSeq()), ("cwlVersion", metadata["cwlVersion"]))
    )
    namespaces = metadata.get("$namespaces", None)

    schemas = set()  # type: Set[str]
    if "$schemas" in metadata:
        for each_schema in metadata["$schemas"]:
            schemas.add(each_schema)
    for r in sorted(runs):
        # NOTE: ``metadata`` is rebound here to each run's own metadata.
        dcr, metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        if "$schemas" in metadata:
            for s in metadata["$schemas"]:
                schemas.add(s)
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        # Deep-copy so rewriting doesn't corrupt the loader's index/cache.
        dc = cast(Dict[str, Any], copy.deepcopy(dcr))
        v = rewrite[r]
        dc["id"] = v
        # These become top-level fields of the packed document.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        packed["$schemas"] = list(schemas)

    # Rewrite every reference to its packed "#..." form.  Whole-file
    # references get a trailing "#", fragment references a trailing "/".
    for r in list(rewrite.keys()):
        v = rewrite[r]
        replace_refs(packed, rewrite, r + "/" if "#" in r else r + "#", v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = list(schemas)

    # always include $namespaces in the #main
    if namespaces:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed
def pack(
    document_loader: Loader,
    processobj,  # type: Union[Dict[str, Any], List[Dict[str, Any]]]
    uri,  # type: str
    metadata,  # type: Dict[str, str]
    rewrite_out=None,  # type: Optional[Dict[str, str]]
):  # type: (...) -> Dict[str, Any]
    """Combine ``processobj`` and every process it runs into one document
    with a ``$graph`` list, rewriting all ids into a ``#main``-rooted
    fragment namespace.

    If ``rewrite_out`` is given it is used as (and mutated to hold) the
    original-id -> packed-id mapping.  Returns the packed document.
    """
    # Work on a sub-loader with a fresh index so we only see the documents
    # explicitly registered below, not whatever the caller's loader cached.
    document_loader = SubLoader(document_loader)
    document_loader.idx = {}
    if isinstance(processobj, MutableMapping):
        document_loader.idx[processobj["id"]] = CommentedMap(
            processobj.items())
    elif isinstance(processobj, MutableSequence):
        _, frag = urllib.parse.urldefrag(uri)
        for po in processobj:
            # With no fragment on the requested uri, treat the "#main"
            # entry of a $graph document as the entry point.
            if not frag:
                if po["id"].endswith("#main"):
                    uri = po["id"]
            document_loader.idx[po["id"]] = CommentedMap(po.items())
        document_loader.idx[metadata["id"]] = CommentedMap(metadata.items())

    # Resolver callback handed to find_run() for chasing "run" references.
    def loadref(base, uri):
        # type: (Optional[str], str) -> Union[Dict[str, Any], List[Dict[str, Any]], str, None]
        return document_loader.resolve_ref(uri, base_url=base)[0]

    # Collect every id in the main document, then every id in each process
    # reachable through "run" references.
    ids = set()  # type: Set[str]
    find_ids(processobj, ids)

    runs = {uri}
    find_run(processobj, loadref, runs)

    for f in runs:
        find_ids(document_loader.resolve_ref(f)[0], ids)

    names = set()  # type: Set[str]
    if rewrite_out is None:
        rewrite = {}  # type: Dict[str, str]
    else:
        rewrite = rewrite_out

    mainpath, _ = urllib.parse.urldefrag(uri)

    def rewrite_id(r, mainuri):
        # type: (str, str) -> None
        """Record the packed name for id ``r``: the main uri becomes "#main",
        ids under it become uniquified "#main/..." fragments, and ids in
        other files get shortname-based fragments."""
        if r == mainuri:
            rewrite[r] = "#main"
        elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
            if r[len(mainuri):].startswith("#main/"):
                rewrite[r] = "#" + uniquename(r[len(mainuri) + 1:], names)
            else:
                rewrite[r] = "#" + uniquename("main/" + r[len(mainuri) + 1:], names)
        else:
            path, frag = urllib.parse.urldefrag(r)
            if path == mainpath:
                rewrite[r] = "#" + uniquename(frag, names)
            else:
                if path not in rewrite:
                    rewrite[path] = "#" + uniquename(shortname(path), names)

    # Sorted for deterministic uniquename() results.
    sortedids = sorted(ids)

    for r in sortedids:
        rewrite_id(r, uri)

    packed = CommentedMap(
        (("$graph", CommentedSeq()), ("cwlVersion", metadata["cwlVersion"])))
    namespaces = metadata.get("$namespaces", None)

    schemas = set()  # type: Set[str]
    if "$schemas" in metadata:
        for each_schema in metadata["$schemas"]:
            schemas.add(each_schema)
    for r in sorted(runs):
        # NOTE: ``metadata`` is rebound here to each run's own metadata.
        dcr, metadata = document_loader.resolve_ref(r)
        if isinstance(dcr, CommentedSeq):
            dcr = dcr[0]
            dcr = cast(CommentedMap, dcr)
        if not isinstance(dcr, MutableMapping):
            continue
        metadata = cast(Dict[str, Any], metadata)
        if "$schemas" in metadata:
            for s in metadata["$schemas"]:
                schemas.add(s)
        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
            continue
        # Deep-copy so rewriting doesn't corrupt the loader's index/cache.
        dc = cast(Dict[str, Any], copy.deepcopy(dcr))
        v = rewrite[r]
        dc["id"] = v
        # These become top-level fields of the packed document.
        for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
            if n in dc:
                del dc[n]
        packed["$graph"].append(dc)

    if schemas:
        packed["$schemas"] = list(schemas)

    # Rewrite every reference to its packed "#..." form.  Whole-file
    # references get a trailing "#", fragment references a trailing "/".
    for r in list(rewrite.keys()):
        v = rewrite[r]
        replace_refs(packed, rewrite, r + "/" if "#" in r else r + "#", v + "/")

    import_embed(packed, set())

    if len(packed["$graph"]) == 1:
        # duplicate 'cwlVersion' and $schemas inside $graph when there is only
        # a single item because we will print the contents inside '$graph'
        # rather than whole dict
        packed["$graph"][0]["cwlVersion"] = packed["cwlVersion"]
        if schemas:
            packed["$graph"][0]["$schemas"] = list(schemas)

    # always include $namespaces in the #main
    if namespaces:
        packed["$graph"][0]["$namespaces"] = namespaces

    return packed