def nest_jagged_forms(parent, child, counts_name, name):
    """Place child listarray inside parent listarray as a double-jagged array.

    The counts form stored in the parent's record under ``counts_name`` is
    converted with ``transforms.counts2offsets_form``; the child's content is
    then wrapped with ``listarray_form`` using those offsets and stored in the
    parent's record under ``name``.  ``parent`` is modified in place.

    Parameters
    ----------
    parent : dict
        Form whose ``"class"`` starts with ``"ListOffsetArray"`` and whose
        ``"content"`` has class ``"RecordArray"``.
    child : dict
        Form whose ``"class"`` starts with ``"ListOffsetArray"``.
    counts_name : str
        Key in ``parent["content"]["contents"]`` holding the counts form.
    name : str
        Key under which the nested form is stored in
        ``parent["content"]["contents"]``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``parent`` or ``child`` does not have the expected form class.
    """
    # Validate everything up front so that ``parent`` is never left
    # half-modified, and say which precondition failed.
    if not parent["class"].startswith("ListOffsetArray"):
        raise ValueError(
            f"parent form must be a ListOffsetArray, got {parent['class']!r}"
        )
    if parent["content"]["class"] != "RecordArray":
        raise ValueError(
            "parent content must be a RecordArray, "
            f"got {parent['content']['class']!r}"
        )
    if not child["class"].startswith("ListOffsetArray"):
        raise ValueError(
            f"child form must be a ListOffsetArray, got {child['class']!r}"
        )

    counts = parent["content"]["contents"][counts_name]
    offsets = transforms.counts2offsets_form(counts)
    inner = listarray_form(child["content"], offsets)
    parent["content"]["contents"][name] = inner
def _build_collections(self, branch_forms): def _tlorentz_vectorize(objname, form): # first handle RecordArray if {"fE", "fP"} == form.get("contents", {}).keys(): return zip_forms( { "x": form["contents"]["fP"]["contents"]["fX"], "y": form["contents"]["fP"]["contents"]["fY"], "z": form["contents"]["fP"]["contents"]["fZ"], "t": form["contents"]["fE"], }, objname, "LorentzVector", ) # If there's no "content", like a NumpyArray, just return. # Note: this comes after checking for RecordArray. if "content" not in form: return form # Then recursively go through and update the form's content. form["content"] = _tlorentz_vectorize(objname, form["content"]) return form # preprocess lorentz vectors properly (and recursively) for objname, form in branch_forms.items(): branch_forms[objname] = _tlorentz_vectorize(objname, form) # parse into high-level records (collections, list collections, and singletons) collections = set(k.split("/")[0] for k in branch_forms) collections -= set(k for k in collections if k.endswith("_size")) # Create offsets virtual arrays for name in collections: if f"{name}_size" in branch_forms: branch_forms[f"o{name}"] = transforms.counts2offsets_form( branch_forms[f"{name}_size"]) output = {} for name in collections: output[f"{name}.offsets"] = branch_forms[f"o{name}"] mixin = self.mixins.get(name, "NanoCollection") # Every delphes collection is a list offsets = branch_forms["o" + name] content = { k[2 * len(name) + 2:]: branch_forms[k] for k in branch_forms if k.startswith(name + "/" + name) } output[name] = zip_forms(content, name, record_name=mixin, offsets=offsets) # update docstrings as needed # NB: must be before flattening for easier logic for parameter in output[name]["content"]["contents"].keys(): if "parameters" not in output[name]["content"]["contents"][ parameter]: continue output[name]["content"]["contents"][parameter]["parameters"][ "__doc__"] = self.docstrings.get( parameter, output[name]["content"]["contents"][parameter] 
["parameters"].get("__doc__", "no docstring available"), ) # handle branches named like [4] and [5] output[name]["content"]["contents"] = { k.replace("[", "_").replace("]", ""): v for k, v in output[name]["content"]["contents"].items() } output[name]["content"]["parameters"].update({ "__doc__": offsets["parameters"]["__doc__"], "collection_name": name, }) if name in self.singletons: # flatten! this 'promotes' the content of an inner dimension # upwards, effectively hiding one nested dimension output[name] = output[name]["content"] return output
def _build_collections(self, branch_forms): # parse into high-level records (collections, list collections, and singletons) collections = set(k.split("_")[0] for k in branch_forms) collections -= set(k for k in collections if k.startswith("n") and k[1:] in collections) # Create offsets virtual arrays for name in collections: if "n" + name in branch_forms: branch_forms["o" + name] = transforms.counts2offsets_form( branch_forms["n" + name]) # Create global index virtual arrays for indirection idxbranches = [k for k in branch_forms if "Idx" in k] for name in collections: indexers = [k for k in idxbranches if k.startswith(name + "_")] for k in indexers: target = k[len(name) + 1:k.find("Idx")] target = target[0].upper() + target[1:] if target not in collections: raise RuntimeError( "Parsing indexer %s, expected to find collection %s but did not" % (k, target)) branch_forms[k + "G"] = transforms.local2global_form( branch_forms[k], branch_forms["o" + target]) # Create nested indexer from Idx1, Idx2, ... 
arrays for name, indexers in self.nested_items.items(): if all(idx in branch_forms for idx in indexers): branch_forms[name] = transforms.nestedindex_form( [branch_forms[idx] for idx in indexers]) # Create any special arrays for name, (fcn, args) in self.special_items.items(): if all(k in branch_forms for k in args): branch_forms[name] = fcn(*(branch_forms[k] for k in args)) output = {} for name in collections: mixin = self.mixins.get(name, "NanoCollection") if "o" + name in branch_forms and name not in branch_forms: # list collection offsets = branch_forms["o" + name] content = { k[len(name) + 1:]: branch_forms[k] for k in branch_forms if k.startswith(name + "_") } output[name] = zip_forms(content, name, record_name=mixin, offsets=offsets) output[name]["content"]["parameters"].update({ "__doc__": offsets["parameters"]["__doc__"], "collection_name": name, }) elif "o" + name in branch_forms: # list singleton, can use branch's own offsets output[name] = branch_forms[name] output[name]["parameters"].update({ "__array__": mixin, "collection_name": name }) elif name in branch_forms: # singleton output[name] = branch_forms[name] else: # simple collection output[name] = zip_forms( { k[len(name) + 1:]: branch_forms[k] for k in branch_forms if k.startswith(name + "_") }, name, record_name=mixin, ) output[name]["parameters"].update({"collection_name": name}) return output
def _build_collections(self, branch_forms): # parse into high-level records (collections, list collections, and singletons) collections = set(k.split("_")[0] for k in branch_forms) collections -= set( k for k in collections if k.startswith("n") and k[1:] in collections ) isData = "GenPart" not in collections # Create offsets virtual arrays for name in collections: if "n" + name in branch_forms: branch_forms["o" + name] = transforms.counts2offsets_form( branch_forms["n" + name] ) # Create global index virtual arrays for indirection for indexer, target in self.cross_references.items(): if target.startswith("Gen") and isData: continue if indexer not in branch_forms: if self.warn_missing_crossrefs: warnings.warn( f"Missing cross-reference index for {indexer} => {target}", RuntimeWarning, ) continue if "o" + target not in branch_forms: if self.warn_missing_crossrefs: warnings.warn( f"Missing cross-reference target for {indexer} => {target}", RuntimeWarning, ) continue branch_forms[indexer + "G"] = transforms.local2global_form( branch_forms[indexer], branch_forms["o" + target] ) # Create nested indexer from Idx1, Idx2, ... 
arrays for name, indexers in self.nested_items.items(): if all(idx in branch_forms for idx in indexers): branch_forms[name] = transforms.nestedindex_form( [branch_forms[idx] for idx in indexers] ) # Create nested indexer from n* counts arrays for name, (local_counts, target) in self.nested_index_items.items(): if local_counts in branch_forms and "o" + target in branch_forms: branch_forms[name] = transforms.counts2nestedindex_form( branch_forms[local_counts], branch_forms["o" + target] ) # Create any special arrays for name, (fcn, args) in self.special_items.items(): if all(k in branch_forms for k in args): branch_forms[name] = fcn(*(branch_forms[k] for k in args)) output = {} for name in collections: mixin = self.mixins.get(name, "NanoCollection") if "o" + name in branch_forms and name not in branch_forms: # list collection offsets = branch_forms["o" + name] content = { k[len(name) + 1 :]: branch_forms[k] for k in branch_forms if k.startswith(name + "_") } output[name] = zip_forms( content, name, record_name=mixin, offsets=offsets ) output[name]["content"]["parameters"].update( { "__doc__": offsets["parameters"]["__doc__"], "collection_name": name, } ) elif "o" + name in branch_forms: # list singleton, can use branch's own offsets output[name] = branch_forms[name] output[name]["parameters"].update( {"__array__": mixin, "collection_name": name} ) elif name in branch_forms: # singleton output[name] = branch_forms[name] else: # simple collection output[name] = zip_forms( { k[len(name) + 1 :]: branch_forms[k] for k in branch_forms if k.startswith(name + "_") }, name, record_name=mixin, ) output[name]["parameters"].update({"collection_name": name}) return output