Beispiel #1
0
def nest_jagged_forms(parent, child, counts_name, name):
    """Place child listarray inside parent listarray as a double-jagged array

    The form found at ``parent["content"]["contents"][counts_name]`` is turned
    into an offsets form with ``transforms.counts2offsets_form``.  That offsets
    form and ``child["content"]`` are passed to ``listarray_form`` and the
    result is stored at ``parent["content"]["contents"][name]``.  ``parent`` is
    modified in place and nothing is returned.

    Parameters
    ----------
    parent : dict
        Form whose ``"class"`` starts with ``"ListOffsetArray"`` and whose
        ``"content"`` has class ``"RecordArray"``.
    child : dict
        Form whose ``"class"`` starts with ``"ListOffsetArray"``; only its
        ``"content"`` is used.
    counts_name : str
        Field of the parent record holding the counts form.
    name : str
        Field of the parent record under which the nested form is stored.

    Raises
    ------
    ValueError
        If ``parent`` or ``child`` is not a ``ListOffsetArray*`` form, or if
        the content of ``parent`` is not a ``RecordArray``.
    KeyError
        If ``counts_name`` is not a field of the parent record (propagated
        from the dictionary lookup).
    """
    if not parent["class"].startswith("ListOffsetArray"):
        raise ValueError(
            f"parent must be a ListOffsetArray form, got class {parent['class']!r}"
        )
    record = parent["content"]
    if record["class"] != "RecordArray":
        raise ValueError(
            f"parent content must be a RecordArray form, got class {record['class']!r}"
        )
    if not child["class"].startswith("ListOffsetArray"):
        raise ValueError(
            f"child must be a ListOffsetArray form, got class {child['class']!r}"
        )
    counts = record["contents"][counts_name]
    offsets = transforms.counts2offsets_form(counts)
    # The child's own list layer is dropped; its content is re-wrapped with
    # offsets derived from the parent's counts field.
    record["contents"][name] = listarray_form(child["content"], offsets)
Beispiel #2
0
    def _build_collections(self, branch_forms):
        """Group flat branch forms into one zipped record form per collection.

        A collection name is the part of a branch key before the first ``"/"``;
        names ending in ``"_size"`` are not collections themselves.

        ``branch_forms`` is modified in place: records whose fields are exactly
        ``fE`` and ``fP`` are replaced by ``"LorentzVector"`` records, and an
        ``"o<name>"`` offsets form is added for each collection that has a
        ``"<name>_size"`` branch.

        Returns a dict with, for every collection, the offsets form under
        ``"<name>.offsets"`` and the zipped form under ``"<name>"``.  For names
        in ``self.singletons`` the ``"<name>"`` entry is the ``"content"`` of
        the zipped form instead of the zipped form itself.
        """

        def _as_lorentz(label, node):
            # A record holding exactly fE and fP is rebuilt with fields x, y, z, t.
            fields = node.get("contents", {})
            if fields.keys() == {"fE", "fP"}:
                momentum = fields["fP"]["contents"]
                return zip_forms(
                    {
                        "x": momentum["fX"],
                        "y": momentum["fY"],
                        "z": momentum["fZ"],
                        "t": fields["fE"],
                    },
                    label,
                    "LorentzVector",
                )
            # Forms carrying a "content" are descended into; anything else
            # (no "content" key) is handed back untouched.
            if "content" in node:
                node["content"] = _as_lorentz(label, node["content"])
            return node

        # Rewrite Lorentz-vector records everywhere, at any nesting depth.
        for key in list(branch_forms):
            branch_forms[key] = _as_lorentz(key, branch_forms[key])

        # Collection names, minus the "<name>_size" counters.
        collections = {key.partition("/")[0] for key in branch_forms}
        collections.difference_update(
            {entry for entry in collections if entry.endswith("_size")}
        )

        # Derive an offsets form from each available size branch.
        for name in collections:
            size_key = name + "_size"
            if size_key not in branch_forms:
                continue
            branch_forms["o" + name] = transforms.counts2offsets_form(
                branch_forms[size_key]
            )

        # "[" becomes "_" and "]" is dropped, so "x[4]" turns into "x_4".
        bracket_table = str.maketrans({"[": "_", "]": None})

        output = {}
        for name in collections:
            # Every collection is zipped as a list, so its offsets are required.
            offsets = branch_forms["o" + name]
            output[name + ".offsets"] = offsets
            mixin = self.mixins.get(name, "NanoCollection")

            # Keys look like "<name>/<name>?<field>"; strip that prefix plus
            # the single separator character following it.
            prefix = name + "/" + name
            cut = len(prefix) + 1
            fields = {
                key[cut:]: form
                for key, form in branch_forms.items()
                if key.startswith(prefix)
            }
            zipped = zip_forms(fields, name, record_name=mixin, offsets=offsets)
            output[name] = zipped
            record = zipped["content"]

            # Docstrings are refreshed before any renaming or flattening so the
            # lookup uses the original field names.
            for field, field_form in record["contents"].items():
                if "parameters" in field_form:
                    params = field_form["parameters"]
                    params["__doc__"] = self.docstrings.get(
                        field,
                        params.get("__doc__", "no docstring available"),
                    )

            # handle branches named like [4] and [5]
            record["contents"] = {
                field.translate(bracket_table): field_form
                for field, field_form in record["contents"].items()
            }
            record["parameters"].update(
                {
                    "__doc__": offsets["parameters"]["__doc__"],
                    "collection_name": name,
                }
            )

            if name in self.singletons:
                # Expose the record directly, hiding the outer list dimension.
                output[name] = record

        return output
Beispiel #3
0
    def _build_collections(self, branch_forms):
        """Group flat ``<collection>_<field>`` branch forms into collections.

        A collection name is the part of a branch key before the first ``"_"``.
        A name ``"nX"`` is dropped when ``"X"`` is also a collection name.

        ``branch_forms`` is extended in place with derived forms: ``"o<name>"``
        offsets, ``"<indexer>G"`` global indices for every branch containing
        ``"Idx"``, and the entries described by ``self.nested_items`` and
        ``self.special_items`` whose inputs are all present.

        Returns a dict mapping each collection name to its form.

        Raises ``RuntimeError`` when an ``Idx`` branch points at a collection
        name that was not found.
        """
        # Collection names, minus the "n<name>" counters of known collections.
        collections = {key.partition("_")[0] for key in branch_forms}
        collections.difference_update(
            {
                entry
                for entry in collections
                if entry.startswith("n") and entry[1:] in collections
            }
        )

        # Derive an offsets form from each available counts branch.
        for name in collections:
            counts_key = "n" + name
            if counts_key not in branch_forms:
                continue
            branch_forms["o" + name] = transforms.counts2offsets_form(
                branch_forms[counts_key]
            )

        # Global index forms: the candidate list is fixed up front, so the
        # "...G" branches added below are never scanned themselves.
        idx_branches = [key for key in branch_forms if "Idx" in key]
        for name in collections:
            prefix = name + "_"
            for branch in idx_branches:
                if not branch.startswith(prefix):
                    continue
                # Text between the prefix and "Idx", first letter upper-cased,
                # names the collection being indexed.
                stem = branch[len(prefix):branch.find("Idx")]
                target = stem[0].upper() + stem[1:]
                if target not in collections:
                    raise RuntimeError(
                        "Parsing indexer %s, expected to find collection %s but did not"
                        % (branch, target))
                branch_forms[branch + "G"] = transforms.local2global_form(
                    branch_forms[branch], branch_forms["o" + target])

        # Create nested indexer from Idx1, Idx2, ... arrays
        for nested_name, indexers in self.nested_items.items():
            if any(idx not in branch_forms for idx in indexers):
                continue
            branch_forms[nested_name] = transforms.nestedindex_form(
                [branch_forms[idx] for idx in indexers])

        # Special forms are built only when every input branch exists.
        for special_name, (builder, inputs) in self.special_items.items():
            if any(dep not in branch_forms for dep in inputs):
                continue
            branch_forms[special_name] = builder(
                *[branch_forms[dep] for dep in inputs])

        output = {}
        for name in collections:
            mixin = self.mixins.get(name, "NanoCollection")
            offsets_key = "o" + name
            has_offsets = offsets_key in branch_forms

            if name in branch_forms:
                # A branch named exactly like the collection is used as-is
                # (singleton) ...
                output[name] = branch_forms[name]
                if has_offsets:
                    # ... and tagged when it is a list singleton, which can
                    # use the branch's own offsets.
                    output[name]["parameters"].update({
                        "__array__": mixin,
                        "collection_name": name
                    })
                continue

            prefix = name + "_"
            fields = {
                key[len(prefix):]: form
                for key, form in branch_forms.items()
                if key.startswith(prefix)
            }
            if has_offsets:
                # list collection
                offsets = branch_forms[offsets_key]
                record = zip_forms(fields,
                                   name,
                                   record_name=mixin,
                                   offsets=offsets)
                record["content"]["parameters"].update({
                    "__doc__": offsets["parameters"]["__doc__"],
                    "collection_name": name,
                })
            else:
                # simple collection
                record = zip_forms(fields, name, record_name=mixin)
                record["parameters"].update({"collection_name": name})
            output[name] = record

        return output
Beispiel #4
0
    def _build_collections(self, branch_forms):
        """Group flat ``<collection>_<field>`` branch forms into collections.

        A collection name is the part of a branch key before the first ``"_"``.
        A name ``"nX"`` is dropped when ``"X"`` is also a collection name.

        ``branch_forms`` is extended in place with derived forms: ``"o<name>"``
        offsets, ``"<indexer>G"`` global indices for the pairs listed in
        ``self.cross_references``, and the entries described by
        ``self.nested_items``, ``self.nested_index_items`` and
        ``self.special_items`` whose inputs are all present.

        A cross-reference whose index branch or target offsets are missing is
        skipped; a ``RuntimeWarning`` is issued for it when
        ``self.warn_missing_crossrefs`` is true.

        Returns a dict mapping each collection name to its form.
        """
        # Collection names, minus the "n<name>" counters of known collections.
        collections = {key.partition("_")[0] for key in branch_forms}
        collections.difference_update(
            {
                entry
                for entry in collections
                if entry.startswith("n") and entry[1:] in collections
            }
        )
        # Without a GenPart collection, cross-references to "Gen*" targets are
        # skipped silently further down.
        is_data = "GenPart" not in collections

        # Derive an offsets form from each available counts branch.
        for name in collections:
            counts_key = "n" + name
            if counts_key not in branch_forms:
                continue
            branch_forms["o" + name] = transforms.counts2offsets_form(
                branch_forms[counts_key]
            )

        # Create global index virtual arrays for indirection
        for indexer, target in self.cross_references.items():
            if target.startswith("Gen") and is_data:
                continue
            target_offsets = "o" + target
            if indexer in branch_forms and target_offsets in branch_forms:
                branch_forms[indexer + "G"] = transforms.local2global_form(
                    branch_forms[indexer], branch_forms[target_offsets]
                )
                continue
            # Something is missing: optionally report which side, then move on.
            if not self.warn_missing_crossrefs:
                continue
            if indexer not in branch_forms:
                warnings.warn(
                    f"Missing cross-reference index for {indexer} => {target}",
                    RuntimeWarning,
                )
            else:
                warnings.warn(
                    f"Missing cross-reference target for {indexer} => {target}",
                    RuntimeWarning,
                )

        # Create nested indexer from Idx1, Idx2, ... arrays
        for nested_name, indexers in self.nested_items.items():
            if any(idx not in branch_forms for idx in indexers):
                continue
            branch_forms[nested_name] = transforms.nestedindex_form(
                [branch_forms[idx] for idx in indexers]
            )

        # Create nested indexer from n* counts arrays
        for nested_name, (local_counts, target) in self.nested_index_items.items():
            if local_counts not in branch_forms or "o" + target not in branch_forms:
                continue
            branch_forms[nested_name] = transforms.counts2nestedindex_form(
                branch_forms[local_counts], branch_forms["o" + target]
            )

        # Special forms are built only when every input branch exists.
        for special_name, (builder, inputs) in self.special_items.items():
            if any(dep not in branch_forms for dep in inputs):
                continue
            branch_forms[special_name] = builder(
                *[branch_forms[dep] for dep in inputs]
            )

        output = {}
        for name in collections:
            mixin = self.mixins.get(name, "NanoCollection")
            offsets_key = "o" + name
            has_offsets = offsets_key in branch_forms

            if name in branch_forms:
                # A branch named exactly like the collection is used as-is
                # (singleton) ...
                output[name] = branch_forms[name]
                if has_offsets:
                    # ... and tagged when it is a list singleton, which can
                    # use the branch's own offsets.
                    output[name]["parameters"].update(
                        {"__array__": mixin, "collection_name": name}
                    )
                continue

            prefix = name + "_"
            fields = {
                key[len(prefix) :]: form
                for key, form in branch_forms.items()
                if key.startswith(prefix)
            }
            if has_offsets:
                # list collection
                offsets = branch_forms[offsets_key]
                record = zip_forms(
                    fields, name, record_name=mixin, offsets=offsets
                )
                record["content"]["parameters"].update(
                    {
                        "__doc__": offsets["parameters"]["__doc__"],
                        "collection_name": name,
                    }
                )
            else:
                # simple collection
                record = zip_forms(fields, name, record_name=mixin)
                record["parameters"].update({"collection_name": name})
            output[name] = record

        return output