Example #1
0
    def _extract_base_form(cls, column_source):
        branch_forms = {}
        for key, branch in column_source.items():
            if "," in key or "!" in key:
                warnings.warn(
                    f"Skipping {key} because it contains characters that NanoEvents cannot accept [,!]"
                )
                continue
            form = json.loads(branch.layout.form.tojson())
            if (form["class"].startswith("ListOffset")
                    and form["content"]["class"] == "NumpyArray"  # noqa
                ):
                form["form_key"] = quote(f"{key},!load")
                form["content"]["form_key"] = quote(f"{key},!load,!content")
                form["content"]["parameters"] = {"__doc__": key}
            elif form["class"] == "NumpyArray":
                form["form_key"] = quote(f"{key},!load")
                form["parameters"] = {"__doc__": key}
            else:
                warnings.warn(
                    f"Skipping {key} as it is not interpretable by NanoEvents")
                continue
            branch_forms[key] = form

        return {
            "class": "RecordArray",
            "contents": branch_forms,
            "parameters": {
                "__doc__": "preloaded column source"
            },
            "form_key": "",
        }
Example #2
0
def zip_forms(forms, name, record_name=None, offsets=None, bypass=False):
    """Zip several column forms into one record form.

    Three input shapes are supported:

    * every form is a ``ListOffsetArray*`` of the same class and offsets
      type: the list contents are zipped into a record and wrapped in a list
      form (the first form's, or one built from ``offsets`` when given);
    * every form is a ``NumpyArray``: the forms are zipped into a flat record;
    * any other mix of forms that all carry a ``"class"`` entry: zipped into
      a flat record as well, unless ``bypass`` is true.

    Args:
        forms: Non-empty dict mapping field name to form dictionary.
        name: Name embedded in the (invalid-by-design) form key of the record.
        record_name: Optional value for the record's ``__record__`` parameter.
        offsets: Optional offsets form; when given for jagged input the
            result is built with ``listarray_form(record, offsets)``.
        bypass: When true, the generic "anything with a class" fallback is
            disabled and such input raises ``NotImplementedError``.

    Returns:
        The zipped form dictionary.

    Raises:
        ValueError: If ``forms`` is not a dict, is empty, or contains jagged
            forms whose class or offsets type disagree.
        NotImplementedError: If the combination of forms cannot be zipped.
    """
    if not isinstance(forms, dict):
        raise ValueError("Expected a dictionary")
    if not forms:
        # Without this guard the vacuous all() below succeeds and
        # next(iter(...)) leaks a bare StopIteration to the caller.
        raise ValueError("Cannot zip an empty dictionary of forms")

    def make_record(contents):
        # Shared construction of the RecordArray node used by every branch.
        record = {
            "class": "RecordArray",
            "contents": contents,
            "form_key": quote("!invalid," + name),
        }
        if record_name is not None:
            record["parameters"] = {"__record__": record_name}
        return record

    if all(form["class"].startswith("ListOffsetArray")
           for form in forms.values()):
        first = next(iter(forms.values()))
        if not all(form["class"] == first["class"] for form in forms.values()):
            raise ValueError(
                f"Cannot zip jagged forms of different list classes into {name}"
            )
        if not all(form["offsets"] == first["offsets"]
                   for form in forms.values()):
            raise ValueError(
                f"Cannot zip jagged forms with different offsets types into {name}"
            )
        record = make_record({k: form["content"] for k, form in forms.items()})
        if offsets is None:
            # Reuse the list structure (and form key) of the first member.
            return {
                "class": first["class"],
                "offsets": first["offsets"],
                "content": record,
                "form_key": first["form_key"],
            }
        return listarray_form(record, offsets)

    if all(form["class"] == "NumpyArray" for form in forms.values()):
        return make_record(dict(forms))

    # Generic fallback: accept any forms that at least declare a class,
    # unless the caller opted out through ``bypass``.
    if all("class" in form for form in forms.values()) and not bypass:
        return make_record(dict(forms))

    raise NotImplementedError("Cannot zip forms")
Example #3
0
    def _extract_base_form(cls, arrow_schema):
        column_forms = {}
        for field in arrow_schema:
            key = field.name
            fmeta = {} if field.metadata is None else field.metadata

            if "," in key or "!" in key:
                warnings.warn(
                    f"Skipping {key} because it contains characters that NanoEvents cannot accept [,!]"
                )
                continue

            form = None
            if b"form" in fmeta:
                form = json.loads(fmeta[b"form"])
            else:
                schema = field.type
                form = arrow_schema_to_awkward_form(schema)
                form = json.loads(form.tojson())

            if (
                form["class"].startswith("ListOffset")
                and form["content"]["class"] == "NumpyArray"  # noqa
            ):
                form["form_key"] = quote(f"{key},!load")
                form["content"]["form_key"] = quote(f"{key},!load,!content")
                if b"title" in fmeta:
                    form["content"]["parameters"] = {
                        "__doc__": fmeta[b"title"].decode()
                    }
                elif "__doc__" not in form["content"]["parameters"]:
                    form["content"]["parameters"] = {"__doc__": key}
            elif form["class"] == "NumpyArray":
                form["form_key"] = quote(f"{key},!load")
                if b"title" in fmeta:
                    form["parameters"] = {"__doc__": fmeta[b"title"].decode()}
                elif "__doc__" not in form["parameters"]:
                    form["parameters"] = {"__doc__": key}
            else:
                warnings.warn(
                    f"Skipping {key} as it is not interpretable by NanoEvents"
                )
                continue
            column_forms[key] = form
        return {
            "class": "RecordArray",
            "contents": column_forms,
            "parameters": {"__doc__": "parquetfile"},
            "form_key": "",
        }
Example #4
0
 def _create_eventindex_form(base_form, key):
     form = copy.deepcopy(base_form)
     form["content"] = {
         "class": "NumpyArray",
         "parameters": {},
         "form_key": quote(f"{key},!load,!eventindex,!content"),
         "itemsize": 8,
         "primitive": "int64",
     }
     return form
Example #5
0
    def _extract_base_form(cls, tree):
        branch_forms = {}
        for key, branch in tree.iteritems():
            if "," in key or "!" in key:
                warnings.warn(
                    f"Skipping {key} because it contains characters that NanoEvents cannot accept [,!]"
                )
                continue
            if len(branch):
                continue
            form = branch.interpretation.awkward_form(None)
            form = uproot._util.awkward_form_remove_uproot(awkward1, form)
            form = json.loads(form.tojson())
            if (form["class"].startswith("ListOffset")
                    and form["content"]["class"] == "NumpyArray"  # noqa
                ):
                form["form_key"] = quote(f"{key},!load")
                form["content"]["form_key"] = quote(f"{key},!load,!content")
                form["content"]["parameters"] = {"__doc__": branch.title}
            elif form["class"] == "NumpyArray":
                form["form_key"] = quote(f"{key},!load")
                form["parameters"] = {"__doc__": branch.title}
            else:
                warnings.warn(
                    f"Skipping {key} as it is not interpretable by NanoEvents")
                continue
            branch_forms[key] = form

        return {
            "class": "RecordArray",
            "contents": branch_forms,
            "parameters": {
                "__doc__": tree.title
            },
            "form_key": "",
        }
Example #6
0
    def _extract_base_form(cls, tree, iteritems_options={}):
        branch_forms = {}
        for key, branch in tree.iteritems(**iteritems_options):
            if key in branch_forms:
                warnings.warn(
                    f"Found duplicate branch {key} in {tree}, taking first instance"
                )
                continue
            if "," in key or "!" in key:
                warnings.warn(
                    f"Skipping {key} because it contains characters that NanoEvents cannot accept [,!]"
                )
                continue
            if len(branch):
                continue
            if isinstance(
                    branch.interpretation,
                    uproot.interpretation.identify.UnknownInterpretation,
            ):
                warnings.warn(
                    f"Skipping {key} as it is not interpretable by Uproot")
                continue
            try:
                form = branch.interpretation.awkward_form(None)
            except uproot.interpretation.objects.CannotBeAwkward:
                warnings.warn(
                    f"Skipping {key} as it is it cannot be represented as an Awkward array"
                )
                continue
            form = uproot._util.awkward_form_remove_uproot(awkward, form)
            form = json.loads(form.tojson())
            if (form["class"].startswith("ListOffset")
                    and form["content"]["class"] == "NumpyArray"  # noqa
                ):
                form["form_key"] = quote(f"{key},!load")
                form["content"]["form_key"] = quote(f"{key},!load,!content")
                form["content"]["parameters"] = {"__doc__": branch.title}
            elif (form["class"].startswith("ListOffset")
                  and form["content"]["class"].startswith("ListOffset")
                  and form["content"]["content"]["class"]
                  in ["NumpyArray", "RecordArray"]):
                form["form_key"] = quote(f"{key},!load")
                form["content"]["form_key"] = quote(f"{key},!load,!content")
                form["content"]["parameters"] = {"__doc__": branch.title}
                if form["content"]["content"]["class"] == "NumpyArray":
                    form["content"]["content"]["form_key"] = quote(
                        f"{key},!load,!content,!content")
                else:
                    for field in form["content"]["content"]["contents"]:
                        form["content"]["content"]["contents"][field][
                            "form_key"] = quote(
                                f"{key},!load,!content,!content,{field},!item")
            elif form["class"] == "NumpyArray":
                form["form_key"] = quote(f"{key},!load")
                form["parameters"] = {"__doc__": branch.title}
            else:
                warnings.warn(
                    f"Skipping {key} as it is not interpretable by NanoEvents")
                continue
            branch_forms[key] = form

        return {
            "class": "RecordArray",
            "contents": branch_forms,
            "parameters": {
                "__doc__": tree.title
            },
            "form_key": "",
        }