def _extract_base_form(cls, column_source):
    """Describe a preloaded column source as a NanoEvents base form.

    Every entry of ``column_source`` is converted to its JSON form via
    ``branch.layout.form.tojson()`` and tagged with quoted ``form_key``
    values.  Two layouts are accepted: a flat ``NumpyArray`` and a
    ``ListOffset*`` whose content is a ``NumpyArray``.  The column name is
    stored as the ``__doc__`` parameter of the (innermost) numpy form.

    Columns are skipped with a warning when their name contains ``,`` or
    ``!`` or when their layout is not one of the two accepted ones.

    Args:
        cls: Not used by this function.
        column_source: Mapping of column name to an object exposing
            ``layout.form.tojson()``.

    Returns:
        A ``RecordArray`` form dictionary whose ``contents`` maps each
        accepted column name to its annotated form.
    """
    contents = {}
    for name, column in column_source.items():
        # "," and "!" are used as separators in the form keys built below.
        if "!" in name or "," in name:
            warnings.warn(
                f"Skipping {name} because it contains characters that NanoEvents cannot accept [,!]"
            )
            continue

        column_form = json.loads(column.layout.form.tojson())
        layout_class = column_form["class"]
        # Short-circuit keeps "content" untouched for non-list layouts.
        is_jagged_numpy = (
            layout_class.startswith("ListOffset")
            and column_form["content"]["class"] == "NumpyArray"
        )

        if is_jagged_numpy:
            inner = column_form["content"]
            column_form["form_key"] = quote(f"{name},!load")
            inner["form_key"] = quote(f"{name},!load,!content")
            inner["parameters"] = {"__doc__": name}
        elif layout_class == "NumpyArray":
            column_form["form_key"] = quote(f"{name},!load")
            column_form["parameters"] = {"__doc__": name}
        else:
            warnings.warn(
                f"Skipping {name} as it is not interpretable by NanoEvents"
            )
            continue

        contents[name] = column_form

    base_form = {
        "class": "RecordArray",
        "contents": contents,
        "parameters": {"__doc__": "preloaded column source"},
        "form_key": "",
    }
    return base_form
def zip_forms(forms, name, record_name=None, offsets=None, bypass=False):
    """Combine several form dictionaries into a single record form.

    Three situations are handled, tested in this order:

    1. Every form is a ``ListOffsetArray*``: the list contents are zipped
       into a ``RecordArray`` which is wrapped in a list form again.  The
       wrapper reuses the class, offsets and form key of the first form,
       unless ``offsets`` is given, in which case
       ``listarray_form(record, offsets)`` builds the wrapper.
    2. Every form is a ``NumpyArray``: the forms themselves become the
       record contents.
    3. Every form has a ``"class"`` entry and ``bypass`` is false: same as
       case 2, for arbitrary form classes.

    Args:
        forms: Non-empty dict mapping field name to form dictionary.
        name: Name used to build the record's ``!invalid`` form key.
        record_name: Optional value for the record's ``__record__``
            parameter; no parameters are set when it is ``None``.
        offsets: Optional offsets form handed to ``listarray_form`` in
            case 1.
        bypass: When true, case 3 is disabled.

    Returns:
        The zipped form dictionary.

    Raises:
        ValueError: ``forms`` is not a dict, is empty, or (case 1) the list
            forms disagree on class or offsets.
        NotImplementedError: none of the three cases applies.
    """
    if not isinstance(forms, dict):
        raise ValueError("Expected a dictionary")
    if not forms:
        # all() is vacuously true for an empty dict, which used to lead to a
        # StopIteration escaping from next(iter(...)) further down.
        raise ValueError(f"Cannot zip an empty dictionary of forms ({name!r})")

    def _record_form(contents):
        # Shared RecordArray skeleton used by every branch below.
        record = {
            "class": "RecordArray",
            "contents": contents,
            "form_key": quote("!invalid," + name),
        }
        if record_name is not None:
            record["parameters"] = {"__record__": record_name}
        return record

    if all(form["class"].startswith("ListOffsetArray") for form in forms.values()):
        first = next(iter(forms.values()))
        if not all(form["class"] == first["class"] for form in forms.values()):
            raise ValueError(
                f"Cannot zip list forms of different classes ({name!r})"
            )
        if not all(form["offsets"] == first["offsets"] for form in forms.values()):
            raise ValueError(
                f"Cannot zip list forms with different offsets ({name!r})"
            )
        record = _record_form({k: form["content"] for k, form in forms.items()})
        if offsets is None:
            return {
                "class": first["class"],
                "offsets": first["offsets"],
                "content": record,
                "form_key": first["form_key"],
            }
        return listarray_form(record, offsets)

    if all(form["class"] == "NumpyArray" for form in forms.values()):
        return _record_form(dict(forms))

    # Generic fallback: anything that looks like a form, unless bypassed.
    if all("class" in form for form in forms.values()) and not bypass:
        return _record_form(dict(forms))

    raise NotImplementedError("Cannot zip forms")
def _extract_base_form(cls, arrow_schema):
    """Build the NanoEvents base form for the columns of an Arrow schema.

    For every field the form is taken from the ``b"form"`` metadata entry
    (JSON) when present, otherwise it is derived from the field type with
    ``arrow_schema_to_awkward_form``.  Two layouts are accepted: a flat
    ``NumpyArray`` and a ``ListOffset*`` whose content is a ``NumpyArray``.

    The ``__doc__`` parameter of the (innermost) numpy form is chosen as:

    1. the decoded ``b"title"`` metadata entry, if there is one;
    2. otherwise an existing ``__doc__`` parameter is left untouched;
    3. otherwise the column name.

    Cases 1 and 3 replace the whole ``parameters`` dictionary.

    Fields are skipped with a warning when their name contains ``,`` or
    ``!`` or when their layout is not one of the two accepted ones.

    Args:
        cls: Not used by this function.
        arrow_schema: Iterable of fields exposing ``name``, ``metadata``
            (``None`` or a mapping with bytes keys) and ``type``.

    Returns:
        A ``RecordArray`` form dictionary whose ``contents`` maps each
        accepted column name to its annotated form.
    """
    column_forms = {}
    for field in arrow_schema:
        key = field.name
        fmeta = {} if field.metadata is None else field.metadata
        if "," in key or "!" in key:
            warnings.warn(
                f"Skipping {key} because it contains characters that NanoEvents cannot accept [,!]"
            )
            continue

        if b"form" in fmeta:
            form = json.loads(fmeta[b"form"])
        else:
            form = arrow_schema_to_awkward_form(field.type)
            form = json.loads(form.tojson())

        # ``documented`` is the sub-form that carries the __doc__ parameter.
        if (
            form["class"].startswith("ListOffset")
            and form["content"]["class"] == "NumpyArray"
        ):
            form["form_key"] = quote(f"{key},!load")
            form["content"]["form_key"] = quote(f"{key},!load,!content")
            documented = form["content"]
        elif form["class"] == "NumpyArray":
            form["form_key"] = quote(f"{key},!load")
            documented = form
        else:
            warnings.warn(
                f"Skipping {key} as it is not interpretable by NanoEvents"
            )
            continue

        if b"title" in fmeta:
            documented["parameters"] = {"__doc__": fmeta[b"title"].decode()}
        # The decoded JSON is not guaranteed to contain a "parameters" entry
        # (presumably compact form JSON omits empty ones -- confirm against
        # the Awkward version in use), so a missing or null entry is treated
        # as "no parameters" instead of raising KeyError/TypeError.
        elif "__doc__" not in (documented.get("parameters") or {}):
            documented["parameters"] = {"__doc__": key}

        column_forms[key] = form

    return {
        "class": "RecordArray",
        "contents": column_forms,
        "parameters": {"__doc__": "parquetfile"},
        "form_key": "",
    }
def _create_eventindex_form(base_form, key):
    """Return a copy of ``base_form`` whose content is an event-index array.

    ``base_form`` is deep-copied, so the input is never modified.  The
    copy's ``"content"`` entry is replaced by an ``int64`` ``NumpyArray``
    form whose form key is the quoted string
    ``"{key},!load,!eventindex,!content"``.  All other entries of the copy
    are left as they were in ``base_form``.

    Args:
        base_form: Form dictionary to start from.
        key: Name used to build the form key of the new content.

    Returns:
        The new form dictionary.
    """
    index_content = {
        "class": "NumpyArray",
        "parameters": {},
        "form_key": quote(f"{key},!load,!eventindex,!content"),
        "itemsize": 8,
        "primitive": "int64",
    }
    eventindex_form = copy.deepcopy(base_form)
    eventindex_form["content"] = index_content
    return eventindex_form
def _extract_base_form(cls, tree):
    """Build the NanoEvents base form for the branches of a tree.

    Each ``(key, branch)`` pair from ``tree.iteritems()`` is converted to a
    JSON form through ``branch.interpretation.awkward_form(None)`` and
    ``uproot._util.awkward_form_remove_uproot``.  Two layouts are accepted:
    a flat ``NumpyArray`` and a ``ListOffset*`` whose content is a
    ``NumpyArray``.  The branch title becomes the ``__doc__`` parameter of
    the (innermost) numpy form.

    A branch is skipped when its name contains ``,`` or ``!`` (warning),
    when ``len(branch)`` is non-zero (silently), or when its layout is not
    one of the two accepted ones (warning).

    Args:
        cls: Not used by this function.
        tree: Object exposing ``iteritems()`` and ``title``.

    Returns:
        A ``RecordArray`` form dictionary whose ``contents`` maps each
        accepted branch name to its annotated form and whose ``__doc__``
        parameter is ``tree.title``.
    """
    contents = {}
    for name, branch in tree.iteritems():
        if "!" in name or "," in name:
            warnings.warn(
                f"Skipping {name} because it contains characters that NanoEvents cannot accept [,!]"
            )
            continue
        # NOTE(review): presumably len(branch) counts sub-branches, so only
        # leaf branches are kept -- confirm against uproot.
        if len(branch) != 0:
            continue

        awkward_form = branch.interpretation.awkward_form(None)
        awkward_form = uproot._util.awkward_form_remove_uproot(awkward1, awkward_form)
        branch_form = json.loads(awkward_form.tojson())
        layout_class = branch_form["class"]
        # Short-circuit keeps "content" untouched for non-list layouts.
        is_jagged_numpy = (
            layout_class.startswith("ListOffset")
            and branch_form["content"]["class"] == "NumpyArray"
        )

        if is_jagged_numpy:
            inner = branch_form["content"]
            branch_form["form_key"] = quote(f"{name},!load")
            inner["form_key"] = quote(f"{name},!load,!content")
            inner["parameters"] = {"__doc__": branch.title}
        elif layout_class == "NumpyArray":
            branch_form["form_key"] = quote(f"{name},!load")
            branch_form["parameters"] = {"__doc__": branch.title}
        else:
            warnings.warn(
                f"Skipping {name} as it is not interpretable by NanoEvents"
            )
            continue

        contents[name] = branch_form

    base_form = {
        "class": "RecordArray",
        "contents": contents,
        "parameters": {"__doc__": tree.title},
        "form_key": "",
    }
    return base_form
def _extract_base_form(cls, tree, iteritems_options=None):
    """Build the NanoEvents base form for the branches of an uproot tree.

    Branches come from ``tree.iteritems(**iteritems_options)``.  Each one is
    converted to a JSON form through
    ``branch.interpretation.awkward_form(None)`` and
    ``uproot._util.awkward_form_remove_uproot``.  Accepted layouts are:

    * ``NumpyArray``
    * ``ListOffset*`` of ``NumpyArray``
    * ``ListOffset*`` of ``ListOffset*`` of ``NumpyArray`` or ``RecordArray``

    The branch title is stored as a ``__doc__`` parameter and quoted
    ``form_key`` values are attached to the form and its nested contents.

    A branch is skipped when its name was already collected, its name
    contains ``,`` or ``!``, ``len(branch)`` is non-zero, uproot reports an
    unknown interpretation, the interpretation cannot be expressed as an
    Awkward form, or the layout is not one of those listed above.  All of
    these emit a warning except the ``len(branch)`` case.

    Args:
        cls: Not used by this function.
        tree: Object exposing ``iteritems(**options)`` and ``title``.
        iteritems_options: Optional keyword arguments forwarded to
            ``tree.iteritems``; ``None`` means no extra arguments.

    Returns:
        A ``RecordArray`` form dictionary whose ``contents`` maps each
        accepted branch name to its annotated form and whose ``__doc__``
        parameter is ``tree.title``.
    """
    # A None default avoids sharing one mutable dict between all calls.
    if iteritems_options is None:
        iteritems_options = {}

    branch_forms = {}
    for key, branch in tree.iteritems(**iteritems_options):
        if key in branch_forms:
            warnings.warn(
                f"Found duplicate branch {key} in {tree}, taking first instance"
            )
            continue
        if "," in key or "!" in key:
            warnings.warn(
                f"Skipping {key} because it contains characters that NanoEvents cannot accept [,!]"
            )
            continue
        # NOTE(review): presumably len(branch) counts sub-branches, so only
        # leaf branches are kept -- confirm against uproot.
        if len(branch):
            continue
        if isinstance(
            branch.interpretation,
            uproot.interpretation.identify.UnknownInterpretation,
        ):
            warnings.warn(f"Skipping {key} as it is not interpretable by Uproot")
            continue
        try:
            form = branch.interpretation.awkward_form(None)
        except uproot.interpretation.objects.CannotBeAwkward:
            warnings.warn(
                f"Skipping {key} as it cannot be represented as an Awkward array"
            )
            continue
        form = uproot._util.awkward_form_remove_uproot(awkward, form)
        form = json.loads(form.tojson())

        if (
            form["class"].startswith("ListOffset")
            and form["content"]["class"] == "NumpyArray"
        ):
            # Singly jagged array of numbers.
            form["form_key"] = quote(f"{key},!load")
            form["content"]["form_key"] = quote(f"{key},!load,!content")
            form["content"]["parameters"] = {"__doc__": branch.title}
        elif (
            form["class"].startswith("ListOffset")
            and form["content"]["class"].startswith("ListOffset")
            and form["content"]["content"]["class"] in ["NumpyArray", "RecordArray"]
        ):
            # Doubly jagged array of numbers or of records.
            form["form_key"] = quote(f"{key},!load")
            form["content"]["form_key"] = quote(f"{key},!load,!content")
            form["content"]["parameters"] = {"__doc__": branch.title}
            innermost = form["content"]["content"]
            if innermost["class"] == "NumpyArray":
                innermost["form_key"] = quote(f"{key},!load,!content,!content")
            else:
                # Each record field gets its own key ending in "!item".
                for field in innermost["contents"]:
                    innermost["contents"][field]["form_key"] = quote(
                        f"{key},!load,!content,!content,{field},!item"
                    )
        elif form["class"] == "NumpyArray":
            form["form_key"] = quote(f"{key},!load")
            form["parameters"] = {"__doc__": branch.title}
        else:
            warnings.warn(
                f"Skipping {key} as it is not interpretable by NanoEvents"
            )
            continue

        branch_forms[key] = form

    return {
        "class": "RecordArray",
        "contents": branch_forms,
        "parameters": {"__doc__": tree.title},
        "form_key": "",
    }