def test_with_fields(tmp_path):
    one_list = [[{"x": 1}, {"x": 2}, {"x": 3}], [], [{"x": 4}, {"x": 5}]]
    two_list = [[{"x": 6}], [{"x": 7}, {"x": 8}, {"x": 9}, {"x": 10}]]
    one = ak.Array(one_list)
    two = ak.Array(two_list)
    ak.to_parquet(one, tmp_path / "file1.parquet")
    ak.to_parquet(two, tmp_path / "file2.parquet")

    assert not os.path.exists(tmp_path / "_common_metadata")
    assert not os.path.exists(tmp_path / "_metadata")

    no_metadata = ak.from_parquet(tmp_path)
    assert no_metadata.tolist() == one_list + two_list

    no_metadata_lazy = ak.from_parquet(tmp_path, lazy=True)
    assert no_metadata_lazy.tolist() == one_list + two_list

    ak.to_parquet.dataset(tmp_path)
    assert os.path.exists(tmp_path / "_common_metadata")
    assert os.path.exists(tmp_path / "_metadata")

    with_metadata = ak.from_parquet(tmp_path)
    assert with_metadata.tolist() == one_list + two_list

    with_metadata_lazy = ak.from_parquet(tmp_path, lazy=True)
    assert with_metadata_lazy.tolist() == one_list + two_list
def test_no_fields(tmp_path):
    one = ak.Array([[1, 2, 3], [], [4, 5]])
    two = ak.Array([[6], [7, 8, 9, 10]])
    ak.to_parquet(one, tmp_path / "file1.parquet")
    ak.to_parquet(two, tmp_path / "file2.parquet")

    assert not os.path.exists(tmp_path / "_common_metadata")
    assert not os.path.exists(tmp_path / "_metadata")

    no_metadata = ak.from_parquet(tmp_path)
    assert no_metadata.tolist() == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]

    no_metadata_lazy = ak.from_parquet(tmp_path, lazy=True)
    assert no_metadata_lazy.tolist() == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]

    ak.to_parquet.dataset(tmp_path)
    assert os.path.exists(tmp_path / "_common_metadata")
    assert os.path.exists(tmp_path / "_metadata")

    with_metadata = ak.from_parquet(tmp_path)
    assert with_metadata.tolist() == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]

    with_metadata_lazy = ak.from_parquet(tmp_path, lazy=True)
    assert with_metadata_lazy.tolist() == [[1, 2, 3], [], [4, 5], [6], [7, 8, 9, 10]]
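# Note on the two tests above: writing individual files with ak.to_parquet does not
# create the Parquet dataset sidecar files, and ak.from_parquet(directory) still
# concatenates the files found in the directory. Calling ak.to_parquet.dataset(directory)
# afterwards is what writes "_common_metadata" and "_metadata", and both eager and
# lazy reads produce the same result with or without them.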
def test_6(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test6.parquet")
    data = [
        {"x": {"y": [], "z": 1.1}},
        {"x": {"y": [one], "z": 2.2}},
        {"x": {"y": [one, two, three], "z": 3.3}},
    ]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array.field("z").array
    assert set(array.caches[0].keys()) == {"tmp:col:x.z[0]"}
    array.layout.field("x").array.field("y").array
    assert set(array.caches[0].keys()) == {"tmp:col:x.z[0]", "tmp:lst:x.y[0]"}
    assert array.tolist() == data

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array.field("y").array
    assert set(array.caches[0].keys()) == {"tmp:lst:x.y[0]"}
    array.layout.field("x").array.field("z").array
    assert set(array.caches[0].keys()) == {"tmp:lst:x.y[0]", "tmp:col:x.z[0]"}
    assert array.tolist() == data
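# A gloss on the lazy-cache keys asserted in test_6 above, inferred from the
# assertions themselves rather than from documented API: keys appear to follow the
# pattern "<lazy_cache_key>:<kind>:<column path>[<partition>]", where the <kind>
# seen in these tests is "col" for a materialized leaf column, "lst" for a list
# column, and "off" (in the tests below) for offsets materialized apart from their
# content. Nothing lands in the cache until a field's .array is actually touched.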
def test_8(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test8.parquet")
    data = [
        {"x": []},
        {"x": [{"y": one, "z": 1.1}]},
        {"x": [{"y": one, "z": 1.1}, {"y": two, "z": 2.2}, {"y": three, "z": 3.3}]},
    ]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y:x[0]"}
    assert np.asarray(array.layout.field("x").array.offsets).tolist() == [0, 0, 1, 4]
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y:x[0]"}
    array.layout.field("x").array.content.field("y").array
    assert set(array.caches[0].keys()) == {
        "tmp:off:x.list.item.y:x[0]",
        "tmp:col:x.list.item.y[0]",
    }
    array.layout.field("x").array.content.field("z").array
    assert set(array.caches[0].keys()) == {
        "tmp:off:x.list.item.y:x[0]",
        "tmp:col:x.list.item.y[0]",
        "tmp:col:x.list.item.z[0]",
    }
    assert array.tolist() == data

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y:x[0]"}
    assert np.asarray(array.layout.field("x").array.offsets).tolist() == [0, 0, 1, 4]
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y:x[0]"}
    array.layout.field("x").array.content.field("z").array
    assert set(array.caches[0].keys()) == {
        "tmp:off:x.list.item.y:x[0]",
        "tmp:col:x.list.item.z[0]",
    }
    array.layout.field("x").array.content.field("y").array
    assert set(array.caches[0].keys()) == {
        "tmp:off:x.list.item.y:x[0]",
        "tmp:col:x.list.item.z[0]",
        "tmp:col:x.list.item.y[0]",
    }
    assert array.tolist() == data
def test_issue2(tmp_path):
    filename = os.path.join(tmp_path, "whatever.parquet")
    null_table = pyarrow.Table.from_pydict({"null_col": pyarrow.array([None])})
    pyarrow_parquet.write_table(null_table, filename)
    assert ak.from_parquet(filename).type == ak.from_parquet(filename, lazy=True).type
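# The point of the test above: a Parquet column that is entirely null should come
# back with the same (option-typed) Awkward type whether it is read eagerly or lazily.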
def test_to_parquet_2(tmp_path):
    array = ak.Array([
        [{"x": 0.0, "y": []}, {"x": 1.1, "y": [1]}, {"x": 2.2, "y": None}],
        [],
        [{"x": 3.3, "y": [1, 2, 3]}, None, {"x": 4.4, "y": [1, 2, 3, 4]}],
    ])
    assert str(array.type) == '3 * var * ?{"x": float64, "y": option[var * int64]}'

    ak.to_parquet(array, os.path.join(tmp_path, "complicated-example.parquet"))
    array2 = ak.from_parquet(os.path.join(tmp_path, "complicated-example.parquet"))
    assert str(array2.type) == str(array.type)
    assert array2.tolist() == array.tolist()
def test_parquet2b(tmp_path):
    filename = os.path.join(tmp_path, "whatever.parquet")
    array = ak.Array(
        [
            {"x": [{"y": 0.0, "z": 0}]},
            {"x": [{"y": 1.1, "z": 1}]},
            {"x": [{"y": 2.2, "z": 2}]},
        ]
    )
    ak.to_parquet(array, filename)
    lazy = ak.from_parquet(filename, lazy=True, lazy_cache=None)

    @numba.njit
    def f1(lazy):
        out = np.ones(3, np.float64)
        i = 0
        for obj in lazy:
            for subobj in obj.x:
                out[i] = subobj.y
                i += 1
        return out

    @numba.njit
    def f2(lazy):
        out = np.ones(3, np.float64)
        i = 0
        for obj in lazy:
            for subobj in obj.x:
                out[i] = subobj.z
                i += 1
        return out

    assert f1(lazy).tolist() == [0.0, 1.1, 2.2]
    assert f2(lazy).tolist() == [0, 1, 2]
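# The two njitted functions above demonstrate that a lazily-read record array can be
# iterated inside Numba-compiled code, with each function reading a single field of
# the nested records.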
def test(tmp_path): filename = os.path.join(tmp_path, "what-ever.parquet") fish = ak.Array([True, True])[np.newaxis] clob = ak.Array([2, 3, 7])[np.newaxis] frog = ak.zip({"c": clob, "f": fish}, depth_limit=1) ak.to_parquet(frog, filename) assert ak.from_parquet(filename).tolist() == frog.tolist()
def test_12(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test12.parquet")
    data = [
        {"x": {"y": []}},
        {"x": {"y": [[one]]}},
        {"x": {"y": [[one, two], [], [three]]}},
    ]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array.field("y").array
    assert set(array.caches[0].keys()) == {"tmp:lst:x.y[0]"}
    assert array.tolist() == data
def test_parsing(header_version: int, events_per_chunk: int) -> None:
    here = Path(__file__).parent
    input_filename = here / "parsing" / f"final_state_hadrons_header_v{header_version}.dat"

    for i, arrays in enumerate(
        parse_ascii.read(filename=input_filename, events_per_chunk=events_per_chunk, parser="pandas")
    ):
        # Create the reference arrays by checking out parser v1
        # (e477e0277fa560f9aba82310c02da8177e61c9e4), setting the chunk size in
        # skim_ascii, and then calling:
        # $ python jetscape_analysis/analysis/reader/skim_ascii.py -i tests/parsing/final_state_hadrons_header_v1.dat -o tests/parsing/events_per_chunk_50/parser_v1_header_v1/test.parquet
        # NOTE: The final state hadron files won't exist when you check out that branch,
        # so it's best to copy them over from your existing branch.
        reference_arrays = ak.from_parquet(
            Path(f"{here}/parsing/events_per_chunk_{events_per_chunk}/parser_v1_header_v1/test_{i:02}.parquet")
        )
        # There are more fields in v2 than in the reference arrays (v1), so only compare
        # the fields that are present in the reference.
        # NOTE: We have to compare the fields one-by-one because the shapes of the fields
        # are different and apparently don't broadcast nicely with `__eq__`.
        for field in ak.fields(reference_arrays):
            new_field = _rename_columns.get(field, field)
            assert ak.all(reference_arrays[field] == arrays[new_field])

        # Check for the cross section if header v2.
        if header_version == 2:
            assert "cross_section" in ak.fields(arrays)
            assert "cross_section_error" in ak.fields(arrays)
def test():
    array = ak.Array([1, 2, 3])
    file_ = io.BytesIO()
    ak.to_parquet(array, file_)
    file_.seek(0)
    array_from_file = ak.from_parquet(file_)
    assert ak.to_list(array) == ak.to_list(array_from_file)
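# The test above exercises to_parquet/from_parquet with an in-memory file-like object
# (io.BytesIO) rather than a path; note the seek(0) back to the start before reading.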
def test_16(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test16.parquet")
    data = [[one, two], [], [three]]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    assert np.asarray(array.layout.array.offsets).tolist() == [0, 2, 2, 3]
    assert set(array.caches[0].keys()) == {"tmp:lst:[0]"}
    assert array.tolist() == data
def load(cls, path, *args, **kwargs):
    path = get_path(path)

    if path.endswith(".parquet"):
        import awkward as ak
        return ak.from_parquet(path, *args, **kwargs)

    # .pickle, .pkl
    return PickleFormatter.load(path, *args, **kwargs)
def test_1(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test1.parquet")
    data = [{"x": one}, {"x": two}, {"x": three}]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == {"tmp:col:x[0]"}
    assert array.tolist() == data
def test_4(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test4.parquet")
    data = [{"x": []}, {"x": [one]}, {"x": [one, two, three]}]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == {"tmp:lst:x[0]"}
    assert array.tolist() == data
def test_15(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test15.parquet")
    data = [one, two, three]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.array
    assert set(array.caches[0].keys()) == {"tmp:col:[0]"}
    assert array.tolist() == data
def test(tmp_path): filename = os.path.join(tmp_path, "test.parquet") dog = ak.from_iter([1, 2, 5]) cat = ak.from_iter([4]) pets = ak.zip({ "dog": dog[np.newaxis], "cat": cat[np.newaxis] }, depth_limit=1) ak.to_parquet(pets, filename) assert ak.from_parquet(filename).tolist() == pets.tolist()
def test_17(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test17.parquet")
    data = [[{"x": one}, {"x": two}], [], [{"x": three}]]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    assert np.asarray(array.layout.array.offsets).tolist() == [0, 2, 2, 3]
    assert set(array.caches[0].keys()) == {"tmp:off:.list.item.x:[0]"}
    array.layout.array.content.field("x").array
    assert set(array.caches[0].keys()) == {
        "tmp:off:.list.item.x:[0]",
        "tmp:col:.list.item.x[0]",
    }
    assert array.tolist() == data
def test_pandas(tmp_path):
    df = pandas.DataFrame(
        {"x": np.arange(10), "y": np.arange(10) % 5, "z": ["low"] * 5 + ["high"] * 5}
    )
    df.to_parquet(tmp_path, partition_cols=["z", "y"])

    a = ak.from_parquet(tmp_path)
    assert a.z.tolist() == ["high"] * 5 + ["low"] * 5  # alphabetical partition order
    assert a.y.tolist() == ["0", "1", "2", "3", "4", "0", "1", "2", "3", "4"]
    assert a.x.tolist() == [5, 6, 7, 8, 9, 0, 1, 2, 3, 4]

    b = ak.from_parquet(tmp_path, lazy=True)
    assert b.z.tolist() == ["high"] * 5 + ["low"] * 5
    assert b.y.tolist() == ["0", "1", "2", "3", "4", "0", "1", "2", "3", "4"]
    assert b.x.tolist() == [5, 6, 7, 8, 9, 0, 1, 2, 3, 4]

    c = ak.from_parquet(tmp_path, include_partition_columns=False)
    assert ak.fields(c) == ["x"]

    d = ak.from_parquet(tmp_path, lazy=True, include_partition_columns=False)
    assert ak.fields(d) == ["x"]
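# As the assertions above show, pandas writes hive-style partition directories for
# partition_cols, the partition values come back as strings in alphabetical directory
# order, and include_partition_columns=False drops them from the result entirely.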
def test_11(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test11.parquet")
    data = [
        {"x": []},
        {"x": [{"z": 1.1, "y": {"q": one}}]},
        {
            "x": [
                {"z": 1.1, "y": {"q": one}},
                {"z": 2.2, "y": {"q": two}},
                {"z": 3.3, "y": {"q": three}},
            ]
        },
    ]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert len(set(array.caches[0].keys())) == 1
    assert np.asarray(array.layout.field("x").array.offsets).tolist() == [0, 0, 1, 4]
    assert len(set(array.caches[0].keys())) == 1
    array.layout.field("x").array.content.field("y").array
    assert len(set(array.caches[0].keys())) == 1
    array.layout.field("x").array.content.field("y").array.field("q").array
    assert len(set(array.caches[0].keys())) == 2
    array.layout.field("x").array.content.field("z").array
    assert len(set(array.caches[0].keys())) == 3
    assert array.tolist() == data

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert len(set(array.caches[0].keys())) == 1
    assert np.asarray(array.layout.field("x").array.offsets).tolist() == [0, 0, 1, 4]
    assert len(set(array.caches[0].keys())) == 1
    array.layout.field("x").array.content.field("y").array
    assert len(set(array.caches[0].keys())) == 1
    array.layout.field("x").array.content.field("z").array
    assert len(set(array.caches[0].keys())) == 2
    array.layout.field("x").array.content.field("y").array.field("q").array
    assert len(set(array.caches[0].keys())) == 3
    assert array.tolist() == data
def test(tmp_path): filename = os.path.join(tmp_path, "test.parquet") ak.to_parquet(ak.repartition(range(8), 2), filename) assert ak.from_parquet(filename, row_groups=[1, 3]).tolist() == [2, 3, 6, 7] assert ak.from_parquet(filename, row_groups=[1, 3], lazy=True).tolist() == [ 2, 3, 6, 7, ] assert ak.from_parquet(tmp_path, row_groups=[1, 3]).tolist() == [2, 3, 6, 7] assert ak.from_parquet(tmp_path, row_groups=[1, 3], lazy=True).tolist() == [ 2, 3, 6, 7, ] ak.to_parquet.dataset(tmp_path) assert ak.from_parquet(tmp_path, row_groups=[1, 3]).tolist() == [2, 3, 6, 7] assert ak.from_parquet(tmp_path, row_groups=[1, 3], lazy=True).tolist() == [ 2, 3, 6, 7, ]
def test_parsing_with_parquet(header_version: int, events_per_chunk: int, tmp_path: Path) -> None:
    """Parse to parquet, read back, and compare."""
    here = Path(__file__).parent
    input_filename = here / "parsing" / f"final_state_hadrons_header_v{header_version}.dat"

    # Convert to chunks in a temp directory.
    base_output_filename = tmp_path / "test.parquet"
    parse_ascii.parse_to_parquet(
        base_output_filename=base_output_filename,
        store_only_necessary_columns=True,
        input_filename=input_filename,
        events_per_chunk=events_per_chunk,
    )

    output_filenames = tmp_path.glob("*.parquet")
    for i, output_filename in enumerate(sorted(output_filenames)):
        arrays = ak.from_parquet(output_filename)
        # Create the reference arrays by checking out parser v1
        # (e477e0277fa560f9aba82310c02da8177e61c9e4), setting the chunk size in
        # skim_ascii, and then calling:
        # $ python jetscape_analysis/analysis/reader/skim_ascii.py -i tests/parsing/final_state_hadrons_header_v1.dat -o tests/parsing/events_per_chunk_50/parser_v1_header_v1/test.parquet
        # NOTE: The final state hadron files won't exist when you check out that branch,
        # so it's best to copy them over from your existing branch.
        reference_arrays = ak.from_parquet(
            Path(f"{here}/parsing/events_per_chunk_{events_per_chunk}/parser_v1_header_v1/test_{i:02}.parquet")
        )
        # There are more fields in v2 than in the reference arrays (v1), so only compare
        # the fields that are present in the reference.
        # NOTE: We have to compare the fields one-by-one because the shapes of the fields
        # are different and apparently don't broadcast nicely with `__eq__`.
        for field in ak.fields(reference_arrays):
            new_field = _rename_columns.get(field, field)
            assert ak.all(reference_arrays[field] == arrays[new_field])

        # Check for the cross section if header v2.
        if header_version == 2:
            assert "cross_section" in ak.fields(arrays)
            assert "cross_section_error" in ak.fields(arrays)
def test(tmp_path):
    one = ak.Array([[], [{"x": [{"y": 1}]}]])
    two = ak.Array([[{"x": []}, {"x": [{"y": 1}]}]])
    three = ak.Array([[{"x": [{"y": 1}]}], [], [{"x": [{"y": 2}]}]])
    ak.to_parquet(one, tmp_path / "one.parquet")
    ak.to_parquet(two, tmp_path / "two.parquet")
    ak.to_parquet(three, tmp_path / "three.parquet")

    lazy_one = ak.from_parquet(tmp_path / "one.parquet", lazy=True)
    lazy_two = ak.from_parquet(tmp_path / "two.parquet", lazy=True)
    lazy_three = ak.from_parquet(tmp_path / "three.parquet", lazy=True)

    assert lazy_one.tolist() == [[], [{"x": [{"y": 1}]}]]
    assert lazy_two.tolist() == [[{"x": []}, {"x": [{"y": 1}]}]]
    assert lazy_three.tolist() == [[{"x": [{"y": 1}]}], [], [{"x": [{"y": 2}]}]]
def test_parquet1(tmp_path):
    filename = os.path.join(tmp_path, "whatever.parquet")
    array = ak.Array([{"x": {"y": 0.0}}, {"x": {"y": 1.1}}, {"x": {"y": 2.2}}])
    ak.to_parquet(array, filename)
    lazy = ak.from_parquet(filename, lazy=True, lazy_cache=None)

    @numba.njit
    def f1(lazy):
        out = np.ones(3, np.float64)
        i = 0
        for obj in lazy:
            out[i] = obj.x.y
            i += 1
        return out

    assert f1(lazy).tolist() == [0.0, 1.1, 2.2]
def test_9(one, two, three, tmp_path):
    filename = os.path.join(str(tmp_path), "test9.parquet")
    data = [
        {"x": []},
        {"x": [{"y": {"q": one}}]},
        {"x": [{"y": {"q": one}}, {"y": {"q": two}}, {"y": {"q": three}}]},
    ]
    ak.to_parquet(ak.Array(data), filename)

    array = ak.from_parquet(filename, lazy=True, lazy_cache_key="tmp")
    assert set(array.caches[0].keys()) == set()
    array.layout.field("x").array
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y.q:x[0]"}
    assert np.asarray(array.layout.field("x").array.offsets).tolist() == [0, 0, 1, 4]
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y.q:x[0]"}
    array.layout.field("x").array.content.field("y").array
    assert set(array.caches[0].keys()) == {"tmp:off:x.list.item.y.q:x[0]"}
    array.layout.field("x").array.content.field("y").array.field("q").array
    assert set(array.caches[0].keys()) == {
        "tmp:off:x.list.item.y.q:x[0]",
        "tmp:col:x.list.item.y.q[0]",
    }
    assert array.tolist() == data
def test_to_parquet(tmp_path):
    original = ak.Array([
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}],
        [],
        [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}],
        [],
        [],
        [{"x": 6, "y": 6.6}, {"x": 7, "y": 7.7}, {"x": 8, "y": 8.8}, {"x": 9, "y": 9.9}],
    ])
    ak.to_parquet(original, os.path.join(tmp_path, "data.parquet"))
    reconstituted = ak.from_parquet(os.path.join(tmp_path, "data.parquet"))
    assert reconstituted.tolist() == [
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}],
        [],
        [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}],
        [],
        [],
        [{"x": 6, "y": 6.6}, {"x": 7, "y": 7.7}, {"x": 8, "y": 8.8}, {"x": 9, "y": 9.9}],
    ]
    assert str(reconstituted.type) == '6 * var * {"x": int64, "y": float64}'
def test_parquet():
    empty = ak.from_parquet("tests/samples/zero-record-batches.parquet")
    assert isinstance(empty, ak.Array)
    assert len(empty) == 0
    assert str(empty.type) == "0 * {}"
import sys
import time
import subprocess

import awkward as ak

compress = sys.argv[1]
N = int(sys.argv[2])
is_split = sys.argv[3] == "split"
s = "-split" if is_split else ""

filename = f"/home/jpivarski/storage/data/chep-2021-jagged-jagged-jagged/{compress}{s}-jagged{N}.parquet"

# Warm the OS page cache so the timings measure decoding, not disk I/O.
subprocess.call(f"vmtouch -t {filename} > /dev/null", shell=True)
subprocess.call(f"vmtouch {filename} | fgrep Pages", shell=True)

array = ak.from_parquet(filename, lazy=True)

begintime = time.time()
for partition in array.layout.partitions:
    tmp = partition.array
endtime = time.time()

print(f"pyarrow {compress}{s}-jagged{N}", endtime - begintime, "seconds")

# Second pass over a fresh lazy array. The original snippet cuts off inside this
# block; it presumably ends with the same endtime/print bookkeeping as the first.
array = ak.from_parquet(filename, lazy=True)

begintime = time.time()
for partition in array.layout.partitions:
    tmp = partition.array
endtime = time.time()

print(f"pyarrow {compress}{s}-jagged{N}", endtime - begintime, "seconds")
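# Hypothetical invocation of the benchmark above (argument meanings inferred from the
# sys.argv usage, not documented anywhere in the snippet):
#   python benchmark.py lz4 2 split
# i.e. compression codec, jaggedness level N, and "split" (or anything else) to select
# the byte-stream-split variant of the file.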
def load_df():
    return ak.from_parquet(good_uproot_file_path)  # type: ignore
ew = Components.EventWise.from_file(
    "../megaIgnore/IRCchecks_noPTcut1/iridis_pp_to_jjj_lo1_fragment/iridis_pp_to_jjj_lo1_fragment0.parquet"
)
spectral_jets = list(
    {name.split('_')[0] for name in ew.columns if name.startswith("Spect") and "IRC" not in name}
)

# files that contain kinematic info
file_name = "../megaIgnore/IRCchecks_noPTcut{}/iridis_pp_to_jjj_{}{}_fragment/kinematics.parquet"
end_time = time.time() + 60*60*36
ew_shapes = Components.EventWise.from_file("../megaIgnore/IRC_shapes2.parquet")

for n in range(1, 5):
    spectral_shapes = [
        [[[] for _ in spectral_jets] for _ in ew_shapes.shape_names]
        for _ in ew_shapes.orders
    ]
    for order in ["nlo", "lo"]:
        o_idx = list(ew_shapes.orders).index(order)
        # The template has three placeholders (e.g. ".../iridis_pp_to_jjj_lo1_fragment/...");
        # the third argument was missing in the original and is assumed to be n.
        name = file_name.format(n, order, n)
        kinematics = ak.from_parquet(name)
        print(name)
        print("Getting jet shapes")
        for j_idx, jname in enumerate(spectral_jets):
            print('.', end='', flush=True)
            ew.selected_event = None
            for event_n in range(len(kinematics[o_idx, 0, 0])):
                ew.selected_event = event_n
                event_kinematics = ak.to_numpy(kinematics[o_idx, 1:, j_idx, event_n, :])
                if np.any(np.isnan(event_kinematics)) or len(event_kinematics) != 4:
                    shapes = [np.nan for _ in ew_shapes.shape_names]
                else:
                    shape_dict = ShapeVariables.shape(*event_kinematics)[1]
                    shapes = [shape_dict[name] for name in ew_shapes.shape_names]
                for i, val in enumerate(shapes):
                    # The original snippet is truncated here; the loop body is assumed
                    # to append each value to the matching [order][shape][jet] bucket.
                    spectral_shapes[o_idx][i][j_idx].append(val)