def _ensure_flat(array, allow_missing=False):
    """Normalize an array to a flat numpy array or raise ValueError.

    Parameters
    ----------
    array
        An awkward0 array (converted with ``awkward1.from_awkward0``), an
        ``awkward1.Array`` or a ``numpy.ndarray``.
    allow_missing : bool
        When true, an option-of-primitive item type (``N * ?primitive``) is
        accepted in addition to ``N * primitive``. The flag is also forwarded
        to ``awkward1.to_numpy``.

    Returns
    -------
    The input converted with ``awkward1.to_numpy`` when it is an
    ``awkward1.Array``; a ``numpy.ndarray`` input that passes the type checks
    is returned as-is.

    Raises
    ------
    ValueError
        If ``array`` is not one of the accepted container types, or if its
        awkward type is not a one-level array of (optionally missing)
        primitives.
    """
    if isinstance(array, awkward.AwkwardArray):
        array = awkward1.from_awkward0(array)
    elif not isinstance(array, (awkward1.Array, numpy.ndarray)):
        # The operand is wrapped in a 1-tuple on purpose: a bare tuple on the
        # right-hand side of ``%`` is unpacked as the argument list, which
        # would raise TypeError (or print the wrong thing) for tuple inputs.
        raise ValueError(
            "Expected a numpy or awkward array, received: %r" % (array,))

    aktype = awkward1.type(array)
    if not isinstance(aktype, awkward1.types.ArrayType):
        raise ValueError("Expected an array type, received: %r" % (aktype,))

    item_type = aktype.type
    isprimitive = isinstance(item_type, awkward1.types.PrimitiveType)
    isoptionprimitive = (
        isinstance(item_type, awkward1.types.OptionType)
        and isinstance(item_type.type, awkward1.types.PrimitiveType)
    )

    if allow_missing:
        if not (isprimitive or isoptionprimitive):
            raise ValueError(
                "Expected an array of type N * primitive or N * ?primitive, received: %r"
                % (aktype,))
    elif not isprimitive:
        raise ValueError(
            "Expected an array of type N * primitive, received: %r" % (aktype,))

    if isinstance(array, awkward1.Array):
        array = awkward1.to_numpy(array, allow_missing=allow_missing)
    return array
def transform_single_file(file_path, output_path, servicex=None, tree_name='Events'):
    """Run the generated query on one input file and emit the result as Arrow.

    The function imports ``generated_transformer`` lazily, calls its
    ``run_query(file_path, tree_name)``, round-trips the result through
    ``awkward1`` and converts it to an Arrow table with ``awkward.toarrow``.
    Timings of both stages are printed.

    Parameters
    ----------
    file_path
        Location of the input file; passed unchanged to ``run_query``.
    output_path
        If truthy, the Arrow table is written there as a Parquet file.
    servicex
        Not used by this function.
    tree_name : str
        Tree name passed to ``run_query``.

    Raises
    ------
    RuntimeError
        If any step of the transformation or the Parquet write fails. The
        traceback and message of the underlying error are printed to stdout
        first, and the original exception is attached as the cause.

    Notes
    -----
    ``messaging``, ``args``, ``ArrowWriter``, ``ArrowIterator`` and ``pq`` are
    resolved from the enclosing module; they are not defined in this block.
    """
    print("Transforming a single path: " + str(file_path))

    try:
        import generated_transformer

        start_transform = time.time()
        table = generated_transformer.run_query(file_path, tree_name)
        end_transform = time.time()
        print(
            f'generated_transformer.py: {round(end_transform - start_transform, 2)} sec'
        )

        start_serialization = time.time()
        table_awk1 = awkward1.from_awkward0(table)
        new_table = awkward1.to_awkward0(table_awk1)
        arrow = awkward.toarrow(new_table)
        end_serialization = time.time()
        print(
            f'awkward Table -> Arrow: {round(end_serialization - start_serialization, 2)} sec'
        )

        if output_path:
            writer = pq.ParquetWriter(output_path, arrow.schema)
            try:
                writer.write_table(table=arrow)
            finally:
                # Always release the file handle, even when the write fails.
                writer.close()

    except Exception as exc:
        traceback.print_tb(exc.__traceback__, limit=20, file=sys.stdout)
        print(exc)

        # str() keeps the error path usable for non-str paths (e.g. pathlib
        # objects); plain concatenation would raise TypeError here and hide
        # the real failure.
        raise RuntimeError(
            "Failed to transform input file " + str(file_path) + ": " + str(exc)
        ) from exc

    if messaging:
        arrow_writer = ArrowWriter(file_format=args.result_format,
                                   object_store=None,
                                   messaging=messaging)
        # TODO: implement chunk size parameter
        transformer = ArrowIterator(arrow, chunk_size=1000, file_path=file_path)
        arrow_writer.write_branches_to_arrow(transformer=transformer,
                                             topic_name=args.request_id,
                                             file_id=None,
                                             request_id=args.request_id)
def returnEventStructure(unsmearedEventsA, smearedEventsA):
    """Assemble one zipped event array from truth and smeared inputs.

    Both arguments are mappings (iterated with ``.items()``) whose values are
    converted with ``ak.from_awkward0``. The result is an ``ak.zip`` of three
    fields:

    * ``"evnum"`` - taken from the smeared input;
    * ``"true"``  - record built from the unsmeared input;
    * ``"rec"``   - record built from the smeared input.

    ``"true"`` and ``"rec"`` each carry ``x``, ``y``, ``Q2``, ``PhPerp``,
    ``PhEta``, ``PhPhi``, ``Ph`` and a nested ``"pair"`` record holding
    ``Z``, ``hadP``, ``hadPt``, ``hadEta``, ``hadPhi``. The inner records are
    zipped with ``depthlimit=1``; the outermost zip uses the default depth.
    """
    # Smeared input is converted first, then the unsmeared one.
    reco = {}
    for key, value in smearedEventsA.items():
        reco[key] = ak.from_awkward0(value)
    truth = {}
    for key, value in unsmearedEventsA.items():
        truth[key] = ak.from_awkward0(value)

    hadron_fields = ("Z", "hadP", "hadPt", "hadEta", "hadPhi")
    dihadron_fields = ("PhPerp", "PhEta", "PhPhi", "Ph")

    def hadron_pair(source):
        # Shallow record of the per-hadron quantities.
        return ak.zip({field: source[field] for field in hadron_fields},
                      depthlimit=1)

    event_numbers = reco["evnum"]

    # Truth record: "pair" sits between Q2 and the Ph* fields.
    true_fields = {field: truth[field] for field in ("x", "y", "Q2")}
    true_fields["pair"] = hadron_pair(truth)
    for field in dihadron_fields:
        true_fields[field] = truth[field]
    true_record = ak.zip(true_fields, depthlimit=1)

    # Reconstructed record: "pair" comes last.
    rec_fields = {field: reco[field] for field in ("x", "y", "Q2")}
    for field in dihadron_fields:
        rec_fields[field] = reco[field]
    rec_fields["pair"] = hadron_pair(reco)
    rec_record = ak.zip(rec_fields, depthlimit=1)

    return ak.zip({
        "evnum": event_numbers,
        "true": true_record,
        "rec": rec_record,
    })
def test_fromawkward0():
    """Check ``awkward1.from_awkward0`` on a range of input kinds.

    Cases, in order: a NumPy array, a tuple, a dict, jagged lists, a list of
    dicts, an ``awkward0.Table`` built from positional columns, a
    heterogeneous list, a list with ``None`` entries, strings, user-defined
    objects, a ``ChunkedArray`` and a ``ChunkedArray`` of ``VirtualArray``
    chunks. Each case asserts the converted object's class and/or its
    ``awkward1.to_list`` value.
    """
    # Plain NumPy array.
    array = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5])
    converted = awkward1.from_awkward0(array)
    assert isinstance(converted, awkward1.highlevel.Array)
    assert isinstance(awkward1.from_awkward0(array, highlevel=False),
                      awkward1.layout.NumpyArray)
    # Check the converted array, not the raw NumPy input, so this assertion
    # actually exercises from_awkward0.
    assert awkward1.to_list(converted) == [1.1, 2.2, 3.3, 4.4, 5.5]

    # Tuple input.
    array = (123, numpy.array([1.1, 2.2, 3.3]))
    converted = awkward1.from_awkward0(array)
    assert isinstance(converted, awkward1.highlevel.Record)
    assert isinstance(converted.layout, awkward1.layout.Record)
    assert awkward1.to_list(converted) == (123, [1.1, 2.2, 3.3])

    # Dict input.
    array = {"x": 123, "y": numpy.array([1.1, 2.2, 3.3])}
    converted = awkward1.from_awkward0(array)
    assert isinstance(converted, awkward1.highlevel.Record)
    assert isinstance(converted.layout, awkward1.layout.Record)
    assert awkward1.to_list(converted) == {"x": 123, "y": [1.1, 2.2, 3.3]}

    # Jagged lists.
    array = awkward0.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    layout = awkward1.from_awkward0(array, highlevel=False)
    assert isinstance(layout, (awkward1.layout.ListOffsetArray32,
                               awkward1.layout.ListOffsetArrayU32,
                               awkward1.layout.ListOffsetArray64))
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [
        [1.1, 2.2, 3.3], [], [4.4, 5.5]
    ]

    # List of dicts.
    array = awkward0.fromiter([
        {"x": 0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
    ])
    layout = awkward1.from_awkward0(array, highlevel=False)
    converted = awkward1.from_awkward0(array)
    assert isinstance(layout, awkward1.layout.RecordArray)
    assert not layout.istuple
    assert converted.layout.keys() == ["x", "y"]
    assert awkward1.to_list(converted) == [
        {"x": 0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
    ]

    # Table built from positional columns.
    array = awkward0.Table([0.0, 1.1, 2.2],
                           awkward0.fromiter([[], [1], [2, 2]]))
    layout = awkward1.from_awkward0(array, highlevel=False)
    converted = awkward1.from_awkward0(array)
    assert isinstance(layout, awkward1.layout.RecordArray)
    assert layout.istuple
    assert converted.layout.keys() == ["0", "1"]
    assert awkward1.to_list(converted) == [(0.0, []), (1.1, [1]),
                                           (2.2, [2, 2])]

    # Heterogeneous list (numbers mixed with lists).
    array = awkward0.fromiter([0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]])
    layout = awkward1.from_awkward0(array, highlevel=False)
    assert isinstance(layout, (awkward1.layout.UnionArray8_32,
                               awkward1.layout.UnionArray8_U32,
                               awkward1.layout.UnionArray8_64))
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [
        0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]
    ]

    # List with None entries.
    array = awkward0.fromiter([1.1, 2.2, None, None, 3.3, None, 4.4])
    layout = awkward1.from_awkward0(array, highlevel=False)
    assert isinstance(layout, awkward1.layout.ByteMaskedArray)
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [
        1.1, 2.2, None, None, 3.3, None, 4.4
    ]

    # Strings.
    array = awkward0.fromiter(["hello", "you", "guys"])
    layout = awkward1.from_awkward0(array, highlevel=False)
    assert isinstance(layout, (awkward1.layout.ListArray32,
                               awkward1.layout.ListArrayU32,
                               awkward1.layout.ListArray64,
                               awkward1.layout.ListOffsetArray32,
                               awkward1.layout.ListOffsetArrayU32,
                               awkward1.layout.ListOffsetArray64))
    assert layout.parameters["__array__"] in ("string", "bytes")
    assert layout.content.parameters["__array__"] in ("utf8", "char")
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [
        "hello", "you", "guys"
    ]

    # User-defined objects.
    class Point(object):
        def __init__(self, x, y):
            self.x, self.y = x, y

        def __repr__(self):
            return "Point({0}, {1})".format(self.x, self.y)

    array = awkward0.fromiter([Point(1.1, 10), Point(2.2, 20), Point(3.3, 30)])
    converted = awkward1.from_awkward0(array)
    assert awkward1.to_list(converted) == [
        {"x": 1.1, "y": 10},
        {"x": 2.2, "y": 20},
        {"x": 3.3, "y": 30},
    ]
    assert "__record__" in converted.layout.parameters

    # Both chunked cases below must yield this same list.
    expected_chunked = [[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6],
                        [7.7, 8.8], [9.9, 10.0, 11.1, 12.2]]

    # ChunkedArray of already-built chunks.
    array = awkward0.ChunkedArray([
        awkward0.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]]),
        awkward0.fromiter([[6.6]]),
        awkward0.fromiter([[7.7, 8.8], [9.9, 10.0, 11.1, 12.2]]),
    ])
    assert awkward1.to_list(awkward1.from_awkward0(array)) == expected_chunked

    # ChunkedArray whose chunks are VirtualArrays wrapping generator functions.
    def generate1():
        return awkward0.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])

    def generate2():
        return awkward0.fromiter([[6.6]])

    def generate3():
        return awkward0.fromiter([[7.7, 8.8], [9.9, 10.0, 11.1, 12.2]])

    array = awkward0.ChunkedArray([
        awkward0.VirtualArray(generate1),
        awkward0.VirtualArray(generate2),
        awkward0.VirtualArray(generate3),
    ])
    assert awkward1.to_list(awkward1.from_awkward0(array)) == expected_chunked