Beispiel #1
0
def _ensure_flat(array, allow_missing=False):
    """Normalize an array to a flat numpy array or raise ValueError.

    Parameters
    ----------
    array
        An awkward0 array (``awkward.AwkwardArray``), an ``awkward1.Array``
        or a ``numpy.ndarray``.  awkward0 input is first converted with
        ``awkward1.from_awkward0``.
    allow_missing : bool, optional
        When true, an array of type ``N * ?primitive`` is accepted in
        addition to ``N * primitive``; the flag is also forwarded to
        ``awkward1.to_numpy``.

    Returns
    -------
    The validated ``numpy.ndarray`` input as-is, or the result of
    ``awkward1.to_numpy`` when the (possibly converted) input is an
    ``awkward1.Array``.

    Raises
    ------
    ValueError
        If ``array`` is not one of the accepted container types, or if its
        awkward type is not a one-dimensional array of (optionally missing,
        when ``allow_missing``) primitives.
    """
    if isinstance(array, awkward.AwkwardArray):
        array = awkward1.from_awkward0(array)
    elif not isinstance(array, (awkward1.Array, numpy.ndarray)):
        # The operand is wrapped in a 1-tuple on purpose: ``array`` is
        # arbitrary user input here, and a bare tuple on the right-hand side
        # of ``%`` would be unpacked as format arguments and raise TypeError
        # instead of the intended ValueError.
        raise ValueError("Expected a numpy or awkward array, received: %r" %
                         (array, ))

    aktype = awkward1.type(array)
    if not isinstance(aktype, awkward1.types.ArrayType):
        raise ValueError("Expected an array type, received: %r" % (aktype, ))
    isprimitive = isinstance(aktype.type, awkward1.types.PrimitiveType)
    isoptionprimitive = isinstance(
        aktype.type, awkward1.types.OptionType) and isinstance(
            aktype.type.type, awkward1.types.PrimitiveType)
    # With allow_missing both "primitive" and "option of primitive" are fine;
    # without it only a plain primitive item type is accepted.
    if allow_missing and not (isprimitive or isoptionprimitive):
        raise ValueError(
            "Expected an array of type N * primitive or N * ?primitive, received: %r"
            % (aktype, ))
    if not (allow_missing or isprimitive):
        raise ValueError(
            "Expected an array of type N * primitive, received: %r" %
            (aktype, ))
    if isinstance(array, awkward1.Array):
        array = awkward1.to_numpy(array, allow_missing=allow_missing)
    return array
def transform_single_file(file_path,
                          output_path,
                          servicex=None,
                          tree_name='Events'):
    """Transform one input file and optionally write / publish the result.

    The function imports ``generated_transformer`` and calls its
    ``run_query(file_path, tree_name)``, converts the returned table to an
    Arrow table, writes it to ``output_path`` as Parquet when a path is
    given, and, when the module-level ``messaging`` object is truthy, streams
    it through an ``ArrowWriter`` / ``ArrowIterator`` pair.

    Parameters
    ----------
    file_path
        Location of the input file.  Passed unchanged to ``run_query`` and
        to ``ArrowIterator``; converted with ``str()`` for log / error text.
    output_path
        Destination of the Parquet file.  Nothing is written when falsy.
    servicex
        Not used by this function body; kept for interface compatibility.
    tree_name : str
        Forwarded to ``generated_transformer.run_query``.

    Raises
    ------
    RuntimeError
        If anything in the transform / serialization / Parquet-writing stage
        fails.  The original exception is attached as ``__cause__``.

    Notes
    -----
    ``messaging`` and ``args`` are read as globals defined outside this
    block.
    """
    print("Transforming a single path: " + str(file_path))

    try:
        import generated_transformer
        start_transform = time.time()
        table = generated_transformer.run_query(file_path, tree_name)
        end_transform = time.time()
        print(
            f'generated_transformer.py: {round(end_transform - start_transform, 2)} sec'
        )

        start_serialization = time.time()
        # Round trip awkward0 -> awkward1 -> awkward0 before the Arrow
        # conversion.  NOTE(review): presumably this normalizes the table
        # layout for ``awkward.toarrow`` -- confirm before removing.
        table_awk1 = awkward1.from_awkward0(table)
        new_table = awkward1.to_awkward0(table_awk1)
        arrow = awkward.toarrow(new_table)
        end_serialization = time.time()
        print(
            f'awkward Table -> Arrow: {round(end_serialization - start_serialization, 2)} sec'
        )

        if output_path:
            writer = pq.ParquetWriter(output_path, arrow.schema)
            try:
                writer.write_table(table=arrow)
            finally:
                # Always release the file handle, even if the write fails.
                writer.close()

    except Exception as exc:
        traceback.print_tb(exc.__traceback__, limit=20, file=sys.stdout)
        print(exc)

        # str(file_path): the path may be a non-str object (the first log
        # line already guards for that); concatenating it raw would raise
        # TypeError here and hide the real failure.
        raise RuntimeError("Failed to transform input file " +
                           str(file_path) + ": " + str(exc)) from exc

    if messaging:
        arrow_writer = ArrowWriter(file_format=args.result_format,
                                   object_store=None,
                                   messaging=messaging)

        # TODO: implement chunk size parameter
        transformer = ArrowIterator(arrow,
                                    chunk_size=1000,
                                    file_path=file_path)
        arrow_writer.write_branches_to_arrow(transformer=transformer,
                                             topic_name=args.request_id,
                                             file_id=None,
                                             request_id=args.request_id)
Beispiel #3
0
def returnEventStructure(unsmearedEventsA, smearedEventsA):
    """Zip smeared and unsmeared branches into one nested record array.

    Both arguments are mappings of branch name -> awkward0 array; every
    value is converted with ``ak.from_awkward0``.  The returned array has
    three fields:

    * ``evnum`` -- taken from the smeared input,
    * ``true``  -- built from the unsmeared input,
    * ``rec``   -- built from the smeared input.

    ``true`` and ``rec`` each carry the ``x``, ``y``, ``Q2``, ``PhPerp``,
    ``PhEta``, ``PhPhi`` and ``Ph`` branches plus a nested ``pair`` record
    holding ``Z``, ``hadP``, ``hadPt``, ``hadEta`` and ``hadPhi``.
    """
    # Smeared input is converted first, then the unsmeared one.
    rec_src = {
        key: ak.from_awkward0(branch)
        for key, branch in smearedEventsA.items()
    }
    true_src = {
        key: ak.from_awkward0(branch)
        for key, branch in unsmearedEventsA.items()
    }

    kinematic_keys = ("x", "y", "Q2")
    ph_keys = ("PhPerp", "PhEta", "PhPhi", "Ph")
    hadron_keys = ("Z", "hadP", "hadPt", "hadEta", "hadPhi")

    def zip_pair(src):
        # Nested "pair" record, zipped one level deep only.
        return ak.zip({key: src[key] for key in hadron_keys}, depthlimit=1)

    event_number = rec_src["evnum"]

    # Field order of "true": kinematics, pair, then the Ph* branches.
    true_fields = {key: true_src[key] for key in kinematic_keys}
    true_fields["pair"] = zip_pair(true_src)
    for key in ph_keys:
        true_fields[key] = true_src[key]
    true_record = ak.zip(true_fields, depthlimit=1)

    # Field order of "rec": kinematics, the Ph* branches, then pair.
    rec_fields = {key: rec_src[key] for key in kinematic_keys + ph_keys}
    rec_fields["pair"] = zip_pair(rec_src)
    rec_record = ak.zip(rec_fields, depthlimit=1)

    return ak.zip({
        "evnum": event_number,
        "true": true_record,
        "rec": rec_record
    })
Beispiel #4
0
def test_fromawkward0():
    """Check ``awkward1.from_awkward0`` on a range of inputs.

    Each section builds one kind of input (plain numpy array, tuple, dict,
    and several awkward0 array flavours), then asserts on the type of the
    converted object and on its ``awkward1.to_list`` content.
    """
    # Plain numpy array.
    array = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5])
    assert isinstance(awkward1.from_awkward0(array), awkward1.highlevel.Array)
    assert isinstance(awkward1.from_awkward0(array, highlevel=False),
                      awkward1.layout.NumpyArray)
    assert awkward1.to_list(array) == [1.1, 2.2, 3.3, 4.4, 5.5]
    # Also verify the content of the converted array, as every other
    # section below does; the assertion above only looks at the raw input.
    assert awkward1.to_list(
        awkward1.from_awkward0(array)) == [1.1, 2.2, 3.3, 4.4, 5.5]

    # Tuple input.
    array = (123, numpy.array([1.1, 2.2, 3.3]))
    assert isinstance(awkward1.from_awkward0(array), awkward1.highlevel.Record)
    assert isinstance(
        awkward1.from_awkward0(array).layout, awkward1.layout.Record)
    assert awkward1.to_list(awkward1.from_awkward0(array)) == (123,
                                                               [1.1, 2.2, 3.3])

    # Dict input.
    array = {"x": 123, "y": numpy.array([1.1, 2.2, 3.3])}
    assert isinstance(awkward1.from_awkward0(array), awkward1.highlevel.Record)
    assert isinstance(
        awkward1.from_awkward0(array).layout, awkward1.layout.Record)
    assert awkward1.to_list(awkward1.from_awkward0(array)) == {
        "x": 123,
        "y": [1.1, 2.2, 3.3]
    }

    # Variable-length lists.
    array = awkward0.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    assert isinstance(
        awkward1.from_awkward0(array, highlevel=False),
        (awkward1.layout.ListOffsetArray32, awkward1.layout.ListOffsetArrayU32,
         awkward1.layout.ListOffsetArray64))
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [[1.1, 2.2, 3.3],
                                                               [], [4.4, 5.5]]

    # Records with named fields.
    array = awkward0.fromiter([{
        "x": 0,
        "y": []
    }, {
        "x": 1.1,
        "y": [1]
    }, {
        "x": 2.2,
        "y": [2, 2]
    }])
    assert isinstance(awkward1.from_awkward0(array, highlevel=False),
                      awkward1.layout.RecordArray)
    assert not awkward1.from_awkward0(array, highlevel=False).istuple
    assert awkward1.from_awkward0(array).layout.keys() == ["x", "y"]
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [{
        "x": 0,
        "y": []
    }, {
        "x": 1.1,
        "y": [1]
    }, {
        "x": 2.2,
        "y": [2, 2]
    }]

    # Table built from positional columns: expected to convert to a tuple
    # record with keys "0" and "1".
    array = awkward0.Table([0.0, 1.1, 2.2], awkward0.fromiter([[], [1], [2,
                                                                         2]]))
    assert isinstance(awkward1.from_awkward0(array, highlevel=False),
                      awkward1.layout.RecordArray)
    assert awkward1.from_awkward0(array, highlevel=False).istuple
    assert awkward1.from_awkward0(array).layout.keys() == ["0", "1"]
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [(0.0, []),
                                                               (1.1, [1]),
                                                               (2.2, [2, 2])]

    # Mixed scalars and lists.
    array = awkward0.fromiter([0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]])
    assert isinstance(
        awkward1.from_awkward0(array, highlevel=False),
        (awkward1.layout.UnionArray8_32, awkward1.layout.UnionArray8_U32,
         awkward1.layout.UnionArray8_64))
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [
        0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]
    ]

    # Missing values.
    array = awkward0.fromiter([1.1, 2.2, None, None, 3.3, None, 4.4])
    assert isinstance(awkward1.from_awkward0(array, highlevel=False),
                      awkward1.layout.ByteMaskedArray)
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [
        1.1, 2.2, None, None, 3.3, None, 4.4
    ]

    # Strings.
    array = awkward0.fromiter(["hello", "you", "guys"])
    assert isinstance(
        awkward1.from_awkward0(array, highlevel=False),
        (awkward1.layout.ListArray32, awkward1.layout.ListArrayU32,
         awkward1.layout.ListArray64, awkward1.layout.ListOffsetArray32,
         awkward1.layout.ListOffsetArrayU32,
         awkward1.layout.ListOffsetArray64))
    assert awkward1.from_awkward0(
        array, highlevel=False).parameters["__array__"] in ("string", "bytes")
    assert awkward1.from_awkward0(
        array,
        highlevel=False).content.parameters["__array__"] in ("utf8", "char")
    assert awkward1.to_list(
        awkward1.from_awkward0(array)) == ["hello", "you", "guys"]

    # Arbitrary Python objects: expected to come back as records carrying a
    # "__record__" parameter.
    class Point(object):
        def __init__(self, x, y):
            self.x, self.y = x, y

        def __repr__(self):
            return "Point({0}, {1})".format(self.x, self.y)

    array = awkward0.fromiter([Point(1.1, 10), Point(2.2, 20), Point(3.3, 30)])
    assert awkward1.to_list(awkward1.from_awkward0(array)) == [{
        "x": 1.1,
        "y": 10
    }, {
        "x": 2.2,
        "y": 20
    }, {
        "x": 3.3,
        "y": 30
    }]
    assert "__record__" in awkward1.from_awkward0(array).layout.parameters

    # ChunkedArray of concrete chunks: converted content is the chunks
    # laid end to end.
    array = awkward0.ChunkedArray([
        awkward0.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]]),
        awkward0.fromiter([[6.6]]),
        awkward0.fromiter([[7.7, 8.8], [9.9, 10.0, 11.1, 12.2]])
    ])
    assert awkward1.to_list(
        awkward1.from_awkward0(array)) == [[1.1, 2.2, 3.3], [], [4.4, 5.5],
                                           [6.6], [7.7, 8.8],
                                           [9.9, 10.0, 11.1, 12.2]]

    # ChunkedArray whose chunks are VirtualArrays wrapping generator
    # functions; the expected content is the same as above.
    def generate1():
        return awkward0.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])

    def generate2():
        return awkward0.fromiter([[6.6]])

    def generate3():
        return awkward0.fromiter([[7.7, 8.8], [9.9, 10.0, 11.1, 12.2]])

    array = awkward0.ChunkedArray([
        awkward0.VirtualArray(generate1),
        awkward0.VirtualArray(generate2),
        awkward0.VirtualArray(generate3)
    ])
    assert awkward1.to_list(
        awkward1.from_awkward0(array)) == [[1.1, 2.2, 3.3], [], [4.4, 5.5],
                                           [6.6], [7.7, 8.8],
                                           [9.9, 10.0, 11.1, 12.2]]