Beispiel #1
0
def test_ArrayBuilder_append_2():
    """Check that whole arrays appended inside ArrayBuilder lists nest properly.

    Three routes are exercised (see issue #415): appending Python lists,
    appending awkward arrays from plain Python, and appending awkward arrays
    from inside a Numba-compiled function.
    """
    expected = [[[0, 1, 2]], [[0, 1, 2]], [], [[0, 1]]]
    A = awkward1.from_numpy(numpy.array([0, 1, 2], dtype=numpy.float32))
    B = awkward1.from_numpy(numpy.array([0, 1], dtype=numpy.float32))

    def build(entries):
        # Open one list per entry; ``None`` means "leave this list empty".
        out = awkward1.ArrayBuilder()
        for entry in entries:
            with out.list():
                if entry is not None:
                    out.append(entry)
        return out

    # Route 1: Python lists; the resulting type is expected to be float64.
    from_lists = build([A.tolist(), A.tolist(), None, B.tolist()])
    assert from_lists.snapshot().tolist() == expected
    assert str(awkward1.type(
        from_lists.snapshot())) == "4 * var * var * float64"

    # Route 2: awkward arrays; the original float32 is expected to survive.
    from_arrays = build([A, A, None, B])
    assert from_arrays.snapshot().tolist() == expected
    assert str(awkward1.type(
        from_arrays.snapshot())) == "4 * var * var * float32"

    # Route 3: the same sequence using begin_list/end_list under Numba.
    @numba.njit
    def fill_compiled(out, first, second):
        out.begin_list()
        out.append(first)
        out.end_list()

        out.begin_list()
        out.append(first)
        out.end_list()

        out.begin_list()
        out.end_list()

        out.begin_list()
        out.append(second)
        out.end_list()

        return out

    compiled = fill_compiled(awkward1.ArrayBuilder(), A, B)
    assert compiled.snapshot().tolist() == expected
def test_lists():
    """Merge list-type layouts with ``mergemany`` in both orders.

    The same expectations are checked twice: first with the layouts as built
    by ``awkward1.Array``, then after re-wrapping ``one`` and ``two`` as
    ``ListArray64`` built from their starts/stops/content.
    """
    one = awkward1.Array([[1, 2, 3], [], [4, 5]]).layout
    two = awkward1.Array([[1.1, 2.2], [3.3, 4.4]]).layout
    three = awkward1.layout.EmptyArray()
    four = awkward1.from_numpy(numpy.array([[10], [20]]),
                               regulararray=True,
                               highlevel=False)

    forward = [[1.0, 2.0, 3.0], [], [4.0, 5.0], [1.1, 2.2], [3.3, 4.4],
               [10.0], [20.0]]
    backward = [[10.0], [20.0], [1.1, 2.2], [3.3, 4.4], [1.0, 2.0, 3.0], [],
                [4.0, 5.0]]

    def check(first, second):
        merged = first.mergemany([second, three, four])
        assert awkward1.to_list(merged) == forward
        merged = four.mergemany([three, second, first])
        assert awkward1.to_list(merged) == backward

    check(one, two)

    one = awkward1.layout.ListArray64(one.starts, one.stops, one.content)
    two = awkward1.layout.ListArray64(two.starts, two.stops, two.content)
    check(one, two)
def test_localindex():
    """Check ``localindex`` at every axis for jagged, doubly jagged and regular layouts."""

    def check(layout, expected_per_axis):
        # Position in ``expected_per_axis`` is the axis passed to localindex.
        for axis, expected in enumerate(expected_per_axis):
            assert awkward1.to_list(layout.localindex(axis)) == expected

    jagged = awkward1.from_iter(
        [[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]],
        highlevel=False)
    check(jagged, [
        [0, 1, 2, 3, 4],
        [[0, 1, 2], [], [0, 1], [0], [0, 1, 2, 3]],
    ])

    doubly_jagged = awkward1.from_iter(
        [[[0.0, 1.1, 2.2], [], [3.3, 4.4]], [], [[5.5]],
         [[6.6, 7.7, 8.8, 9.9]]],
        highlevel=False)
    check(doubly_jagged, [
        [0, 1, 2, 3],
        [[0, 1, 2], [], [0], [0]],
        [[[0, 1, 2], [], [0, 1]], [], [[0]], [[0, 1, 2, 3]]],
    ])

    regular = awkward1.from_numpy(numpy.arange(2 * 3 * 5).reshape(2, 3, 5),
                                  regulararray=True,
                                  highlevel=False)
    check(regular, [
        [0, 1],
        [[0, 1, 2], [0, 1, 2]],
        [[[0, 1, 2, 3, 4] for _ in range(3)] for _ in range(2)],
    ])
def test_regulararray():
    """Check Numba lookups for a regular array and lazy materialisation of its VirtualArray.

    The ``calls`` counter records how often the generator callback runs; the
    assertions pin when the first materialisation happens and that it is not
    repeated afterwards.
    """
    layout = awkward1.from_numpy(numpy.array([[1, 2, 3], [4, 5, 6]]),
                                 regulararray=True,
                                 highlevel=False)

    numbatype = awkward1._connect._numba.arrayview.tonumbatype(layout.form)
    assert awkward1_connect_numba_layout.typeof(layout).name == numbatype.name

    # A lookup built from the layout must agree with one filled from its form.
    from_layout = awkward1_connect_numba_arrayview.Lookup(layout)
    from_form = awkward1_connect_numba_arrayview.Lookup(layout.form)
    numbatype.form_fill(0, layout, from_form)

    assert numpy.array_equal(from_layout.arrayptrs, from_form.arrayptrs)
    assert numpy.array_equal(from_layout.sharedptrs == -1,
                             from_form.sharedptrs == -1)

    calls = [0]

    def materialize():
        calls[0] += 1
        return layout

    generator = awkward1.layout.ArrayGenerator(materialize,
                                               form=layout.form,
                                               length=len(layout))
    virtualarray = awkward1.layout.VirtualArray(generator)

    from_virtual = awkward1_connect_numba_arrayview.Lookup(virtualarray)
    assert len(from_layout.arrayptrs) + 3 == len(from_virtual.arrayptrs)

    array = awkward1.Array(virtualarray)
    # Accessing the Numba type alone must not trigger materialisation.
    array.numba_type
    assert calls[0] == 0

    @numba.njit
    def passthrough(x):
        return x

    assert isinstance(passthrough(array).layout, awkward1.layout.VirtualArray)
    assert calls[0] == 0

    @numba.njit
    def pick_element(x):
        return x[1][1]

    # First element access materialises once; the second call reuses it.
    for _ in range(2):
        assert pick_element(array) == 5
        assert calls[0] == 1

    @numba.njit
    def pick_row(x):
        return x[1]

    for _ in range(2):
        assert awkward1.to_list(pick_row(array)) == [4, 5, 6]
        assert calls[0] == 1

    assert awkward1.to_list(passthrough(array)) == [[1, 2, 3], [4, 5, 6]]
Beispiel #5
0
    def test_compare_optim_methods(self):
        """Compare a BFGS line-search optimiser with SciPy's newton-cg on the same data.

        Both models are built from the "ExampleTinyModifiedObs" dataset with
        the same initial beta. The test checks that they report the same
        initial log-likelihood and gradient, and that the betas they converge
        to agree within a loose relative tolerance.

        ``RecursiveLogitModelEstimation.zeros_error_override`` is class-level
        state; it is reset in a ``finally`` block so a failing assertion does
        not leak the override into other tests.
        """
        import awkward1 as ak

        subfolder = "ExampleTinyModifiedObs"
        folder = join("Datasets", subfolder)

        obs_mat, attrs = load_standard_path_format_csv(folder,
                                                       delim=" ",
                                                       angles_included=True)
        obs_mat = obs_mat.toarray()
        obs_record = ak.from_numpy(obs_mat)
        incidence_mat, travel_times_mat, angle_cts_mat = attrs
        left, _, _, u_turn = AngleProcessor.get_turn_categorical_matrices(
            angle_cts_mat, incidence_mat)
        data_list = [travel_times_mat, left, u_turn]
        network_data_struct = ModelDataStruct(data_list, incidence_mat)

        optimiser = optimisers.LineSearchOptimiser(
            optimisers.OptimHessianType.BFGS, max_iter=4)
        optimiser2 = optimisers.ScipyOptimiser(method='newton-cg')

        RecursiveLogitModelEstimation.zeros_error_override = False
        try:
            model = RecursiveLogitModelEstimation(
                network_data_struct,
                optimiser,
                observations_record=obs_record,
                initial_beta=-15)
            m1_ll_out, m1_grad_out = model.get_log_likelihood()

            model2 = RecursiveLogitModelEstimation(
                network_data_struct,
                optimiser2,
                observations_record=obs_record,
                initial_beta=-15)
            m2_ll_out, m2_grad_out = model2.get_log_likelihood()

            # Before optimisation both models must start from the same point.
            assert np.allclose(m2_ll_out, m1_ll_out)
            assert np.allclose(m2_grad_out, m1_grad_out)

            beta1 = model.solve_for_optimal_beta()
            beta2 = model2.solve_for_optimal_beta(verbose=True)

            m1_ll_out, m1_grad_out = model.get_log_likelihood()
            m2_ll_out, m2_grad_out = model2.get_log_likelihood()
            print(m1_ll_out, m2_ll_out)
            print(m1_grad_out, m2_grad_out)

            # The third positional argument of np.allclose is rtol; spell it out.
            assert np.allclose(beta1, beta2, rtol=0.34657)
        finally:
            RecursiveLogitModelEstimation.zeros_error_override = None
Beispiel #6
0
    def test_example_tiny_modified_awkward_array(self):
        """Run the shared tiny-modified data checks with observations held as an awkward array."""
        import awkward1 as ak

        folder = join("Datasets", "ExampleTinyModifiedObs")
        obs_mat, (incidence_mat, travel_times_mat,
                  angle_cts_mat) = load_standard_path_format_csv(
                      folder, delim=" ", angles_included=True)

        # Observations are converted to dense form and wrapped by awkward.
        obs_record = ak.from_numpy(obs_mat.toarray())

        turn_matrices = AngleProcessor.get_turn_categorical_matrices(
            angle_cts_mat, incidence_mat)
        left, u_turn = turn_matrices[0], turn_matrices[3]

        # Incidence matrix which only has nonzero travel times,
        # rather than what is specified in the file.
        positive_times = travel_times_mat > 0
        t_time_incidence = positive_times.astype('int').todok()

        self._tiny_modified_common_data_checks(travel_times_mat, left, u_turn,
                                               t_time_incidence, incidence_mat,
                                               obs_record)
Beispiel #7
0
def pandas_series_to_awkward(series, version=1):
    """Convert the values of a pandas Series into an awkward (or NumPy) array.

    Parameters
    ----------
    series
        Object exposing ``.values``. If the string form of ``values.dtype``
        contains "fletcher" (case-insensitive), ``values.data`` is read and
        handed to awkward's Arrow converter.
        NOTE(review): presumably a fletcher Arrow-backed extension array;
        confirm against callers.
    version : int
        Awkward major version to target: ``0`` or ``1`` (default ``1``).

    Returns
    -------
    For non-fletcher values: an awkward1 array when ``version == 1``,
    otherwise a NumPy array. For fletcher values: the awkward0 or awkward1
    conversion of the Arrow data.

    Raises
    ------
    RuntimeError
        If the values are fletcher-typed and ``version`` is neither 0 nor 1.
    """
    values = series.values
    if "fletcher" not in str(values.dtype).lower():
        if version == 1:
            return awkward1.from_numpy(values)
        # np.asarray copies only when it has to. The former
        # ``np.array(values, copy=False)`` raises on NumPy >= 2 whenever a
        # copy cannot be avoided.
        return np.asarray(values)

    array_arrow = values.data

    if version == 0:
        array = awkward0.fromarrow(array_arrow)
        if "MaskedArray" in str(type(array)):
            # Keep only the content entries selected by the boolean mask.
            array = array._content[array.boolmask()]
        return array
    if version == 1:
        return awkward1.from_arrow(array_arrow)
    raise RuntimeError(
        "What version of awkward do you want? Specify `version=0` or `1`."
    )
Beispiel #8
0
def test_fromnumpy():
    """A 3-D NumPy array and its awkward wrapper must convert to the same nested list."""
    source = numpy.arange(2 * 3 * 5).reshape((2, 3, 5))
    wrapped = awkward1.from_numpy(source)
    expected = awkward1.to_list(source)
    assert expected == awkward1.to_list(wrapped)
def test_fromnumpy():
    """NumPy string arrays must round-trip through ``from_numpy`` and ``to_list``."""
    flat = ["uno", "dos", "tres", "quatro"]
    nested = [["uno", "dos"], ["tres", "quatro"]]

    converted = awkward1.from_numpy(numpy.array(flat))
    assert awkward1.to_list(converted) == flat

    converted = awkward1.from_numpy(numpy.array(nested))
    assert awkward1.to_list(converted) == nested

    converted = awkward1.from_numpy(numpy.array(nested), regulararray=True)
    assert awkward1.to_list(converted) == nested
Beispiel #10
0
def test_toawkward0():
    """Convert a variety of awkward1 layouts to awkward0 and check type and content."""
    convert = awkward1.to_awkward0

    # Flat numbers -> plain NumPy array.
    flat = [1.1, 2.2, 3.3, 4.4]
    array = awkward1.from_iter(flat, highlevel=False)
    assert isinstance(convert(array), numpy.ndarray)
    assert convert(array).tolist() == flat

    # Regular (2, 3, 5) array -> JaggedArray.
    cube = numpy.arange(2 * 3 * 5).reshape(2, 3, 5)
    array = awkward1.from_numpy(cube, highlevel=False).toRegularArray()
    assert isinstance(convert(array), awkward0.JaggedArray)
    assert convert(array).tolist() == [
        [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]],
        [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]],
    ]

    # Variable-length lists -> JaggedArray.
    jagged = [[1.1, 2.2, 3.3], [], [4.4, 5.5]]
    array = awkward1.from_iter(jagged, highlevel=False)
    assert isinstance(convert(array), awkward0.JaggedArray)
    assert convert(array).tolist() == jagged

    # ListArray64 with out-of-order starts/stops over a shared content buffer.
    starts = awkward1.layout.Index64(
        numpy.array([4, 999, 1], dtype=numpy.int64))
    stops = awkward1.layout.Index64(
        numpy.array([7, 999, 3], dtype=numpy.int64))
    buffer = awkward1.layout.NumpyArray(
        numpy.array([3.14, 4.4, 5.5, 123, 1.1, 2.2, 3.3, 321]))
    array = awkward1.layout.ListArray64(starts, stops, buffer)
    assert isinstance(convert(array), awkward0.JaggedArray)
    assert convert(array).tolist() == jagged

    # Records with named fields -> Table; a single record -> dict.
    records = [
        {"x": 0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 3.3, "y": [3, 3, 3]},
    ]
    array = awkward1.from_iter(records, highlevel=False)
    assert isinstance(convert(array[2]), dict)
    assert convert(array[2])["x"] == 2.2
    assert isinstance(convert(array[2])["y"], numpy.ndarray)
    assert convert(array[2])["y"].tolist() == [2, 2]

    assert isinstance(convert(array), awkward0.Table)
    assert convert(array).tolist() == records

    # Tuples -> Table; a single entry -> tuple.
    tuples = [(0, []), (1.1, [1]), (2.2, [2, 2]), (3.3, [3, 3, 3])]
    array = awkward1.from_iter(tuples, highlevel=False)
    assert isinstance(convert(array), awkward0.Table)
    assert convert(array).tolist() == tuples
    assert isinstance(convert(array[2]), tuple)
    assert convert(array[2])[0] == 2.2
    assert convert(array[2])[1].tolist() == [2, 2]

    # Mixed numbers and lists -> UnionArray.
    mixed = [0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]]
    array = awkward1.from_iter(mixed, highlevel=False)
    assert isinstance(convert(array), awkward0.UnionArray)
    assert convert(array).tolist() == mixed

    # Numbers with missing values -> IndexedMaskedArray.
    optional = [1.1, 2.2, None, None, 3.3, None, 4.4]
    array = awkward1.from_iter(optional, highlevel=False)
    assert isinstance(convert(array), awkward0.IndexedMaskedArray)
    assert convert(array).tolist() == optional

    # Explicit IndexedArray64 -> IndexedArray.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    index = awkward1.layout.Index64(
        numpy.array([3, 2, 2, 5, 0], dtype=numpy.int64))
    array = awkward1.layout.IndexedArray64(index, content)
    assert isinstance(convert(array), awkward0.IndexedArray)
    assert convert(array).tolist() == [3.3, 2.2, 2.2, 5.5, 0.0]
def cleandataset(f, defaults_per_variable, isMC):
    """Flatten per-event jet branches into a per-jet table and fill bad values with defaults.

    Parameters
    ----------
    f
        Mapping-like object whose ``f['Events']`` exposes ``keys()``,
        ``iterate(...)`` and ``num_entries``.
        NOTE(review): presumably an opened uproot file; confirm with callers.
    defaults_per_variable
        Indexable by column number; ``defaults_per_variable[j]`` is the value
        written into column ``j`` wherever an entry is considered invalid.
    isMC : bool
        When true, two truth branches are also read and a target-class column
        is appended to the result (0 = b, 1 = bb, 2 = c, 3 = udsg).

    Returns
    -------
    numpy.ndarray
        2-D array with one row per jet holding columns 0..66, plus the
        target-class column as the last column when ``isMC`` is true.

    Notes
    -----
    * NaN entries are detected with ``np.isnan``. The earlier ``== np.nan``
      comparison is always False, so NaNs were never replaced.
    * The target column is addressed as ``len(feature_names)``, the row it is
      stored in, instead of a hard-coded 73.
    * The code assumes a fixed column layout (67 inputs, counters in columns
      64 and 65). NOTE(review): this depends on the order returned by
      ``keys()``; confirm against the input files.
    """
    print('Doing cleaning, isMC = ', isMC)

    n_inputs = 67  # columns 0..66 are cleaned and returned

    events = f['Events']
    feature_names = [
        k for k in events.keys()
        if k in ('Jet_eta', 'Jet_pt') or 'Jet_DeepCSV' in k
    ]
    # Tagger outputs to compare with later.
    feature_names.extend(('Jet_btagDeepB_b', 'Jet_btagDeepB_bb',
                          'Jet_btagDeepC', 'Jet_btagDeepL'))
    if isMC:
        # Variables used to derive the truth label.
        feature_names.extend(('Jet_nBHadrons', 'Jet_hadronFlavour'))

    # One step covering all entries: take the first (and only) chunk.
    for data in events.iterate(feature_names,
                               step_size=events.num_entries,
                               library='ak'):
        break

    # One row per feature plus one spare row for the target class; entries
    # are flattened from per-event to per-jet.
    n_jets = len(ak.flatten(data['Jet_pt'], axis=1))
    datacolumns = np.zeros((len(feature_names) + 1, n_jets))
    for row, name in enumerate(feature_names):
        datacolumns[row] = ak.to_numpy(ak.flatten(data[name], axis=1))

    target_row = len(feature_names)
    if isMC:
        nbhad = ak.to_numpy(ak.flatten(data['Jet_nBHadrons'], axis=1))
        hadflav = ak.to_numpy(ak.flatten(data['Jet_hadronFlavour'], axis=1))

        target_class = np.full_like(hadflav, 3)  # udsg
        target_class = np.where(hadflav == 4, 2, target_class)  # c
        target_class = np.where((hadflav == 5) & (nbhad > 1), 1,
                                target_class)  # bb
        target_class = np.where((hadflav == 5) & (nbhad <= 1), 0,
                                target_class)  # b, lepb

        datacolumns[target_row] = target_class

    # Rows become jets, columns become variables. This is a view, so the
    # in-place edits below write through to ``datacolumns``.
    alldata = datacolumns.transpose()

    # Replace NaN, +/-inf and pre-existing -999 placeholders by the default
    # of each column, so that original -999 values also move into the new
    # default bin.
    for j in range(n_inputs):
        column = alldata[:, j]
        invalid = np.isnan(column) | np.isinf(column) | (column == -999)
        column[invalid] = defaults_per_variable[j]

    # Per-slot variables: slot k of every group is only meaningful when the
    # counter in column 64 exceeds k; otherwise it is reset to its default.
    # NOTE(review): column 64 looks like a track count; confirm.
    slot0_columns = (6, 12, 22, 29, 35, 42, 50)
    counter_64 = alldata[:, 64]
    for slot in range(6):
        missing = counter_64 <= slot
        for first in slot0_columns:
            col = first + slot
            alldata[missing, col] = defaults_per_variable[col]

    # Same idea for columns 18..21, driven by the counter in column 65.
    counter_65 = alldata[:, 65]
    for slot, col in enumerate((18, 19, 20, 21)):
        alldata[counter_65 <= slot, col] = defaults_per_variable[col]

    # In these columns -1 is treated as "not available".
    for col in (41, 48, 49, 56):
        alldata[alldata[:, col] == -1, col] = defaults_per_variable[col]

    datacls = list(range(n_inputs))
    if isMC:
        datacls.append(target_row)

    return alldata[:, datacls]