def test_ArrayBuilder_append_2():
    """Append whole arrays inside ``ArrayBuilder`` lists (issue #415).

    The same nested structure is built three ways: from Python lists, from
    awkward arrays in pure Python, and from awkward arrays inside a
    numba-compiled function.
    """
    # issue #415
    expected = [[[0, 1, 2]], [[0, 1, 2]], [], [[0, 1]]]
    A = awkward1.from_numpy(numpy.array([0, 1, 2], dtype=numpy.float32))
    B = awkward1.from_numpy(numpy.array([0, 1], dtype=numpy.float32))

    # Appending plain Python lists: the asserted leaf type is float64.
    from_lists = awkward1.ArrayBuilder()
    with from_lists.list():
        from_lists.append(A.tolist())
    with from_lists.list():
        from_lists.append(A.tolist())
    with from_lists.list():
        pass
    with from_lists.list():
        from_lists.append(B.tolist())
    assert from_lists.snapshot().tolist() == expected
    assert str(awkward1.type(from_lists.snapshot())) == "4 * var * var * float64"

    # Appending the arrays themselves: the asserted leaf type stays float32.
    from_arrays = awkward1.ArrayBuilder()
    with from_arrays.list():
        from_arrays.append(A)
    with from_arrays.list():
        from_arrays.append(A)
    with from_arrays.list():
        pass
    with from_arrays.list():
        from_arrays.append(B)
    assert from_arrays.snapshot().tolist() == expected
    assert str(awkward1.type(from_arrays.snapshot())) == "4 * var * var * float32"

    # Same construction with explicit begin/end calls under numba.
    @numba.njit
    def f1(out, first, second):
        out.begin_list()
        out.append(first)
        out.end_list()

        out.begin_list()
        out.append(first)
        out.end_list()

        out.begin_list()
        out.end_list()

        out.begin_list()
        out.append(second)
        out.end_list()

        return out

    assert f1(awkward1.ArrayBuilder(), A, B).snapshot().tolist() == expected
def test_lists():
    """Merge list-type layouts with ``mergemany`` in both orders.

    The jagged inputs are first used as produced by ``awkward1.Array(...).layout``
    and then re-wrapped as ``ListArray64`` built from their own
    starts/stops/content; the merged output must be the same either way.
    """
    forward = [[1.0, 2.0, 3.0], [], [4.0, 5.0], [1.1, 2.2], [3.3, 4.4], [10.0], [20.0]]
    backward = [[10.0], [20.0], [1.1, 2.2], [3.3, 4.4], [1.0, 2.0, 3.0], [], [4.0, 5.0]]

    ints = awkward1.Array([[1, 2, 3], [], [4, 5]]).layout
    floats = awkward1.Array([[1.1, 2.2], [3.3, 4.4]]).layout
    empty = awkward1.layout.EmptyArray()
    regular = awkward1.from_numpy(
        numpy.array([[10], [20]]), regulararray=True, highlevel=False
    )

    assert awkward1.to_list(ints.mergemany([floats, empty, regular])) == forward
    assert awkward1.to_list(regular.mergemany([empty, floats, ints])) == backward

    # Repeat with the jagged inputs expressed as explicit ListArray64 nodes.
    ints = awkward1.layout.ListArray64(ints.starts, ints.stops, ints.content)
    floats = awkward1.layout.ListArray64(floats.starts, floats.stops, floats.content)

    assert awkward1.to_list(ints.mergemany([floats, empty, regular])) == forward
    assert awkward1.to_list(regular.mergemany([empty, floats, ints])) == backward
def test_localindex():
    """Check ``localindex`` on every axis of jagged, doubly jagged and regular layouts."""
    jagged = awkward1.from_iter(
        [[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]],
        highlevel=False,
    )
    jagged_expected = (
        [0, 1, 2, 3, 4],
        [[0, 1, 2], [], [0, 1], [0], [0, 1, 2, 3]],
    )
    for axis, expected in enumerate(jagged_expected):
        assert awkward1.to_list(jagged.localindex(axis)) == expected

    nested = awkward1.from_iter(
        [[[0.0, 1.1, 2.2], [], [3.3, 4.4]], [], [[5.5]], [[6.6, 7.7, 8.8, 9.9]]],
        highlevel=False,
    )
    nested_expected = (
        [0, 1, 2, 3],
        [[0, 1, 2], [], [0], [0]],
        [[[0, 1, 2], [], [0, 1]], [], [[0]], [[0, 1, 2, 3]]],
    )
    for axis, expected in enumerate(nested_expected):
        assert awkward1.to_list(nested.localindex(axis)) == expected

    regular = awkward1.from_numpy(
        numpy.arange(2 * 3 * 5).reshape(2, 3, 5), regulararray=True, highlevel=False
    )
    regular_expected = (
        [0, 1],
        [[0, 1, 2], [0, 1, 2]],
        [
            [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
            [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
        ],
    )
    for axis, expected in enumerate(regular_expected):
        assert awkward1.to_list(regular.localindex(axis)) == expected
def test_regulararray():
    """Exercise the numba lookup machinery for a regular array and a VirtualArray wrapping it.

    The second half counts how often the VirtualArray's generator is invoked
    and asserts when that count changes.
    """
    layout = awkward1.from_numpy(numpy.array([[1, 2, 3], [4, 5, 6]]), regulararray=True, highlevel=False)

    # The numba type derived from the form must carry the same name as the
    # one derived from the layout itself.
    numbatype = awkward1._connect._numba.arrayview.tonumbatype(layout.form)
    assert awkward1_connect_numba_layout.typeof(layout).name == numbatype.name

    # A Lookup built from the layout and one built from the form and then
    # filled via form_fill must agree on arrayptrs and on which sharedptrs are -1.
    lookup1 = awkward1_connect_numba_arrayview.Lookup(layout)
    lookup2 = awkward1_connect_numba_arrayview.Lookup(layout.form)
    numbatype.form_fill(0, layout, lookup2)

    assert numpy.array_equal(lookup1.arrayptrs, lookup2.arrayptrs)
    assert numpy.array_equal(lookup1.sharedptrs == -1, lookup2.sharedptrs == -1)

    # One-element list so the nested function can increment it in place.
    counter = [0]

    def materialize():
        counter[0] += 1
        return layout

    generator = awkward1.layout.ArrayGenerator(materialize, form=layout.form, length=len(layout))
    virtualarray = awkward1.layout.VirtualArray(generator)

    # The virtual array's lookup is asserted to hold exactly three more
    # arrayptrs entries than the plain layout's.
    lookup3 = awkward1_connect_numba_arrayview.Lookup(virtualarray)
    assert len(lookup1.arrayptrs) + 3 == len(lookup3.arrayptrs)

    array = awkward1.Array(virtualarray)
    # Accessing numba_type must not call materialize().
    array.numba_type
    assert counter[0] == 0

    @numba.njit
    def f3(x):
        return x

    # Passing the array through numba unchanged keeps it virtual and unmaterialized.
    assert isinstance(f3(array).layout, awkward1.layout.VirtualArray)
    assert counter[0] == 0

    @numba.njit
    def f1(x):
        return x[1][1]

    # First element access calls materialize() exactly once ...
    assert f1(array) == 5
    assert counter[0] == 1

    # ... and the counter does not move on any later access.
    assert f1(array) == 5
    assert counter[0] == 1

    @numba.njit
    def f2(x):
        return x[1]

    assert awkward1.to_list(f2(array)) == [4, 5, 6]
    assert counter[0] == 1

    assert awkward1.to_list(f2(array)) == [4, 5, 6]
    assert counter[0] == 1

    assert awkward1.to_list(f3(array)) == [[1, 2, 3], [4, 5, 6]]
def test_compare_optim_methods(self):
    """Compare a BFGS line-search optimiser with SciPy's ``newton-cg`` on one dataset.

    Both models are built from the same network data and observations with
    the same ``initial_beta``. Their initial log-likelihood and gradient must
    match, and the betas returned by ``solve_for_optimal_beta`` must agree to
    within a relative tolerance of 0.34657.

    The class-level ``zeros_error_override`` flag is set to ``False`` for the
    duration of the test and is always reset to ``None`` afterwards, even
    when an assertion fails, so the override cannot leak into other tests.
    """
    subfolder = "ExampleTinyModifiedObs"  # big data from classical v2
    folder = join("Datasets", subfolder)
    obs_mat, attrs = load_standard_path_format_csv(folder, delim=" ",
                                                   angles_included=True)
    import awkward1 as ak
    obs_mat = obs_mat.toarray()
    obs_record = ak.from_numpy(obs_mat)
    incidence_mat, travel_times_mat, angle_cts_mat = attrs
    left, _, _, u_turn = AngleProcessor.get_turn_categorical_matrices(
        angle_cts_mat, incidence_mat)
    data_list = [travel_times_mat, left, u_turn]
    network_data_struct = ModelDataStruct(data_list, incidence_mat)
    # network_data_struct.add_second_travel_time_for_testing()

    optimiser = optimisers.LineSearchOptimiser(
        optimisers.OptimHessianType.BFGS, max_iter=4)

    RecursiveLogitModelEstimation.zeros_error_override = False
    try:
        model = RecursiveLogitModelEstimation(network_data_struct, optimiser,
                                              observations_record=obs_record,
                                              initial_beta=-15)
        m1_ll_out, m1_grad_out = model.get_log_likelihood()

        optimiser2 = optimisers.ScipyOptimiser(method='newton-cg')
        model2 = RecursiveLogitModelEstimation(network_data_struct, optimiser2,
                                               observations_record=obs_record,
                                               initial_beta=-15)
        m2_ll_out, m2_grad_out = model2.get_log_likelihood()

        # Before optimisation both models must evaluate identically.
        assert np.allclose(m2_ll_out, m1_ll_out)
        assert np.allclose(m2_grad_out, m1_grad_out)

        beta1 = model.solve_for_optimal_beta()
        beta2 = model2.solve_for_optimal_beta(verbose=True)

        m1_ll_out, m1_grad_out = model.get_log_likelihood()
        m2_ll_out, m2_grad_out = model2.get_log_likelihood()
        print(m1_ll_out, m2_ll_out)
        print(m1_grad_out, m2_grad_out)

        # The third positional argument of np.allclose is rtol; spelled out
        # here so the (deliberately loose) tolerance is not misread as atol.
        assert np.allclose(beta1, beta2, rtol=0.34657)
    finally:
        # Restore the class-level flag regardless of the test outcome.
        RecursiveLogitModelEstimation.zeros_error_override = None
def test_example_tiny_modified_awkward_array(self):
    """Run the shared tiny-modified data checks with observations held as an awkward array."""
    dataset_dir = join("Datasets", "ExampleTinyModifiedObs")
    loaded_obs, network_attrs = load_standard_path_format_csv(
        dataset_dir, delim=" ", angles_included=True)

    import awkward1 as ak
    # Densify the loaded observations before wrapping them as an awkward array.
    obs_record = ak.from_numpy(loaded_obs.toarray())

    incidence_mat, travel_times_mat, angle_cts_mat = network_attrs
    left_turns, _, _, u_turns = AngleProcessor.get_turn_categorical_matrices(
        angle_cts_mat, incidence_mat)

    # Incidence matrix which only has nonzero travel times,
    # rather than what is specified in the file.
    has_travel_time = travel_times_mat > 0
    t_time_incidence = has_travel_time.astype('int').todok()

    self._tiny_modified_common_data_checks(
        travel_times_mat,
        left_turns,
        u_turns,
        t_time_incidence,
        incidence_mat,
        obs_record,
    )
def pandas_series_to_awkward(series, version=1):
    """Convert a pandas Series to an awkward (or NumPy) array.

    Args:
        series: Object exposing ``.values``; the dtype name of those values
            decides which conversion path is taken.
        version: Awkward major version to target, ``0`` or ``1``.

    Returns:
        For values whose dtype name does not contain "fletcher": an
        ``awkward1`` array when ``version == 1``, otherwise a plain NumPy
        array of the values. For "fletcher" values: the Arrow data
        (``values.data``) converted with ``awkward0.fromarrow`` or
        ``awkward1.from_arrow``.

    Raises:
        RuntimeError: For "fletcher" values when ``version`` is neither 0 nor 1.
    """
    values = series.values

    if "fletcher" not in str(values.dtype).lower():
        if version == 1:
            return awkward1.from_numpy(values)
        # np.asarray avoids a copy when possible and copies when it must.
        # np.array(..., copy=False) does the same on NumPy 1.x but raises
        # ValueError on NumPy 2 whenever a copy cannot be avoided.
        return np.asarray(values)

    array_arrow = values.data
    if version == 0:
        array = awkward0.fromarrow(array_arrow)
        # Masked results are reduced to the content selected by boolmask().
        if "MaskedArray" in str(type(array)):
            array = array._content[array.boolmask()]
    elif version == 1:
        array = awkward1.from_arrow(array_arrow)
    else:
        raise RuntimeError(
            "What version of awkward do you want? Specify `version=0` or `1`."
        )
    return array
def test_fromnumpy():
    """A 3-D NumPy array and its ``from_numpy`` wrapper convert to the same nested list."""
    source = numpy.arange(2 * 3 * 5).reshape((2, 3, 5))
    wrapped = awkward1.from_numpy(source)

    from_source = awkward1.to_list(source)
    from_wrapped = awkward1.to_list(wrapped)
    assert from_source == from_wrapped
def test_fromnumpy():
    """String NumPy arrays survive ``from_numpy`` -> ``to_list`` in 1-D, 2-D and regular form."""
    flat_words = ["uno", "dos", "tres", "quatro"]
    paired_words = [["uno", "dos"], ["tres", "quatro"]]

    flat = awkward1.from_numpy(numpy.array(flat_words))
    assert awkward1.to_list(flat) == flat_words

    nested = awkward1.from_numpy(numpy.array(paired_words))
    assert awkward1.to_list(nested) == paired_words

    regular = awkward1.from_numpy(numpy.array(paired_words), regulararray=True)
    assert awkward1.to_list(regular) == paired_words
def test_toawkward0():
    """Convert each awkward1 layout kind with ``to_awkward0`` and check result type and contents."""
    convert = awkward1.to_awkward0

    # Flat numbers -> plain NumPy array.
    flat_values = [1.1, 2.2, 3.3, 4.4]
    flat = awkward1.from_iter(flat_values, highlevel=False)
    assert isinstance(convert(flat), numpy.ndarray)
    assert convert(flat).tolist() == flat_values

    # Regular 3-D array -> JaggedArray.
    cube = numpy.arange(2 * 3 * 5).reshape(2, 3, 5)
    regular = awkward1.from_numpy(cube, highlevel=False).toRegularArray()
    assert isinstance(convert(regular), awkward0.JaggedArray)
    assert convert(regular).tolist() == cube.tolist()

    # Variable-length lists -> JaggedArray.
    jagged_values = [[1.1, 2.2, 3.3], [], [4.4, 5.5]]
    jagged = awkward1.from_iter(jagged_values, highlevel=False)
    assert isinstance(convert(jagged), awkward0.JaggedArray)
    assert convert(jagged).tolist() == jagged_values

    # ListArray64 with out-of-order starts/stops -> JaggedArray with the same lists.
    starts = awkward1.layout.Index64(numpy.array([4, 999, 1], dtype=numpy.int64))
    stops = awkward1.layout.Index64(numpy.array([7, 999, 3], dtype=numpy.int64))
    payload = awkward1.layout.NumpyArray(
        numpy.array([3.14, 4.4, 5.5, 123, 1.1, 2.2, 3.3, 321]))
    listarray = awkward1.layout.ListArray64(starts, stops, payload)
    assert isinstance(convert(listarray), awkward0.JaggedArray)
    assert convert(listarray).tolist() == jagged_values

    # Records with named fields -> Table; a single record -> dict.
    record_values = [
        {"x": 0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 3.3, "y": [3, 3, 3]},
    ]
    records = awkward1.from_iter(record_values, highlevel=False)
    assert isinstance(convert(records[2]), dict)
    assert convert(records[2])["x"] == 2.2
    assert isinstance(convert(records[2])["y"], numpy.ndarray)
    assert convert(records[2])["y"].tolist() == [2, 2]
    assert isinstance(convert(records), awkward0.Table)
    assert convert(records).tolist() == record_values

    # Tuples -> Table; a single tuple record -> tuple.
    tuple_values = [(0, []), (1.1, [1]), (2.2, [2, 2]), (3.3, [3, 3, 3])]
    tuples = awkward1.from_iter(tuple_values, highlevel=False)
    assert isinstance(convert(tuples), awkward0.Table)
    assert convert(tuples).tolist() == tuple_values
    assert isinstance(convert(tuples[2]), tuple)
    assert convert(tuples[2])[0] == 2.2
    assert convert(tuples[2])[1].tolist() == [2, 2]

    # Mixed numbers and lists -> UnionArray.
    mixed_values = [0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]]
    mixed = awkward1.from_iter(mixed_values, highlevel=False)
    assert isinstance(convert(mixed), awkward0.UnionArray)
    assert convert(mixed).tolist() == mixed_values

    # Numbers with None -> IndexedMaskedArray.
    optional_values = [1.1, 2.2, None, None, 3.3, None, 4.4]
    optional = awkward1.from_iter(optional_values, highlevel=False)
    assert isinstance(convert(optional), awkward0.IndexedMaskedArray)
    assert convert(optional).tolist() == optional_values

    # IndexedArray64 -> IndexedArray.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    index = awkward1.layout.Index64(
        numpy.array([3, 2, 2, 5, 0], dtype=numpy.int64))
    indexed = awkward1.layout.IndexedArray64(index, content)
    assert isinstance(convert(indexed), awkward0.IndexedArray)
    assert convert(indexed).tolist() == [3.3, 2.2, 2.2, 5.5, 0.0]
def cleandataset(f, defaults_per_variable, isMC):
    """Flatten the per-event jet branches of ``f['Events']`` into a cleaned per-jet array.

    Steps:
      1. Select ``Jet_eta``, ``Jet_pt`` and every key containing
         ``Jet_DeepCSV``, plus four tagger-output branches (and two truth
         branches when ``isMC``).
      2. Read them in one chunk and flatten event -> jet.
      3. For MC, derive a target class per jet:
         0 = b (hadronFlavour 5, nBHadrons <= 1), 1 = bb (hadronFlavour 5,
         nBHadrons > 1), 2 = c (hadronFlavour 4), 3 = everything else.
      4. In the first 67 columns, replace NaN, +/-inf and -999 by the
         per-variable default.
      5. Reset per-track / per-vertex style columns to their default when the
         corresponding counter column is too small.

    NOTE(review): all column indices used below (67 inputs, counters in
    columns 64 and 65, the per-threshold column lists) are hard-coded and
    assume a specific ordering of ``f['Events'].keys()`` -- confirm against
    the input files.

    Args:
        f: Mapping-like file object; ``f['Events']`` must provide ``keys()``,
            ``num_entries`` and ``iterate(...)`` (presumably an uproot file).
        defaults_per_variable: Sequence indexable by column number giving the
            default value for each of the first 67 columns.
        isMC: Only the value ``True`` (by equality) enables the truth branches
            and the target column.

    Returns:
        2-D NumPy array with one row per jet holding the first 67 columns,
        followed by the target class column when ``isMC``.

    Raises:
        ValueError: If iterating over ``f['Events']`` yields no chunk.
    """
    print('Doing cleaning, isMC = ', isMC)
    # Keep the original `== True` semantics so truthy non-True values behave as before.
    is_mc = isMC == True  # noqa: E712

    n_inputs = 67

    events = f['Events']
    feature_names = [
        k for k in events.keys()
        if k == 'Jet_eta' or k == 'Jet_pt' or 'Jet_DeepCSV' in k
    ]
    # Tagger output to compare with later and variables used to get the truth output.
    feature_names.extend(('Jet_btagDeepB_b', 'Jet_btagDeepB_bb',
                          'Jet_btagDeepC', 'Jet_btagDeepL'))
    if is_mc:
        feature_names.extend(('Jet_nBHadrons', 'Jet_hadronFlavour'))

    # step_size equals the number of entries, so only the first chunk is read.
    for data in events.iterate(feature_names, step_size=events.num_entries,
                               library='ak'):
        break
    else:
        raise ValueError("f['Events'] yielded no entries to clean")

    # One row per feature plus one extra row for the target class;
    # one column per jet (events flattened along axis 1).
    target_index = len(feature_names)
    n_jets = len(ak.flatten(data['Jet_pt'], axis=1))
    datacolumns = np.zeros((len(feature_names) + 1, n_jets))
    for row, name in enumerate(feature_names):
        datacolumns[row] = ak.to_numpy(ak.flatten(data[name], axis=1))

    if is_mc:
        nbhad = ak.to_numpy(ak.flatten(data['Jet_nBHadrons'], axis=1))
        hadflav = ak.to_numpy(ak.flatten(data['Jet_hadronFlavour'], axis=1))
        target_class = np.full_like(hadflav, 3)                                  # udsg
        target_class = np.where(hadflav == 4, 2, target_class)                   # c
        target_class = np.where(np.bitwise_and(hadflav == 5, nbhad > 1),
                                1, target_class)                                 # bb
        target_class = np.where(np.bitwise_and(hadflav == 5, nbhad <= 1),
                                0, target_class)                                 # b, lepb
        datacolumns[target_index] = target_class

    datavectors = datacolumns.transpose()

    for j in range(n_inputs):
        column = datavectors[:, j]  # view: writes go through to datavectors
        # `x == np.nan` is always False, so NaNs must be found with
        # isnan/isfinite. ~isfinite covers NaN, +inf and -inf. Pre-existing
        # -999 defaults in the inputs are moved to the new default as well.
        invalid = ~np.isfinite(column) | (column == -999)
        column[invalid] = defaults_per_variable[j]

    # Round-trip through awkward kept from the original implementation
    # (a row-filtering step used to sit between these two calls).
    datavecak = ak.from_numpy(datavectors)
    alldata = ak.to_numpy(datavecak)

    # Entry i lists the columns reset to default when counter column 64 is <= i.
    columns_by_threshold = (
        (6, 12, 22, 29, 35, 42, 50),
        (7, 13, 23, 30, 36, 43, 51),
        (8, 14, 24, 31, 37, 44, 52),
        (9, 15, 25, 32, 38, 45, 53),
        (10, 16, 26, 33, 39, 46, 54),
        (11, 17, 27, 34, 40, 47, 55),
    )
    for threshold, columns in enumerate(columns_by_threshold):
        too_few = alldata[:, 64] <= threshold
        for col in columns:
            alldata[too_few, col] = defaults_per_variable[col]

    # Columns 18..21 are reset when counter column 65 is <= 0..3 respectively.
    for threshold, col in enumerate((18, 19, 20, 21)):
        alldata[alldata[:, 65] <= threshold, col] = defaults_per_variable[col]

    # These columns use -1 as their own "missing" marker.
    for col in (41, 48, 49, 56):
        alldata[alldata[:, col] == -1, col] = defaults_per_variable[col]

    datacls = list(range(n_inputs))
    if is_mc:
        # The target class was written at row len(feature_names); the original
        # hard-coded 73 here, which is the same index only for exactly 67 inputs.
        datacls.append(target_index)
    return alldata[:, datacls]