def test_Agglomerative_setup_ints_floats():
    """Check the arrays GeneralisedKT holds straight after construction.

    A clustering is made from an EventWise filled with the ``two_close``
    sample, and its ``_avaliable_mask``, ``_avaliable_idxs``, ``PT`` and
    ``Label`` are checked.  A second clustering made from an
    ``(ints, floats)`` tuple must then hold the same ``_ints`` and
    ``_floats`` as the first.
    """
    sample_floats = SimpleClusterSamples.two_close["floats"]
    with TempTestDir("tst") as tmp_dir:
        eventWise = Components.EventWise(os.path.join(tmp_dir, "tmp.parquet"))
        eventWise.selected_event = 0
        set_JetInputs(eventWise, sample_floats)
        from_file = FormJets.GeneralisedKT(eventWise)
        # two inputs are expected to give three rows, the last not avaliable
        tst.assert_allclose(from_file._avaliable_mask, [True, True, False])
        tst.assert_allclose(from_file.PT[:2], np.ones(2))
        assert set(from_file._avaliable_idxs) == {0, 1}
        assert {-1, 0, 1} == set(from_file.Label)
        # the same state should be reachable from a tuple of ints and floats
        # NOTE(review): blank_ints is filled in but never passed on; the
        # constructor below receives from_file._ints[:2] instead - confirm
        # which of the two was intended.
        blank_ints = -np.ones_like(from_file._ints)
        blank_ints[[0, 1], 0] = [0, 1]
        from_tuple = FormJets.GeneralisedKT(
            (from_file._ints[:2], sample_floats))
        tst.assert_allclose(from_tuple._ints, from_file._ints)
        tst.assert_allclose(from_tuple._floats, from_file._floats)
def test_Agglomerative_chose_pair():
    """Check that chose_pair returns the expected two indices.

    Two set-ups are tried: the ``two_close`` sample with its first
    particle repeated, where indices 0 and 2 must be chosen, and the
    ``two_oposite`` sample with a ``DeltaR`` of 100, where the only pair
    (0 and 1) must be chosen.
    """
    n_int_cols = len(FormJets.Agglomerative.int_columns)

    # first set-up; row 2 is a copy of row 0
    close_floats = SimpleClusterSamples.two_close["floats"]
    close_floats = np.concatenate((close_floats, close_floats[[0]]))
    close_ints = np.full((3, n_int_cols), -1.)
    close_ints[:, 0] = [0, 1, 2]
    close_agg = FormJets.GeneralisedKT((close_ints, close_floats),
                                       memory_cap=10)
    close_agg.setup_internal()
    first, second = close_agg.chose_pair()
    assert {first, second} == {0, 2}

    # second set-up; only two particles, with a very large DeltaR
    far_floats = SimpleClusterSamples.two_oposite["floats"]
    jet_params = {"DeltaR": 100}
    far_ints = np.full((2, n_int_cols), -1.)
    far_ints[:, 0] = [0, 1]
    far_agg = FormJets.GeneralisedKT((far_ints, far_floats),
                                     memory_cap=10,
                                     dict_jet_params=jet_params)
    far_agg.setup_internal()
    first, second = far_agg.chose_pair()
    assert {first, second} == {0, 1}
def test_Agglomerative_get_historic_2d_mask():
    """Check the 2d mask recovered for the first stored step of a debug run.

    Four particles are clustered with ``debug_run``; the mask returned by
    ``get_historic_2d_mask(0)`` must pick the top left 4 by 4 corner out
    of a 10 by 10 matrix.
    """
    pair = SimpleClusterSamples.two_close["floats"]
    four_floats = np.concatenate((pair, pair))
    four_ints = np.full((4, len(FormJets.Agglomerative.int_columns)), -1.)
    four_ints[:, 0] = [0, 1, 2, 3]
    agg = FormJets.GeneralisedKT((four_ints, four_floats), memory_cap=10)
    agg.debug_run()
    mask_at_start = agg.get_historic_2d_mask(0)
    # each entry of the grid encodes its own position as 10*row + column
    grid = np.arange(100).reshape((10, 10))
    picked = grid[mask_at_start]
    corner = grid[:4, :4]
    tst.assert_allclose(corner, picked)
def compare_FastJet_FormJets(floats, deltaR, expofPTInput):
    """Assert that every jet fastjet forms is also formed by GeneralisedKT.

    Parameters
    ----------
    floats : 2d array of floats
        Float columns of the input particles. Modified in place; the last
        column is zeroed and ``fill_angular`` is applied to each row.
    deltaR : float
        DeltaR parameter handed to both clustering implementations.
    expofPTInput : float
        Exponent of PT handed to both clustering implementations.

    """
    # set distance to 0
    floats[:, -1] = 0.
    for particle in floats:
        fill_angular(particle, energy_boost_factor=10)
    jet_params = dict(DeltaR=deltaR,
                      ExpofPTInput=expofPTInput,
                      ExpofPTFormatInput="genkt")
    # the eventwise file must exist for as long as it is being read
    with TempTestDir("tst") as tmp_dir:
        save_path = os.path.join(tmp_dir, "tmp.parquet")
        eventWise = Components.EventWise(save_path)
        set_JetInputs(eventWise, floats)
        eventWise.selected_event = 0
        genkt = FormJets.GeneralisedKT(eventWise,
                                       dict_jet_params=jet_params,
                                       run=True)
        genkt_labels = []
        for jet in genkt.split():
            genkt_labels.append(set(jet.Leaf_Label))
        fastjet = FastJetPython.run_applyfastjet(eventWise, deltaR,
                                                 expofPTInput, "Jet")
        for jet in fastjet.split():
            labels = set(jet.Leaf_Label)
            assert labels in genkt_labels, f"{labels}, not in {genkt_labels}"
def test_Agglomerative_get_decendants():
    """Check get_decendants on a lone particle and on a three row tree.

    In the three row tree, label 1 is the parent of labels 2 and 0.
    Decendants are requested by label and by index, both with and without
    restricting the answer to the end points.
    """
    n_float_cols = len(FormJets.Agglomerative.float_columns)

    # a single particle is its own only decendant
    lone_ints = np.array([[0, -1, -1, -1, 0]])
    lone_floats = np.zeros((1, n_float_cols))
    lone = FormJets.GeneralisedKT((lone_ints, lone_floats), memory_cap=10)
    tst.assert_allclose(lone.get_decendants(True, start_label=0), [0])

    # one parent with two children
    tree_ints = np.array([[0, 1, -1, -1, 0],
                          [2, 1, -1, -1, 0],
                          [1, -1, 2, 0, 1]])
    tree_floats = np.zeros((3, n_float_cols))
    tree = FormJets.GeneralisedKT((tree_ints, tree_floats), memory_cap=10)
    # (last_only, how the start is identified, expected decendants)
    cases = [
        (True, {"start_label": 0}, {0}),
        (True, {"start_label": 1}, {0, 1}),
        (False, {"start_label": 1}, {0, 1, 2}),
        (True, {"start_idx": 2}, {0, 1}),
        (False, {"start_idx": 2}, {0, 1, 2}),
    ]
    for last_only, start, expected in cases:
        found = tree.get_decendants(last_only, **start)
        assert set(found) == expected
def test_GeneralisedKT_simple_cases():
    """Cluster the two particle samples and compare with the stored results.

    Degenerate particles must join even for a tiny DeltaR, close particles
    must join for DeltaR of 0.5 but not for 0.001, and oposite particles
    must stay apart for DeltaR of 0.5.
    """

    def cluster(ints, floats, delta_r):
        """Run the clustering, return the rows whose Label is not -1."""
        jet = FormJets.GeneralisedKT((ints, floats),
                                     dict_jet_params={"DeltaR": delta_r},
                                     run=True)
        filled = jet.Label != -1
        return jet._ints[filled], jet._floats[filled]

    # for any deltaR, no matter how small, degenerate particles should join
    start_ints = SimpleClusterSamples.two_degenerate['ints']
    start_floats = SimpleClusterSamples.two_degenerate['floats']
    found_ints, found_floats = cluster(start_ints, start_floats, 0.001)
    match_ints_floats(SimpleClusterSamples.degenerate_join['ints'],
                      SimpleClusterSamples.degenerate_join['floats'],
                      found_ints, found_floats)

    # close particles are left unchanged by a very small deltaR
    start_ints = SimpleClusterSamples.two_close['ints']
    start_floats = SimpleClusterSamples.two_close['floats']
    found_ints, found_floats = cluster(start_ints, start_floats, 0.001)
    match_ints_floats(start_ints, start_floats, found_ints, found_floats)

    # but they do join with a more normal deltaR
    found_ints, found_floats = cluster(start_ints, start_floats, 0.5)
    match_ints_floats(SimpleClusterSamples.close_join['ints'],
                      SimpleClusterSamples.close_join['floats'],
                      found_ints, found_floats)

    # objects that are far apart are left unchanged
    start_ints = SimpleClusterSamples.two_oposite['ints']
    start_floats = SimpleClusterSamples.two_oposite['floats']
    found_ints, found_floats = cluster(start_ints, start_floats, 0.5)
    match_ints_floats(start_ints, start_floats, found_ints, found_floats)
def check_joins(ew, deltaR, exp):
    """Count the joined rows produced by fastjet and by GeneralisedKT.

    A row is counted when both its ``Label`` and its ``Child1`` differ
    from -1.

    Parameters
    ----------
    ew : EventWise
        Source of the jet inputs; ``JetInputs_Energy`` sets the memory cap.
    deltaR : float
        DeltaR parameter handed to both clustering implementations.
    exp : float
        Exponent of PT handed to both clustering implementations.

    Returns
    -------
    fastjet_joints : int
        Number of joined rows in the fastjet clustering.
    genkt_joins : int
        Number of joined rows in the GeneralisedKT clustering.

    """
    jet_params = {"DeltaR": deltaR, "ExpofPTInput": exp}
    cap = len(ew.JetInputs_Energy) * 2
    genkt = FormJets.GeneralisedKT(ew,
                                   dict_jet_params=jet_params,
                                   run=True,
                                   memory_cap=cap)
    genkt_has_child = genkt.Child1 != -1
    genkt_in_use = genkt.Label != -1
    genkt_joins = np.sum(genkt_has_child * genkt_in_use)
    fastjet = FastJetPython.run_applyfastjet(ew, deltaR, exp, "Jet")
    fastjet_in_use = fastjet.Label != -1
    fastjet_has_child = fastjet.Child1 != -1
    fastjet_joints = np.sum(fastjet_in_use * fastjet_has_child)
    return fastjet_joints, genkt_joins
def test_create_jet_contents():
    """Check the contents dict made from three unsplit clusterings.

    The three clusterings stand for three events; one holding a single
    three row tree, one holding that tree plus a lone particle, and one
    with no rows at all.
    """
    n_int_cols = len(FormJets.Agglomerative.int_columns)
    n_float_cols = len(FormJets.Agglomerative.float_columns)
    name = "DogJet"

    # event 0; a parent with two children, PT values 0, 1, 2
    one_tree = np.array([[0, 1, -1, -1, 0],
                         [2, 1, -1, -1, 0],
                         [1, -1, 2, 0, 1]])
    one_tree_floats = np.zeros((3, n_float_cols))
    one_tree_floats[:, 0] = np.arange(3)
    event0 = FormJets.GeneralisedKT((one_tree, one_tree_floats),
                                    jet_name=name, memory_cap=10)

    # event 1; the same tree and an unconnected particle
    tree_and_single = np.array([[0, 1, -1, -1, 0],
                                [2, 1, -1, -1, 0],
                                [1, -1, 2, 0, 1],
                                [3, -1, -1, -1, 0]])
    tree_and_single_floats = np.ones((4, n_float_cols))
    event1 = FormJets.GeneralisedKT((tree_and_single, tree_and_single_floats),
                                    jet_name=name, memory_cap=10)

    # event 2; nothing at all
    no_ints = np.empty((0, n_int_cols))
    no_floats = np.empty((0, n_float_cols))
    event2 = FormJets.GeneralisedKT((no_ints, no_floats),
                                    jet_name=name, memory_cap=10)

    contents = FormJets.create_jet_contents([event0, event1, event2], {})
    labels = contents["DogJet_Label"]
    assert len(labels) == 3
    assert len(labels[0]) == 1
    assert len(labels[1]) == 2
    assert len(labels[2]) == 0
    tst.assert_allclose(sorted(contents["DogJet_PT"][0][0]), np.arange(3))
def test_Agglomerative_split():
    """Check that split returns one clustering per unconnected jet.

    A single three row tree must come back as one jet of three rows; the
    same tree with an extra lone particle must come back as two jets, of
    three rows and of one row.
    """
    n_float_cols = len(FormJets.Agglomerative.float_columns)

    # one tree only
    tree_ints = np.array([[0, 1, -1, -1, 0],
                          [2, 1, -1, -1, 0],
                          [1, -1, 2, 0, 1]])
    tree = FormJets.GeneralisedKT((tree_ints, np.zeros((3, n_float_cols))),
                                  memory_cap=10)
    pieces = tree.split()
    assert len(pieces) == 1
    assert len(pieces[0].Label) == 3

    # the tree and an unconnected particle
    mixed_ints = np.array([[0, 1, -1, -1, 0],
                           [2, 1, -1, -1, 0],
                           [1, -1, 2, 0, 1],
                           [3, -1, -1, -1, 0]])
    mixed = FormJets.GeneralisedKT((mixed_ints, np.zeros((4, n_float_cols))),
                                   memory_cap=10)
    pieces = mixed.split()
    assert len(pieces) == 2
    sizes = {len(piece.Label) for piece in pieces}
    assert sizes == {1, 3}
def test_Agglomerative_combine_ints_floats():
    """Check the row made by combining the first two rows of a sample.

    For the ``degenerate_join`` and ``close_join`` samples, combining rows
    0 and 1 must reproduce the last row of the sample.
    """
    # (sample, squared distance between the two particles being combined)
    scenarios = [
        (SimpleClusterSamples.degenerate_join, 0.),
        (SimpleClusterSamples.close_join, 0.1**2),
    ]
    for sample, distance2 in scenarios:
        sample_ints = sample["ints"]
        sample_floats = sample["floats"]
        agg = FormJets.GeneralisedKT((sample_ints[:2], sample_floats[:2]),
                                     memory_cap=10)
        joined_ints, joined_floats = agg.combine_ints_floats(0, 1, distance2)
        tst.assert_allclose(joined_ints, sample_ints[-1])
        tst.assert_allclose(joined_floats, sample_floats[-1])
def test_GeneralisedKT_run():
    """Check run on an empty event and that the first step is not special.

    An empty event must pass through ``debug_run`` without error.  Then a
    clustering that has taken one step by hand is compared with a fresh
    clustering started from the state left by that step; after both are
    run to completion their filled rows must agree.
    """
    # shouldn't choke on an empty event
    no_floats = np.empty((0, 8))
    # the eventwise file must exist for as long as it is being read
    with TempTestDir("tst") as tmp_dir:
        eventWise = Components.EventWise(os.path.join(tmp_dir, "tmp.parquet"))
        set_JetInputs(eventWise, no_floats)
        eventWise.selected_event = 0
        empty_jet = FormJets.GeneralisedKT(eventWise)
        empty_jet.debug_run()

    n_points = 10
    n_int_cols = len(FormJets.GeneralisedKT.int_columns)
    start_ints = np.full((n_points, n_int_cols), -1, dtype=int)
    start_ints[:, 0] = np.arange(n_points)
    np.random.seed(1)
    n_float_cols = len(FormJets.GeneralisedKT.float_columns)
    start_floats = np.random.rand(n_points, n_float_cols) * 10
    for particle in start_floats:
        fill_angular(particle)

    # the aim is to show that the first step is not conceptually
    # different from later steps
    jet_params = {"DeltaR": np.inf}
    stepped = FormJets.GeneralisedKT((start_ints, start_floats),
                                     dict_jet_params=jet_params)
    stepped.setup_internal()
    first, second = stepped.chose_pair()
    stepped.step(first, second)
    # start a second clustering from the rows filled after that one step
    resumed_ints = stepped._ints[stepped.Label != -1]
    resumed_floats = stepped._floats[stepped.Label != -1]
    resumed = FormJets.GeneralisedKT((resumed_ints, resumed_floats),
                                     dict_jet_params=jet_params)
    stepped.run()
    resumed.run()
    n_filled = np.sum(stepped.Label != -1)
    tst.assert_allclose(stepped._ints[:n_filled], resumed._ints[:n_filled])
    tst.assert_allclose(stepped._floats[:n_filled],
                        resumed._floats[:n_filled])
def test_Agglomerative_next_free_row():
    """Check which row _next_free_row reports as the next to be filled.

    With four particles and a memory cap of 10 the answer must be 4; it
    must become 0 once row 0 has its label set to -1, and 10 once the
    label column has been overwritten with 0 to 9.
    """
    pair = SimpleClusterSamples.two_close["floats"]
    four_floats = np.concatenate((pair, pair))
    four_ints = np.full((4, len(FormJets.Agglomerative.int_columns)), -1.)
    four_ints[:, 0] = [0, 1, 2, 3]
    agg = FormJets.GeneralisedKT((four_ints, four_floats), memory_cap=10)
    # rows 0 to 3 hold the inputs
    assert agg._next_free_row() == 4
    # mark the first row as empty
    agg._ints[0, 0] = -1
    assert agg._next_free_row() == 0
    # give every row a label
    agg._ints[:, 0] = np.arange(10)
    assert agg._next_free_row() == 10
def test_Agglomerative_2d_avaliable_indices():
    """Check that _2d_avaliable_indices selects the avaliable minor.

    After indices 1 and 3 are made unavaliable, indexing a 4 by 4 matrix
    with ``_2d_avaliable_indices`` must give the entries at rows and
    columns 0 and 2.
    """
    n_particles = 4
    start_ints = np.full(
        (n_particles, len(FormJets.Agglomerative.int_columns)), -1.)
    start_ints[:, 0] = [0, 1, 2, 3]
    start_floats = np.zeros(
        (n_particles, len(FormJets.Agglomerative.float_columns)))
    agg = FormJets.GeneralisedKT((start_ints, start_floats))
    agg._update_avalible([1, 3])
    minor_idxs = agg._2d_avaliable_indices
    # the entries of the matrix count up along each row in turn
    matrix = np.arange(16).reshape((4, 4))
    expected_minor = np.array([[0, 2], [8, 10]])
    tst.assert_allclose(matrix[minor_idxs], expected_minor)
def test_Agglomerative_reoptimise_preallocated():
    """Check a run in which the memory cap is below the final row count.

    Four particles are clustered with ``memory_cap=5``.  The run must
    still end with seven rows, all labelled, one avaliable label, and the
    energies 1 to 4 still attached to the labels 0 to 3.
    """
    pair = SimpleClusterSamples.two_close["floats"]
    four_floats = np.concatenate((pair, pair))
    # give each particle an energy that identifies it
    energy_col = FormJets.Clustering.float_columns.index("Energy")
    four_floats[:, energy_col] = np.arange(4) + 1
    four_ints = np.full((4, len(FormJets.Agglomerative.int_columns)), -1.)
    four_ints[:, 0] = [0, 1, 2, 3]
    agg = FormJets.GeneralisedKT((four_ints, four_floats), memory_cap=5)
    agg.run()
    assert len(agg.Label) == 7
    assert np.sum(agg.Label > -1) == 7
    assert len(agg.Available_Label) == 1
    # look up the energy of each input by its label
    all_labels = list(agg.Label)
    input_energies = []
    for input_label in range(4):
        row = all_labels.index(input_label)
        input_energies.append(agg.Energy[row])
    tst.assert_allclose(input_energies, [1., 2., 3., 4.])
def test_Agglomerative_update_avalible():
    """Check that _update_avalible removes and adds avaliable indices.

    Both ``_avaliable_idxs`` and ``_avaliable_mask`` are checked after
    index 0 is removed, and again after indices 5 and 6 are added.
    """
    pair = SimpleClusterSamples.two_close["floats"]
    four_floats = np.concatenate((pair, pair))
    four_ints = np.full((4, len(FormJets.Agglomerative.int_columns)), -1.)
    four_ints[:, 0] = [0, 1, 2, 3]
    agg = FormJets.GeneralisedKT((four_ints, four_floats), memory_cap=10)

    # take index 0 out
    agg._update_avalible([0])
    assert {1, 2, 3} == set(agg._avaliable_idxs)
    expected_mask = np.zeros(10, dtype=bool)
    expected_mask[[1, 2, 3]] = True
    tst.assert_allclose(expected_mask, agg._avaliable_mask)

    # put indices 5 and 6 in, removing nothing
    agg._update_avalible([], [5, 6])
    assert {1, 2, 3, 5, 6} == set(agg._avaliable_idxs)
    expected_mask[[5, 6]] = True
    tst.assert_allclose(expected_mask, agg._avaliable_mask)
input("press enter to continue\n...") print("~~ Example of clustering a single event ~~") print( "Anti-KT, Cambridge-Aachen and KT jet clustering can all be created with" ) print("FormJet.GeneralisedKT which is a generalise method.") print("To cluster a single event with Anti-KT first select the event ") print(" > eventWise.selected_event = 0") eventWise.selected_event = 0 print("Then create an object of type FormJet.GeneralisedKT") print( "traditonal_jets = FormJets.GeneralisedKT(eventWise, DeltaR=0.8, ExpofPTInput=-1., assign=True)" ) traditonal_jets = FormJets.GeneralisedKT(eventWise, DeltaR=0.8, ExpofPTInput=-1., assign=True) input("press enter to continue\n...") print("This object has clustered the event with anti-KT,") print( "the key to this being anti-KT is the parameter ExpofPTMultiplier being -1." ) print("ExpofPTInput equal to 0 is Cambridge-Aachen and 1 is KT.") print("This object has some useful attributes;") print("This form of jet clustering puts the particles into binary trees") print("The form of these trees is given by the attributes") print("Parent, Child1, Child2") print(" > traditonal_jets.Parent") print(traditonal_jets.Parent) print("The numbers are local indices, so") print(
def read_fastjet(arg, jet_name="FastJet", do_checks=False):
    """
    Read the outputs of the fastjet program into a GeneralisedKT object.

    The first entry of ``arg`` is a header giving DeltaR, the algorithm
    name and, after the token ``Columns;``, the names of the integer
    columns.  The integer rows follow, then a line starting with ``#``
    that names the float columns, then the float rows.

    Parameters
    ----------
    arg : list of bytes
        The lines printed by the fastjet program; each entry is decoded
        before it is parsed. Blank lines after the header are ignored.
    jet_name : string
        Name of the jet to be prefixed in the eventWise
         (Default value = "FastJet")
    do_checks : bool
        If checks on the form of the fastjet output should be done (slow)
         (Default value = False)

    Returns
    -------
    new_pseudojet : FormJets.GeneralisedKT
        the pseudojets read from the program, not yet run

    Raises
    ------
    ValueError
        If the algorithm named in the header is not one of
        kt_algorithm, cambridge_algorithm or antikt_algorithm.
    AssertionError
        If the output does not have exactly one ``#`` separator line, the
        column counts do not match the header, or a check requested by
        ``do_checks`` fails.

    """
    # fastjet format; the first character of the header line is dropped
    header = arg[0].decode()[1:]
    arrays = [[]]
    a_type = int
    for line in arg[1:]:
        line = line.decode().strip()
        if not line:
            # a blank line carries no row, and would otherwise make the
            # test on line[0] below raise an IndexError
            continue
        if line[0] == '#':  # moves from the ints to the doubles
            arrays.append([])
            a_type = float
            fcolumns = line[1:].split()
        else:
            arrays[-1].append([a_type(x) for x in line.split()])
    assert len(arrays) == 2, f"Problem with input; \n{arg}"
    fast_ints = np.array(arrays[0], dtype=int)
    fast_floats = np.array(arrays[1], dtype=float)
    # first line will be the tech specs and columns
    header = header.split()
    DeltaR = float(header[0].split('=')[1])
    algorithm_name = header[1]
    exponent_of_algorithm = {'kt_algorithm': 1,
                             'cambridge_algorithm': 0,
                             'antikt_algorithm': -1}
    if algorithm_name not in exponent_of_algorithm:
        raise ValueError(f"Algorithm {algorithm_name} not recognised")
    ExpofPTInput = exponent_of_algorithm[algorithm_name]
    # get the columns of the ints from the header
    icolumns = {
        name: i
        for i, name in enumerate(header[header.index("Columns;") + 1:])
    }
    n_fastjet_int_cols = len(icolumns)
    if len(fast_ints.shape) == 1:
        # happens when there were no integer rows
        fast_ints = fast_ints.reshape((-1, n_fastjet_int_cols))
    else:
        assert fast_ints.shape[1] == n_fastjet_int_cols
    # map each pseudojet_id to a label; rows that fastjet left with a
    # label of -1 get fresh labels above the largest existing one
    next_free = np.max(fast_ints[:, icolumns["Label"]], initial=-1) + 1
    fast_idx_dict = {}
    for line_idx, label in fast_ints[:, [
            icolumns["pseudojet_id"], icolumns["Label"]
    ]]:
        if label == -1:
            fast_idx_dict[line_idx] = next_free
            next_free += 1
        else:
            fast_idx_dict[line_idx] = label
    fast_idx_dict[-1] = -1
    # np.float was only ever an alias of the builtin float and has been
    # removed from numpy, so the builtin is named directly
    fast_ints = np.vectorize(fast_idx_dict.__getitem__, otypes=[float])(
        fast_ints[:, [
            icolumns["pseudojet_id"], icolumns["parent_id"],
            icolumns["child1_id"], icolumns["child2_id"]
        ]])
    # now the Label is the first one and the pseudojet_id can be removed
    del icolumns["pseudojet_id"]
    icolumns = {name: i - 1 for name, i in icolumns.items()}
    n_fastjet_float_cols = len(fcolumns)
    if do_checks:
        # check that the parent child relationship is reflexive
        for line in fast_ints:
            identifier = f"pseudojet inputIdx={line[0]} "
            if line[icolumns["child1_id"]] == -1:
                assert line[icolumns["child2_id"]] == -1, \
                    identifier + "has only one child"
            else:
                assert line[icolumns["child1_id"]] != \
                    line[icolumns["child2_id"]], \
                    identifier + " child1 and child2 are same"
                child1_line = fast_ints[fast_ints[:, icolumns["Label"]] ==
                                        line[icolumns["child1_id"]]][0]
                assert child1_line[1] == line[0], \
                    identifier + \
                    " first child dosn't acknowledge parent"
                child2_line = fast_ints[fast_ints[:, icolumns["Label"]] ==
                                        line[icolumns["child2_id"]]][0]
                assert child2_line[1] == line[0], \
                    identifier + " second child dosn't acknowledge parent"
            if line[1] != -1:
                assert line[icolumns["Label"]] != \
                    line[icolumns["parent_id"]], \
                    identifier + "is it's own mother"
                parent_line = fast_ints[fast_ints[:, icolumns["Label"]] ==
                                        line[icolumns["parent_id"]]][0]
                assert line[0] in parent_line[[icolumns["child1_id"],
                                               icolumns["child2_id"]]], \
                    identifier + " parent doesn't acknowledge child"
        # each fastjet float column should be the suffix of ours
        for fcol, expected in zip(fcolumns,
                                  FormJets.Clustering.float_columns):
            assert expected.endswith(fcol)
    if len(fast_ints) == 0:
        assert len(fast_floats) == 0, \
            "No ints found, but floats are present!"
        print("Warning, no values from fastjet.")
    if len(fast_floats.shape) == 1:
        # happens when there were no float rows
        fast_floats = fast_floats.reshape((-1, n_fastjet_float_cols))
    else:
        assert fast_floats.shape[1] == n_fastjet_float_cols
    if len(fast_ints.shape) > 1:
        num_rows = fast_ints.shape[0]
        assert len(fast_ints) == len(fast_floats)
    elif len(fast_ints) > 0:
        num_rows = 1
    else:
        num_rows = 0
    ints = np.full((num_rows, len(FormJets.Clustering.int_columns)),
                   -1, dtype=int)
    floats = np.zeros((num_rows, len(FormJets.Clustering.float_columns)),
                      dtype=float)
    if len(fast_ints) > 0:
        # the first 4 int and first 7 float columns come from fastjet
        ints[:, :4] = fast_ints
        floats[:, :7] = fast_floats
        # make ranks; rows without children get rank -1
        rank = -1
        rank_col = len(icolumns)
        ints[ints[:, icolumns["child1_id"]] == -1, rank_col] = rank
        # parents of the lowest rank is the next rank
        this_rank = set(ints[ints[:, icolumns["child1_id"]] == -1,
                             icolumns["parent_id"]])
        this_rank.discard(-1)
        while len(this_rank) > 0:
            rank += 1
            next_rank = []
            for i in this_rank:
                ints[ints[:, icolumns["Label"]] == i, rank_col] = rank
                parent = ints[ints[:, icolumns["Label"]] == i,
                              icolumns["parent_id"]]
                if parent != -1 and parent not in next_rank:
                    next_rank.append(parent)
            this_rank = next_rank
    # create the pseudojet
    dict_jet_params = dict(DeltaR=DeltaR,
                           ExpofPTInput=ExpofPTInput,
                           ExpofPTFormatInput='genkt')
    new_pseudojet = FormJets.GeneralisedKT((ints, floats),
                                           jet_name=jet_name,
                                           dict_jet_params=dict_jet_params,
                                           memory_cap=len(ints) + 1)
    return new_pseudojet