def test_simplify_shower():
    """Check Shower.simplify_shower on an empty, a minimal and a reducible shower."""
    # simplifying a shower with no particles must leave it empty
    no_particles = FormShower.Shower([], [], [], [])
    no_particles.simplify_shower()
    assert len(no_particles) == 0

    # minimal shower, A -> B, C; all three particles are expected to remain
    idxs = [1, 2, 3]
    parent_lists = [[], [1], [1]]
    child_lists = [[2, 3], [], []]
    names = ['A', 'B', 'C']
    is_amalgam = False
    shower = FormShower.Shower(idxs, parent_lists, child_lists, names,
                               is_amalgam)
    shower.simplify_shower()
    assert len(shower) == 3

    # extend the same inputs with D, the only child of B, giving a link
    # (B -> D) that could be removed; three particles are expected afterwards
    idxs += [4]
    parent_lists += [[2]]
    child_lists[1] += [4]
    child_lists += [[]]
    names += ['D']
    shower = FormShower.Shower(idxs, parent_lists, child_lists, names,
                               is_amalgam)
    shower.simplify_shower()
    assert len(shower) == 3
def test_get_roots():
    """Check FormShower.get_roots on a handful of small parentage tables."""
    # the empty list has no roots
    assert len(FormShower.get_roots([], [])) == 0

    # (particle ids, parents of each particle, expected roots, sort before compare)
    cases = [
        # one particle, whose parent is not in the list
        ([2], [[5]], [2], False),
        # one root, one trailing
        ([2, 3], [[3], [5]], [3], False),
        # two roots
        ([2, 3], [[], [5]], [2, 3], True),
        # chain
        ([2, 3, 5, 11], [[], [2, 11], [3], [10]], [2, 11], True),
    ]
    for ids, parent_lists, wanted, needs_sort in cases:
        roots = FormShower.get_roots(ids, parent_lists)
        if needs_sort:
            roots = sorted(roots)
        tst.assert_allclose(roots, wanted)
def test_Shower():
    """Exercise construction, derived attributes and amalgamation of Shower."""
    # ---- a shower with nothing in it
    blank = FormShower.Shower([], [], [], [])
    assert len(blank) == 0
    assert blank.n_particles == 0
    assert len(blank.root_idxs) == 0
    blank.find_ranks()
    assert len(blank.ranks) == 0
    assert isinstance(blank.graph(), DrawTrees.DotGraph)
    # reading either of these attributes is expected to raise
    for attr_name in ("outside_connections", "roots"):
        with pytest.raises(AttributeError):
            getattr(blank, attr_name)
    assert len(blank.outside_connection_idxs) == 0
    assert len(blank.ends) == 0
    assert len(blank.flavour) == 0

    # ---- a simple shower where A -> B, C
    three = FormShower.Shower([1, 2, 3],
                              [[], [1], [1]],
                              [[2, 3], [], []],
                              ['A', 'B', 'C'],
                              True)
    assert len(three) == 3
    assert three.n_particles == 3
    tst.assert_allclose(three.root_idxs, [1])
    three.find_ranks()
    tst.assert_allclose(three.ranks, [0, 1, 1])
    assert isinstance(three.graph(), DrawTrees.DotGraph)
    assert len(three.outside_connection_idxs) == 0
    tst.assert_allclose(three.ends, [2, 3])

    # ---- two roots; particle 4 has a parent (5) that is not in the shower
    idxs = [1, 2, 3, 4]
    parent_lists = [[], [1], [1], [5]]
    child_lists = [[2, 3], [6], [], []]
    names = ['A', 'B', 'C', 'D']
    double_root = FormShower.Shower(idxs, parent_lists, child_lists, names,
                                    True)
    tst.assert_allclose(sorted(double_root.root_idxs), [1, 4])
    tst.assert_allclose(sorted(double_root.outside_connection_idxs), [4])
    # the same content is expected to be rejected when not flagged as amalgam
    with pytest.raises(AssertionError):
        FormShower.Shower(idxs, parent_lists, child_lists, names, False)

    # ---- merge a second shower that continues from particle 2
    addition = FormShower.Shower([2, 6, 7],
                                 [[1], [2], [6]],
                                 [[6], [7], []],
                                 ['B', 'E', 'F'],
                                 False)
    double_root.amalgamate(addition)
    assert len(double_root) == 6
    tst.assert_allclose(sorted(double_root.root_idxs), [1, 4])
    tst.assert_allclose(sorted(double_root.outside_connection_idxs), [4])
def test_add_inheritance():
    """Check TrueTag.add_inheritance on an empty event and a two-jet event."""
    jet_name = "Jet"
    # content of event 1, keyed by column; event 0 is empty in every column
    # invariant mass  873 100 388    4859, 388, 94
    # shifted energy   30  10  20      70  sqrt(393) sqrt(103)
    # the positivity will be  0 1 0 1 0 1 0 0 0 0 0
    #                         0 0 1 0 0 0 0 0 0 0 0
    #                         0 0 0 0 0 0 0 0 0 0 0
    event_one = {
        'Jet_Label': [[0, 101, 2], [102, 4, 5]],
        'Jet_Parent': [[101, -1, 101], [-1, 102, 102]],
        'Jet_Energy': [[30., 10., 20.], [70., 20., 10.]],
        'Jet_Px': [[3., 0., 2.], [1., 2., -1.]],
        'Jet_Py': [[3., 0., 2.], [2., 2., 1.]],
        'Jet_Pz': [[3., 0., 2.], [0., 2., 2.]],
        'Children': [[], [3], [], [5], [], [], [2, 7, 8, 9], [], [], [], []],
        'Parents': [[], [], [6], [1], [], [3], [], [6], [6], [6], []],
        'MCPID': [4, -5, 5, 3, 2, 1, -5, -1, 7, 11, 12],
        'PT': [3, 1, 2, 1, 2, 1, 3, 1, 2, 1, 2],
        'JetInputs_SourceIdx': np.arange(11),
        'TagIndex': [1, 6],
    }
    params = {
        column: [ak.from_iter([]), ak.from_iter(values)]
        for column, values in event_one.items()
    }
    with TempTestDir("tst") as dir_name:
        eventWise = Components.EventWise(os.path.join(dir_name, "tmp.parquet"))
        eventWise.append(**params)
        FormShower.append_b_idxs(eventWise)
        TrueTag.add_inheritance(eventWise, jet_name)
        # the first event is empty
        eventWise.selected_event = 0
        assert len(eventWise.Jet_Inheritance) == 0
        assert len(eventWise.Jet_ITags) == 0
        # the second event has two jets
        eventWise.selected_event = 1
        found = eventWise.Jet_Inheritance.tolist()
        share = np.sqrt(103) / (np.sqrt(393) + np.sqrt(103))
        expected0 = [[0, 0, 0], [share, 0, 1]]
        expected1 = [[0, 2 / 5, 1], [0, 0, 0]]
        tst.assert_allclose(found[0], expected0)
        tst.assert_allclose(found[1], expected1)
        # NOTE(review): these expected tags are never compared with
        # eventWise.Jet_ITags; confirm the intended values before asserting.
        expected_tags = [[1], [6]]
def test_add_mass_share():
    """Check TrueTag.add_mass_share on an empty event and a two-jet event."""
    jet_name = "Jet"
    # content of event 1, keyed by column; event 0 is empty in every column
    # invariant mass  873 100 388    4859, 388, 94
    # shifted energy   30  10  20      70  sqrt(393) sqrt(103)
    # the positivity will be  0 1 0 1 0 1 0 0 0 0 0
    #                         0 0 1 0 0 0 0 0 0 0 0
    # in jet 1 are descendants 2 from tag 1
    # in jet 2 are descendants 5 from tag 0
    event_one = {
        'Jet_Label': [[0, 101, 2], [102, 4, 5]],
        'Energy': [30., 10., 20., 70., 20., 10., 45., 56., 40., 25.],
        'Px': [3., 0., 2., 1., 2., -1., 0., 3., -1., 0.],
        'Py': [3., 0., 2., 2., 2., 1., -1., -3., 0., -1.],
        'Pz': [3., 0., 2., 0., 2., 2., -5., -2., 1., 0.],
        # idx       0   1    2   3    4   5        6        7   8   9   10
        'Children': [[], [3], [], [5], [], [], [2, 7, 8, 9], [], [], [], []],
        'Parents': [[], [], [6], [1], [], [3], [], [6], [6], [6], []],
        'MCPID': [4, -5, 5, 3, 2, 1, -5, -1, 7, 11, 12],
        'JetInputs_SourceIdx': np.arange(6),
        'TagIndex': [1, 6],
    }
    params = {
        column: [ak.from_iter([]), ak.from_iter(values)]
        for column, values in event_one.items()
    }
    with TempTestDir("tst") as dir_name:
        eventWise = Components.EventWise(os.path.join(dir_name, "tmp.parquet"))
        eventWise.append(**params)
        FormShower.append_b_idxs(eventWise)
        TrueTag.add_mass_share(eventWise, jet_name)
        # the first event is empty
        eventWise.selected_event = 0
        assert len(eventWise.Jet_TagMass) == 0
        assert len(eventWise.Jet_MTags) == 0
        # the second event has two jets
        eventWise.selected_event = 1
        expected_mass = [[0., np.sqrt(388.)], [np.sqrt(94.), 0.]]
        expected_tags = [[6], [1]]
        tst.assert_allclose(eventWise.Jet_TagMass.tolist(), expected_mass)
        tst.assert_allclose(eventWise.Jet_MTags.tolist(), expected_tags)
def test_descendant_idxs():
    """Check FormShower.descendant_idxs for three different starting particles."""
    # idx        0    1      2     3     4   5      6       7   8   9   10
    child_lists = [[], [2, 3], [5], [6, 5], [], [], [7, 8, 9], [], [], [], []]
    with TempTestDir("tst") as dir_name:
        save_path = os.path.join(dir_name, "tmp.parquet")
        eventWise = Components.EventWise(save_path)
        eventWise.append(Children=[ak.from_iter(child_lists)])
        eventWise.selected_event = 0
        # a particle without children yields just itself
        from_zero = FormShower.descendant_idxs(eventWise, 0)
        tst.assert_allclose(list(from_zero), [0])
        # one step down to a childless particle
        from_two = FormShower.descendant_idxs(eventWise, 2)
        tst.assert_allclose(list(from_two), [5])
        # several layers; only the childless particles are expected
        from_one = FormShower.descendant_idxs(eventWise, 1)
        tst.assert_allclose(sorted(from_one), [5, 7, 8, 9])
def test_upper_layers():
    """Check FormShower.upper_layers with and without capture_pids."""
    # will need an eventwise with Parents, Children, MCPID
    # layer  -1  0  1  1 -1  2  2  3  3  3 -1
    # idx     0  1  2  3  4  5  6  7  8  9 10
    children = [[], [2, 3], [5], [6, 5], [], [], [7, 8, 9], [], [], [], []]
    parents = [[], [], [1], [1], [], [2, 3], [3], [6], [6], [6], []]
    mcpid = [4, 5, 5, 3, 2, 1, -5, -1, 7, 11, 12]
    labeler = PDGNames.IDConverter()
    with TempTestDir("tst") as dir_name:
        eventWise = Components.EventWise(os.path.join(dir_name, "tmp.parquet"))
        eventWise.append(Children=[ak.from_iter(children)],
                         Parents=[ak.from_iter(parents)],
                         MCPID=[ak.from_iter(mcpid)])
        eventWise.selected_event = 0

        def compare(expected_particle_idx, **upper_layers_kwargs):
            # build the expectation first, then fetch and compare the shower
            expected_children = ak.from_iter(
                [c for i in expected_particle_idx for c in children[i]])
            expected_parents = ak.from_iter(
                [p for i in expected_particle_idx for p in parents[i]])
            expected_labels = [labeler[mcpid[i]]
                               for i in expected_particle_idx]
            shower = FormShower.upper_layers(eventWise, n_layers=2,
                                             **upper_layers_kwargs)
            # the shower may come back in any order, so sort by particle index
            order = np.argsort(shower.particle_idxs)
            tst.assert_allclose(shower.particle_idxs[order],
                                expected_particle_idx)
            found_children = ak.flatten(ak.from_iter(shower.children[order]))
            tst.assert_allclose(found_children, expected_children)
            found_parents = ak.flatten(ak.from_iter(shower.parents[order]))
            tst.assert_allclose(found_parents, expected_parents)
            for found_label, wanted_label in zip(shower.labels[order],
                                                 expected_labels):
                assert found_label == wanted_label

        compare([0, 1, 2, 3, 4, 10])
        # try with capture pids
        compare([0, 1, 2, 3, 4, 5, 6, 10], capture_pids=[1])
def check(children, parents, is_leaf, mcpid, expected_roots, expected_shared):
    """Run FormShower.shared_ends on the given event and compare the result.

    The inputs are written to a temporary EventWise, the roots and shared
    counts are computed, put in ascending root order and compared with
    `expected_roots` and `expected_shared`.
    """
    with TempTestDir("tst") as dir_name:
        save_path = os.path.join(dir_name, "tmp.parquet")
        eventWise = Components.EventWise(save_path)
        eventWise.append(Children=ak.from_iter(children),
                         Parents=ak.from_iter(parents),
                         Is_leaf=ak.from_iter(is_leaf),
                         MCPID=ak.from_iter(mcpid))
        eventWise.selected_event = 0
        all_roots, shared_counts = FormShower.shared_ends(eventWise)
        if len(all_roots) == 0:
            # don't really care about the dimensions for 0 length
            assert len(expected_roots) == 0
            assert np.array(shared_counts).size == 0
            return
        # messages are built from the values as returned, before reordering
        root_err_msg = f"Showers with parentage {parents}, expected {expected_roots}, found {all_roots}"
        count_err_msg = f"Showers with parentage {parents}, expected {expected_shared}, found {shared_counts}"
        # need to impose ascending order on rows and columns alike
        ascending = np.argsort(all_roots)
        shared_counts = np.array(shared_counts)[ascending][:, ascending]
        all_roots = np.array(all_roots)[ascending]
        tst.assert_allclose(all_roots, expected_roots, err_msg=root_err_msg)
        tst.assert_allclose(shared_counts, expected_shared,
                            err_msg=count_err_msg)
def test_append_b_idxs():
    """Check FormShower.append_b_idxs in returning and appending modes."""
    # idx        0    1      2     3     4   5      6       7   8   9   10
    child_lists = [[], [2, 3], [5], [6, 5], [], [], [7, 8, 9], [], [], [], []]
    pids = [4, 5, 5, 3, 2, 1, -5, 1, 7, 11, 12]
    wanted = [2, 6]
    with TempTestDir("tst") as dir_name:
        save_path = os.path.join(dir_name, "tmp.parquet")
        eventWise = Components.EventWise(save_path)
        eventWise.append(Children=[ak.from_iter(child_lists)],
                         MCPID=[ak.from_iter(pids)])
        # without appending, the content comes back as a mapping
        returned = FormShower.append_b_idxs(eventWise, append=False)
        tst.assert_allclose(sorted(returned['BQuarkIdx'][0]), wanted)
        # with appending, the column becomes readable from the eventWise
        FormShower.append_b_idxs(eventWise, silent=False, append=True)
        eventWise.selected_event = 0
        tst.assert_allclose(sorted(eventWise.BQuarkIdx), wanted)
        # a second appending call is expected to return True
        repeated = FormShower.append_b_idxs(eventWise, silent=False,
                                            append=True)
        assert repeated == True
def label_parings(eventWise):
    """
    For every pair of jet inputs, label if they are from the same b quark

    Parameters
    ----------
    eventWise : EventWise
        Data set containing particle data.

    Returns
    -------
    labels : list of numpy arrays of bools
        one square array per event, with one row and one column per entry
        of JetInputs_SourceIdx; an element is True when both inputs are
        among the descendants of the same entry of BQuarkIdx

    """
    eventWise.selected_event = None
    n_events = len(eventWise.X)
    per_event = []
    for event_n in range(n_events):
        eventWise.selected_event = event_n
        sources = eventWise.JetInputs_SourceIdx
        n_sources = len(sources)
        same_b = np.zeros((n_sources, n_sources), dtype=bool)
        for b_idx in eventWise.BQuarkIdx:
            below_b = FormShower.descendant_idxs(eventWise, b_idx)
            from_b = np.fromiter((s in below_b for s in sources), dtype=bool)
            # outer "and" of the mask with itself marks the pairs that share
            # this b quark; accumulate over b quarks with "or"
            same_b |= from_b[np.newaxis, :] & from_b[:, np.newaxis]
        per_event.append(same_b)
    return per_event
def decendants_width(eventWise, *root_idxs, only_visible=True):
    """
    Get the width of the particles showered from the specified roots.

    Parameters
    ----------
    eventWise : EventWise
        object containing particle data, an event must be selected
    *root_idxs : ints
        integers specifying the locations in the eventWise
        of the root particles of the shower
    only_visible : bool
        should only particles in eventWise.JetInputs_SourceIdx
        be used to measure the width?
        (Default value = True)

    Returns
    -------
    : float
        width of the shower, as computed by `width` from the rapidity
        and phi of the selected descendants

    """
    assert eventWise.selected_event is not None
    decendants = list(FormShower.descendant_idxs(eventWise, *root_idxs))
    # select only the visible objects
    if only_visible:
        # fetch the column once and test membership against a set, rather
        # than re-reading and scanning the column for every descendant
        visible = set(eventWise.JetInputs_SourceIdx)
        decendants = [d for d in decendants if d in visible]
    rapidity = eventWise.Rapidity
    phi = eventWise.Phi
    dwidth = width(rapidity[decendants], phi[decendants])
    return dwidth
def calculate_roots_showers(data, bcreator=25, lcreator=23):
    """
    Append root and shower index columns for b, lepton and background tags.

    For each of "Is_BRoot" and "Is_lRoot" that is not yet a column of `data`,
    the roots of every event are found with `get_paired_tags` and the
    descendants of those roots with `FormShower.descendant_idxs`; both are
    appended, the latter under the name with "Root" replaced by "Shower".
    The same is then done for "Is_BGRoot" using `get_bg_tags`.

    Parameters
    ----------
    data : EventWise
        dataset to read from and append to; its selected event is changed
    bcreator : int
        passed to get_paired_tags as the creator for pid 5
        (Default value = 25)
    lcreator : int
        passed to get_paired_tags as the creator for pids 11, 13 and 15
        (Default value = 23)

    """
    n_events = len(data.X)
    b_root_name = "Is_BRoot"
    l_root_name = "Is_lRoot"
    bg_root_name = "Is_BGRoot"
    name_pids = [(b_root_name, 5, bcreator),
                 (l_root_name, np.array([11, 13, 15]), lcreator)]
    for name, pid, creator in name_pids:
        if name in data.columns:
            continue  # already calculated
        print(f"\n\n{name}\n\n")
        root_idxs, shower_idxs = [], []
        for event_n in range(n_events):
            if event_n % 10 == 0:
                print(f"\t\t{event_n/n_events:.0%} ", end="\r")
            data.selected_event = event_n
            try:
                roots = get_paired_tags(data, pid, creator)
            except Exception:
                # Only trap real errors; a bare except would also swallow
                # KeyboardInterrupt and SystemExit, making a long run
                # impossible to cancel cleanly.
                # NOTE(review): jet_tools.st looks like a debugger entry
                # point, after which the call is retried - confirm.
                jet_tools.st()
                roots = get_paired_tags(data, pid, creator)
            shower = FormShower.descendant_idxs(data, *roots)
            root_idxs.append(list(roots))
            shower_idxs.append(list(shower))
        data.append(**{
            name: root_idxs,
            name.replace("Root", "Shower"): shower_idxs
        })
    if bg_root_name not in data.columns:
        print(f"\n\n{bg_root_name}\n\n")
        root_idxs, shower_idxs = [], []
        for event_n in range(n_events):
            if event_n % 10 == 0:
                print(f"{event_n/n_events:.0%} ", end="\r")
            data.selected_event = event_n
            # the background tags are chosen given the b and lepton roots
            tag_idxs = np.concatenate(
                [getattr(data, name) for name, _, _ in name_pids])
            roots = get_bg_tags(data, tag_idxs)
            shower = FormShower.descendant_idxs(data, *roots)
            root_idxs.append(list(roots))
            shower_idxs.append(list(shower))
        data.append(
            **{
                bg_root_name: root_idxs,
                bg_root_name.replace("Root", "Shower"): shower_idxs
            })
def get_visible_children(eventWise):
    """
    Split the jet inputs of the selected event between its showers.

    Parameters
    ----------
    eventWise : EventWise
        dataset with an event selected

    Returns
    -------
    results : awkward array
        for each shower from FormShower.get_showers, the sorted entries of
        JetInputs_SourceIdx that are among the shower's particles
    roots : list
        the first root index of each shower, in the same order

    """
    jet_inputs = set(eventWise.JetInputs_SourceIdx)
    visible_per_shower = []
    roots = []
    for shower in FormShower.get_showers(eventWise):
        in_shower = jet_inputs.intersection(shower.particle_idxs)
        visible_per_shower.append(sorted(in_shower))
        roots.append(shower.root_idxs[0])
    results = ak.from_iter(visible_per_shower)
    # the showers between them must account for every distinct jet input
    n_accounted = len(set(ak.flatten(results)))
    assert len(jet_inputs) == n_accounted, f"Event {eventWise.selected_event}"
    return results, roots
def plot_decendants(ax, eventWise, tag_idx, marker, colour='r'):
    """
    Scatter the jet-input descendants of one tag in rapidity and phi.

    Parameters
    ----------
    ax : axes object
        object whose scatter method is called
    eventWise : EventWise
        dataset with an event selected
    tag_idx : int
        index of the tag particle whose descendants are drawn
    marker : str
        marker passed to scatter
    colour : str
        edge colour of the markers; the marker fill is fully transparent
        (Default value = 'r')

    """
    below_tag = FormShower.descendant_idxs(eventWise, tag_idx)
    # keep only the descendants that are jet inputs
    shown = sorted(below_tag.intersection(eventWise.JetInputs_SourceIdx))
    rapidities = eventWise.Rapidity[shown]
    phis = eventWise.Phi[shown]
    hollow = (0, 0, 0, 0)
    ax.scatter(rapidities, phis, 100,
               edgecolor=colour, marker=marker, color=hollow,
               label=f"Index={tag_idx}")
def check(children, parents, mcpid, exclude_pids, expected):
    """Run FormShower.get_showers on the given event and compare the result.

    Each shower found must have a set of particle indices that appears in
    `expected`, and the number of showers must match its length.
    """
    with TempTestDir("tst") as dir_name:
        save_path = os.path.join(dir_name, "tmp.parquet")
        eventWise = Components.EventWise(save_path)
        eventWise.append(Children=[ak.from_iter(children)],
                         Parents=[ak.from_iter(parents)],
                         MCPID=[ak.from_iter(mcpid)])
        eventWise.selected_event = 0
        showers = FormShower.get_showers(eventWise, exclude_pids)
        found = [set(s.particle_idxs) for s in showers]
        err_msg = f"Showers with parentage {parents}, expected {expected}, found {found}"
        assert len(showers) == len(expected), err_msg
        for shower in showers:
            members = set(shower.particle_idxs)
            assert members in expected, err_msg
def test_from_shower():
    """Write a DotGraph built from a Shower to disk and read it back."""
    # idx        0      1       2       3      4   5       6      7   8   9
    child_lists = [[], [0, 2, 3], [5], [6, 5, 4], [], [], [7, 8, 9], [], [], []]
    parent_lists = [[1], [], [1], [1], [3], [2, 3], [3], [6], [6], [6]]
    pids = [4, 5, 5, 3, 2, 1, -5, -1, 7, 11]
    n_nodes = len(child_lists)
    node_idxs = list(range(n_nodes))
    shower = FormShower.Shower(node_idxs, parent_lists, child_lists, pids)
    dot_text = DrawTrees.DotGraph(shower)
    with TempTestDir("tst") as dir_name:
        dot_path = os.path.join(dir_name, "graph.dot")
        with open(dot_path, 'w') as dot_file:
            dot_file.write(str(dot_text))
        read_back = read_dot(dot_path)
        assert len(read_back.nodes) == n_nodes
        # ten parent-child links are listed in child_lists
        assert len(read_back.edges) == 10
def main():
    """Launch file, makes and saves a dot graph

    Asks for an eventWise file and then, repeatedly, for an event number.
    For every shower of that event with a 'b' or 'bbar' root the jet sharing
    the most end products with the shower is picked, and a dot graph and its
    legend are written to files named after the event and shower number.
    """
    # imported here rather than at module level, as in the original
    from jet_tools import FormShower
    repeat = True
    eventWise_path = InputTools.get_file_name("Name the eventWise; ", '.awkd')
    eventWise = Components.EventWise.from_file(eventWise_path)
    while repeat:
        eventWise.selected_event = int(input("Event number: "))
        showers = FormShower.get_showers(eventWise)
        jet_name = "HomeJet"
        chosen_showers = []
        for shower_n, shower in enumerate(showers):
            shower_roots = [shower.labels[r] for r in shower.root_local_idxs]
            if 'b' not in shower_roots and 'bbar' not in shower_roots:
                continue
            chosen_showers.append(shower)
            print(f"Shower roots {shower.root_idxs}")
            max_children = max([len(d) for d in shower.children])
            end_ids = shower.ends
            print(
                f"Drawing shower {shower_n}, has {max_children} max children. Daughters to particles ratio = {max_children/len(shower.children)}"
            )
            # pick the jet with largest overlap
            largest_overlap = 0
            picked_jet = 0
            jet_parents = getattr(eventWise, jet_name + "_Parent")
            jet_child1 = getattr(eventWise, jet_name + "_Child1")
            jet_labels = getattr(eventWise, jet_name + "_Label")
            # The jet index needs its own name; reusing the shower index here
            # made every shower of an event write to the same file names.
            for jet_n in range(len(jet_parents)):
                # keep only the entries with a negative Child1
                is_external = jet_child1[jet_n] < 0
                input_idx = jet_labels[jet_n][is_external]
                jet_particles = eventWise.JetInputs_SourceIdx[input_idx]
                matches_here = sum([p in end_ids for p in jet_particles])
                if matches_here > largest_overlap:
                    largest_overlap = matches_here
                    picked_jet = jet_n
            print(
                f"A jet contains {largest_overlap} out of {len(end_ids)} end products"
            )
            graph = DotGraph(shower=shower,
                             eventWise=eventWise,
                             jet_name=jet_name,
                             jet_num=picked_jet,
                             use_TracksTowers=True)
            base_name = f"event{eventWise.selected_event}_plot"
            dotName = base_name + str(shower_n) + ".dot"
            legendName = base_name + str(shower_n) + "_ledg.dot"
            with open(dotName, 'w') as dotFile:
                dotFile.write(str(graph))
            with open(legendName, 'w') as dotFile:
                dotFile.write(graph.legend)
        #amalgam_shower = chosen_showers[0]
        #if len(chosen_showers)>1:
        #    for shower in chosen_showers[1:]:
        #        amalgam_shower.amalgamate(shower)
        #print("Drawing the amalgam of all b showers")
        #graph = DotGraph(shower=amalgam_shower, observables=obs)
        #dotName = f"event{eventWise.selected_event}_mixing_plot.dot"
        #legendName ="mixing_ledg.dot"
        #with open(dotName, 'w') as dotFile:
        #    dotFile.write(str(graph))
        #with open(legendName, 'w') as dotFile:
        #    dotFile.write(graph.legend)
        repeat = InputTools.yesNo_question("Again? ")
def add_inheritance(eventWise, jet_name, batch_length=100, silent=False,
                    append=True):
    """
    Add the inheritance from each to the tagging particles
    Represents the portion of the energy that has been derived
    from the true particles
    in the rest frame of the root particle.
    The highest percentage inheritance*jet energy gets the itag.

    Events that already have an inheritance stored are skipped, so repeated
    calls continue from where the last one stopped. Creating a file called
    "stop" in the working directory ends the batch early.

    Parameters
    ----------
    eventWise : EventWise
        dataset containing locations of particles and jets
    jet_name : str
        The prefix of the jet variables in the eventWise
    batch_length: int
        max number of events to process
        (Default value = 100)
    silent : bool
        Should the progress be printed?
        (Default value = False)
    append : bool
        Should the results be appended to the eventWise?
        (Default value = True)

    Returns
    -------
    (if append is false)
    content: dict of awkward arrays
        content for eventWise, holding the inheritance and the tags

    """
    eventWise.selected_event = None
    name = jet_name + "_Inheritance"
    tag_name = jet_name + "_ITags"
    n_events = len(getattr(eventWise, jet_name + "_Energy", []))
    jet_inhs = list(getattr(eventWise, name, []))
    jet_tags = list(getattr(eventWise, tag_name, []))
    start_point = len(jet_inhs)
    if start_point >= n_events:
        print("Finished")
        if append:
            return
        # return both columns, exactly as the normal exit does
        content = {}
        content[name] = ak.from_iter(jet_inhs)
        content[tag_name] = ak.from_iter(jet_tags)
        return content
    end_point = min(n_events, start_point + batch_length)
    if not silent:
        print(f" Will stop at {end_point/n_events:.1%}")
    for event_n in range(start_point, end_point):
        if event_n % 10 == 0 and not silent:
            print(f"{event_n/n_events:.1%}", end='\r', flush=True)
        if os.path.exists("stop"):
            print(f"Completed event {event_n-1}")
            break
        eventWise.selected_event = event_n
        jets_idxs = getattr(eventWise, jet_name + "_Label")
        inhs_here = []
        tags_here = [[] for _ in jets_idxs]
        if len(tags_here) > 0:
            parents_idxs = getattr(eventWise, jet_name + "_Parent")
            # the root of each jet is the entry with parent -1
            roots = parents_idxs == -1
            energies = getattr(eventWise, jet_name + "_Energy")
            pxs = getattr(eventWise, jet_name + "_Px")
            pys = getattr(eventWise, jet_name + "_Py")
            pzs = getattr(eventWise, jet_name + "_Pz")
            rf_energies = get_root_rest_energies(roots, energies,
                                                 pxs, pys, pzs)
            root_energies = energies[roots]
            sourceidx = eventWise.JetInputs_SourceIdx.tolist()
            # first position of each source index among the jet inputs;
            # gives the same answer as list.index without rescanning the list
            source_position = {}
            for position, source in enumerate(sourceidx):
                source_position.setdefault(source, position)
            for tag_idx in eventWise.TagIndex:
                inhs_here.append([])
                tag_decendants = [
                    source_position[d]
                    for d in FormShower.descendant_idxs(eventWise, tag_idx)
                    if d in source_position
                ]
                for jet_idx, parent_idx, energy in zip(jets_idxs,
                                                       parents_idxs,
                                                       rf_energies):
                    ratings = percent_pos(jet_idx, parent_idx,
                                          tag_decendants, energy)
                    inhs_here[-1].append(ratings)
                inhs_here[-1] = ak.from_iter(inhs_here[-1])
                # if all the inheritances are 0, then no tags
                if np.any(np.any(inhs_here[-1] > 0)):
                    # decide who gets the tag
                    root_scores = root_energies * inhs_here[-1][roots]
                    tags_here[np.argmax(root_scores)].append(tag_idx)
        jet_inhs.append(ak.from_iter(inhs_here))
        jet_tags.append(ak.from_iter(tags_here))
    content = {}
    content[name] = ak.from_iter(jet_inhs)
    content[tag_name] = ak.from_iter(jet_tags)
    if append:
        eventWise.append(**content)
    else:
        return content
def add_mass_share(eventWise, jet_name, batch_length=100, silent=False,
                   append=True):
    """
    Tagging procedure based on which jet has the largest portion of the tag's
    mass.

    Events that already have a tag mass stored are skipped, so repeated
    calls continue from where the last one stopped. Creating a file called
    "stop" in the working directory ends the batch early.

    Parameters
    ----------
    eventWise : EventWise
        dataset containing locations of particles and jets
    jet_name : str
        The prefix of the jet variables in the eventWise
    batch_length: int
        max number of events to process
        (Default value = 100)
    silent : bool
        Should the progress be printed?
        (Default value = False)
    append : bool
        Should the results be appended to the eventWise?
        (Default value = True)

    Returns
    -------
    (if append is false)
    content: dict of awkward arrays
        content for eventWise

    """
    eventWise.selected_event = None
    mass_column = jet_name + "_TagMass"
    tags_column = jet_name + "_MTags"
    n_events = len(getattr(eventWise, jet_name + "_Label", []))
    # masses are stored, but the working list holds mass squared
    mass2_per_event = list(getattr(eventWise, mass_column, np.array([]))**2)
    tags_per_event = list(getattr(eventWise, tags_column, []))
    first_event = len(mass2_per_event)

    def package():
        # take the root to go from mass squared back to mass
        return {
            mass_column: ak.from_iter(mass2_per_event)**0.5,
            tags_column: ak.from_iter(tags_per_event),
        }

    if first_event >= n_events:
        print("Finished")
        return None if append else package()
    last_event = min(n_events, first_event + batch_length)
    if not silent:
        print(f" Will stop at {last_event/n_events:.1%}")
    for event_n in range(first_event, last_event):
        if not silent and event_n % 10 == 0:
            print(f"{event_n/n_events:.1%}", end='\r', flush=True)
        if os.path.exists("stop"):
            print(f"Completed event {event_n-1}")
            break
        eventWise.selected_event = event_n
        jets_idxs = getattr(eventWise, jet_name + "_Label")
        n_jets = len(jets_idxs)
        jet_tags_here = [[] for _ in range(n_jets)]
        jet_mass2_here = [[] for _ in range(n_jets)]
        # mass squared of the current tag inside each jet
        tag_mass2 = np.zeros(n_jets)
        if n_jets > 0:
            energies = eventWise.Energy
            pxs = eventWise.Px
            pys = eventWise.Py
            pzs = eventWise.Pz
            sourceidx = eventWise.JetInputs_SourceIdx.tolist()
            for tag_idx in eventWise.TagIndex:
                # positions among the jet inputs of the tag's descendants
                input_positions = {
                    sourceidx.index(d)
                    for d in FormShower.descendant_idxs(eventWise, tag_idx)
                    if d in sourceidx
                }
                for jet_n, jet_idx in enumerate(jets_idxs):
                    shared = list(input_positions.intersection(jet_idx))
                    total_e = np.sum(energies[shared])
                    total_px = np.sum(pxs[shared])
                    total_py = np.sum(pys[shared])
                    total_pz = np.sum(pzs[shared])
                    mass2 = total_e**2 - total_px**2 - \
                        total_py**2 - total_pz**2
                    jet_mass2_here[jet_n].append(mass2)
                    tag_mass2[jet_n] = mass2
                # if no jet holds any of the tag's mass, then no tags
                if (tag_mass2 > 0).any():
                    # the jet with the largest share gets the tag
                    jet_tags_here[np.argmax(tag_mass2)].append(tag_idx)
        mass2_per_event.append(ak.from_iter(jet_mass2_here))
        tags_per_event.append(ak.from_iter(jet_tags_here))
    content = package()
    if append:
        eventWise.append(**content)
    else:
        return content
def add_detectable_fourvector(eventWise, tag_name="TagIndex", silent=False):
    """
    Add a list of detectable four vectors for the tags,
    as present in the JetInputs.
    also add the indices themselves.

    Tags whose detectable descendants overlap with those of the first
    unallocated tag are grouped together; a tag with no detectable
    descendants is left out.

    Parameters
    ----------
    eventWise : EventWise
        dataset containing locations of particles and jets
    tag_name : str
        name of the column in the eventWise that contains the
        indices of the tags that we wish to use
        (Default="TagIndex")
    silent : bool
        passed to add_tag_particles if the tags must be created first
        (Default value = False)

    """
    eventWise.selected_event = None
    name = "DetectableTag"
    if tag_name == "TagIndex" and "TagIndex" not in eventWise.columns:
        add_tag_particles(eventWise, silent=silent)
    tag_particles = getattr(eventWise, tag_name)
    # the leaves are the bits that are detected, the roots are the tag particles
    # group roots with common leaves
    leaves, invisible, roots = [], [], []
    energy, px, py, pz = [], [], [], []
    for event_n, tag_idxs in enumerate(tag_particles):
        eventWise.selected_event = event_n
        shower_inputs = set(eventWise.JetInputs_SourceIdx)
        all_energy = eventWise.Energy
        all_px = eventWise.Px
        all_py = eventWise.Py
        all_pz = eventWise.Pz
        # split the descendants of each tag by whether they are jet inputs
        seen_by_tag = []
        unseen_by_tag = []
        for tag in tag_idxs:
            below_tag = FormShower.descendant_idxs(eventWise, tag)
            seen = shower_inputs.intersection(below_tag)
            seen_by_tag.append(seen)
            unseen_by_tag.append(below_tag - seen)
        # now work out what overlaps
        event_leaves, event_invisible, event_roots = [], [], []
        event_energy, event_px, event_py, event_pz = [], [], [], []
        unallocated = np.ones(len(tag_idxs), dtype=bool)
        while np.any(unallocated):
            # start from the first free tag
            position = next(n for n, free in enumerate(unallocated) if free)
            unallocated[position] = False
            seed = seen_by_tag[position]
            if not seed:  # this tag is undetectable
                continue
            # every tag sharing a detectable particle with the seed joins it
            group_with = [
                g for g, other in enumerate(seen_by_tag)
                if not seed.isdisjoint(other)
            ]
            unallocated[group_with] = False
            event_roots.append(tag_idxs[group_with].tolist())
            detectables = sorted(
                set().union(*(seen_by_tag[g] for g in group_with)))
            undetectables = sorted(
                set().union(*(unseen_by_tag[g] for g in group_with)))
            event_leaves.append(detectables)
            event_invisible.append(undetectables)
            # now find the kinematics
            event_energy.append(np.sum(all_energy[detectables]))
            event_px.append(np.sum(all_px[detectables]))
            event_py.append(np.sum(all_py[detectables]))
            event_pz.append(np.sum(all_pz[detectables]))
        leaves.append(event_leaves)
        invisible.append(event_invisible)
        roots.append(event_roots)
        energy.append(event_energy)
        px.append(event_px)
        py.append(event_py)
        pz.append(event_pz)
    params = {
        name + "_Leaves": ak.from_iter(leaves),
        "UndetectableTag_Leaves": ak.from_iter(invisible),
        name + "_Roots": ak.from_iter(roots),
        name + "_Energy": ak.from_iter(energy),
        name + "_Px": ak.from_iter(px),
        name + "_Py": ak.from_iter(py),
        name + "_Pz": ak.from_iter(pz)
    }
    eventWise.append(**params)