def test_numba_getitem_tuple_slice_boolarray(self):
    """Check slice + boolean-mask tuple indexing inside numba-compiled functions.

    Each indexing pattern is evaluated on a NumPy array and on the awkward
    array built from it with ``awkward.fromiter``; both must give the same
    nested-list result.  For the two-mask patterns the NumPy reference is
    obtained through ``.py_func`` rather than the compiled dispatcher.
    """
    def first_mask():
        # A fresh array per call, exactly as the inline literals would give.
        return numpy.array([True, False, True])

    def second_mask():
        return numpy.array([True, True, False])

    dense = numpy.arange(36).reshape(4, 3, 3)
    jagged = awkward.fromiter(dense)

    @numba.njit
    def slice_mask(x, i):
        return x[1:3, i]

    expected = [[[9, 10, 11], [15, 16, 17]], [[18, 19, 20], [24, 25, 26]]]
    assert slice_mask(dense, first_mask()).tolist() == expected
    assert slice_mask(jagged, first_mask()).tolist() == expected

    @numba.njit
    def slice_mask_mask(x, i, j):
        return x[1:3, i, j]

    expected = [[9, 16], [18, 25]]
    assert slice_mask_mask.py_func(dense, first_mask(), second_mask()).tolist() == expected
    assert slice_mask_mask(jagged, first_mask(), second_mask()).tolist() == expected

    dense = numpy.arange(27).reshape(3, 3, 3)
    jagged = awkward.fromiter(dense)

    @numba.njit
    def mask_mask(x, i, j):
        return x[i, j]

    expected = [[0, 1, 2], [21, 22, 23]]
    assert mask_mask.py_func(dense, first_mask(), second_mask()).tolist() == expected
    assert mask_mask(jagged, first_mask(), second_mask()).tolist() == expected

    @numba.njit
    def mask_all_mask(x, i, j):
        return x[i, :, j]

    expected = [[0, 3, 6], [19, 22, 25]]
    assert mask_all_mask.py_func(dense, first_mask(), second_mask()).tolist() == expected
    assert mask_all_mask(jagged, first_mask(), second_mask()).tolist() == expected
def _append_object(event_list, field):
    """Concatenate ``field[i]`` onto ``event_list[i]`` along axis 1 for every i.

    Both entries are converted with ``awkward.fromiter`` before being joined,
    and each joined result is converted back to plain lists.

    :param event_list: indexable sequence; its length drives the iteration
    :param field: indexable sequence read at the same positions
    :return: a new list with one concatenated (plain-list) entry per position
    """
    return [
        awkward.concatenate(
            [awkward.fromiter(event_list[pos]), awkward.fromiter(field[pos])],
            axis=1,
        ).tolist()
        for pos in range(len(event_list))
    ]
def test_numba_getitem_jagged_intarray(self):
    """Index jagged arrays with jagged integer arrays inside a numba function."""
    singly = JaggedArray.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    doubly = JaggedArray.fromcounts([2, 0, 1], singly)

    @numba.njit
    def pick(x, i):
        return x[i]

    # (array to index, raw jagged index, expected nested-list result)
    cases = [
        (singly, [[2, 0, 0], [], [1]],
         [[3.3, 1.1, 1.1], [], [5.5]]),
        (doubly, [[1, 0], [], [0]],
         [[[], [1.1, 2.2, 3.3]], [], [[4.4, 5.5]]]),
        (doubly, [[[2, 0, 0], []], [], [[1]]],
         [[[3.3, 1.1, 1.1], []], [], [[5.5]]]),
    ]
    for target, raw_index, expected in cases:
        assert pick(target, awkward.fromiter(raw_index)).tolist() == expected
def test_numba_getitem_jagged_boolarray(self):
    """Index jagged arrays with jagged boolean masks inside a numba function."""
    singly = JaggedArray.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    doubly = JaggedArray.fromcounts([2, 0, 1], singly)

    @numba.njit
    def pick(x, i):
        return x[i]

    # (array to index, raw jagged mask, expected nested-list result)
    cases = [
        (singly, [[True, False, True], [], [False, True]],
         [[1.1, 3.3], [], [5.5]]),
        (doubly, [[True, False], [], [True]],
         [[[1.1, 2.2, 3.3]], [], [[4.4, 5.5]]]),
        (doubly, [[[True, False, True], []], [], [[False, True]]],
         [[[1.1, 3.3], []], [], [[5.5]]]),
    ]
    for target, raw_mask, expected in cases:
        assert pick(target, awkward.fromiter(raw_mask)).tolist() == expected
def test_jagged_pad(self):
    """Check ``pad`` with and without ``clip`` and with a masked fill value."""
    short = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    # No sublist is longer than 4, so all four variants give the same result.
    padded_to_four = [
        [1.1, 2.2, 3.3, None],
        [None, None, None, None],
        [4.4, 5.5, None, None],
    ]
    assert short.pad(4, clip=True).tolist() == padded_to_four
    assert short.pad(4, numpy.ma.masked, clip=True).regular().tolist() == padded_to_four
    assert short.pad(4).tolist() == padded_to_four
    assert short.pad(4, numpy.ma.masked).regular().tolist() == padded_to_four

    mixed = awkward.fromiter([[1.1, 2.2, 3.3, 4.4, 5.5], [], [6.6, 7.7, 8.8], [9.9]])
    unclipped = [[1.1, 2.2, 3.3, 4.4, 5.5], [None, None, None], [6.6, 7.7, 8.8], [9.9, None, None]]
    clipped = [[1.1, 2.2, 3.3], [None, None, None], [6.6, 7.7, 8.8], [9.9, None, None]]
    assert mixed.pad(3).tolist() == unclipped
    assert mixed.pad(3, clip=True).tolist() == clipped
def process(self, df):
    """Fill the three ``invm_*`` histograms with the mass of the two leading lepton-jets.

    Builds lepton-jet candidates from the ``pfjet_p4`` columns of ``df``,
    attaches per-jet labels derived from the daughter-candidate types and
    charges, keeps only jets flagged ``isneutral``, and requires at least two
    such jets per event.  Events are then classified into a channel code and
    the invariant mass of the leading pair is filled for events whose channel
    code is positive.

    :param df: mapping of column name to array (plus a ``'dataset'`` entry)
    :return: the accumulator obtained from ``self.accumulator.identity()``,
        filled; returned unfilled when no event has two selected lepton-jets
    """
    output = self.accumulator.identity()
    dataset = df['dataset']

    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'].content,
        py=df['pfjet_p4.fCoordinates.fY'].content,
        pz=df['pfjet_p4.fCoordinates.fZ'].content,
        energy=df['pfjet_p4.fCoordinates.fT'].content,
    )

    # Daughter type codes 3 and 8 are counted per jet (named pfmu / dsa below).
    dau_type = awkward.fromiter(df['pfjet_pfcand_type'])
    n_pfmu = (dau_type == 3).sum()
    n_dsa = (dau_type == 8).sum()
    no_dsa = (n_dsa == 0)
    is_egamma = (n_pfmu == 0) & no_dsa
    is_pfmu = (n_pfmu >= 2) & no_dsa
    is_dsa = n_dsa > 0
    # The label is 1, 2 or 3 depending on which category flag is set.
    jet_label = is_egamma.astype(int) * 1 + is_pfmu.astype(int) * 2 + is_dsa.astype(int) * 3
    leptonjets.add_attributes(label=jet_label)

    n_mu = ((dau_type == 3) | (dau_type == 8)).sum()
    leptonjets.add_attributes(ismutype=(n_mu >= 2), iseltype=(n_mu == 0))

    charge_sum = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
    leptonjets.add_attributes(qsum=charge_sum)

    neutral = leptonjets.iseltype | (leptonjets.ismutype & (leptonjets.qsum == 0))
    leptonjets.add_attributes(isneutral=neutral)
    leptonjets = leptonjets[leptonjets.isneutral]

    # Keep only events with at least two selected lepton-jets.
    has_two = leptonjets.counts >= 2
    dileptonjets = leptonjets[has_two]
    if dileptonjets.size == 0:
        return output

    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    # Channel definition: code 1 for 2mu2e, code 2 for 4mu, 0 otherwise.
    n_mutype = dileptonjets.ismutype.sum()
    mu_in_leading_pair = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = ((n_mutype == 1) & mu_in_leading_pair).astype(int) * 1
    mu_is_leading_pair = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = ((n_mutype == 2) & mu_is_leading_pair).astype(int) * 2
    channel_ = channel_2mu2e + channel_4mu

    # The same mass values go into all three histograms.
    pair_mass = (lj0 + lj1).p4.mass[channel_ > 0].flatten()
    output['invm_s'].fill(dataset=dataset, mass_s=pair_mass)
    output['invm_m'].fill(dataset=dataset, mass_m=pair_mass)
    output['invm_l'].fill(dataset=dataset, mass_l=pair_mass)

    return output
def test_event_selection_vectors():
    """Clone a tree with an event selection and check the surviving vector branches.

    The selection ``[True, False, True]`` keeps the first and third entries of
    ``tests/vectors_tree_file.root``; the cloned file is written to a temporary
    path in the current directory and removed afterwards.
    """
    original_file = uproot.open('tests/vectors_tree_file.root')
    treename = 'tree'
    original_tree = original_file[treename]

    # mkstemp returns an already-open OS-level descriptor together with the
    # path.  Only the path is needed, so close the descriptor immediately
    # instead of leaking it for the lifetime of the test process.
    fd, new_filename = tempfile.mkstemp(suffix='.root', dir=os.getcwd())
    os.close(fd)
    try:
        clone_tree(original_tree, new_filename, selection=[True, False, True])
        new_file = uproot.open(new_filename)
        new_tree = new_file[treename]

        assert new_tree['int_vector_branch'].array().tolist() == [[], [13]]
        assert abs(new_tree['float_vector_branch'].array() - awkward.fromiter([[], [15.15]])).max().max() < 1e-5
        assert abs(new_tree['double_vector_branch'].array() - awkward.fromiter([[], [16.16]])).max().max() < 1e-5
    finally:
        if os.path.isfile(new_filename):
            os.remove(new_filename)
def arrow_table(self, chunk_size, event_limit=sys.maxsize):
    """Yield the events of ``self.event_iterator`` as Arrow tables, chunk by chunk.

    :param chunk_size: Maximum number of events per yielded table (must be >= 1)
    :param event_limit: Passed through to ``self.event_iterator.iterate``.
        The default is ``sys.maxsize`` because ``sys.maxint`` does not exist
        on Python 3.
    :return: Generator yielding ``awkward.toarrow(...)`` of one table per chunk
    :raises ValueError: If ``chunk_size`` is smaller than 1
    """
    def group(iterator, n):
        """
        Batch together chunks of events into a single yield

        :param iterator: Iterator from which events are drawn
        :param n: Number of events to include in each yield
        :return: Yields a list of n or fewer events; never an empty list
        """
        batch = []
        # A plain for-loop works on Python 2 and 3 alike (no ``.next()``).
        for item in iterator:
            batch.append(item)
            if len(batch) == n:
                yield batch
                batch = []
        # Emit the partial tail only when there is one: an empty batch would
        # otherwise be produced whenever the stream ends on a chunk boundary.
        if batch:
            yield batch

    if chunk_size < 1:
        # A non-positive chunk size can never fill a batch.
        raise ValueError("chunk_size must be at least 1")

    for events in group(self.event_iterator.iterate(event_limit), chunk_size):
        object_array = awkward.fromiter(events)

        attr_dict = {}
        for attr_name in self.event_iterator.attr_name_list:
            # attr_name has the form "<branch>.<attribute>"
            parts = attr_name.split('.')
            branch_name = parts[0].strip(' ')
            a_name = parts[1]

            attr_dict[branch_name + '_' + a_name.strip('()')] = \
                object_array[branch_name][a_name]

        object_table = awkward.Table(**attr_dict)
        yield awkward.toarrow(object_table)
def ak_transpose(array):
    """
    Swap the first two dimensions of an awkward array.

    Handy for going from (n_events x n_features) to (n_features x n_events)
    and back.  The number of columns is taken from the length of the first
    row, ``array[0]``.
    """
    n_columns = len(array[0])
    columns = (array[:, col] for col in range(n_columns))
    return ak.fromiter(columns)
def load_root(file, ttname, ilo=None, ihi=None, brlist=None):
    """
    Read decoded (uncompressed) waveforms from a ROOT tree with pyroot and
    return them as awkward arrays, for comparison against uproot (which reads
    the compressed waveforms).

    All waveforms of one event are concatenated into a single flat list, with
    the sentinel value ``0xDEADBEEF`` appended after each waveform's samples.

    :param file: path handed to ``ROOT.TFile``
    :param ttname: name of the tree to fetch from the file
    :param ilo: first entry to read (default: 0)
    :param ihi: one past the last entry to read (default: number of entries)
    :param brlist: currently unused
    :return: dict mapping branch name to ``awkward.fromiter`` of the per-event
        lists; the keys are ``'fWaveforms'`` only, or additionally
        ``'fAuxWaveforms'`` and ``'fMSWaveforms'`` when the run reports
        multisampling
    """
    from ROOT import TFile, TTree, MGTWaveform, MJTMSWaveform

    tf = TFile(file)
    tt = tf.Get(ttname)
    nevt = tt.GetEntries()
    tt.GetEntry(0)
    is_ms = tt.run.GetUseMultisampling()

    # build w/ python primitive types and convert to JaggedArray after the loop.
    # JaggedArray requires one entry per event (have to handle multi-detector).
    br_list = ['fWaveforms', 'fAuxWaveforms', 'fMSWaveforms'] if is_ms else ['fWaveforms']
    pyarrs = {br: [] for br in br_list}
    delim = 0xDEADBEEF

    # loop over tree
    ilo = 0 if ilo is None else ilo
    ihi = nevt if ihi is None else ihi
    for i in range(ilo, ihi):
        tt.GetEntry(i)
        nwf = tt.channelData.GetEntries()

        # concat each hit into a single array
        ewf, ewfa, ewfms = [], [], []
        for j in range(nwf):
            # The sample index is named ``k`` so it cannot be confused with
            # (or, on Python 2, overwrite) the event index ``i``.
            if is_ms:
                wf = tt.event.GetWaveform(j)
                wfa = tt.event.GetAuxWaveform(j)
                wfms = MJTMSWaveform(wf, wfa)
                ewf.extend([wf[k] for k in range(wf.GetLength())])
                ewfa.extend([wfa[k] for k in range(wfa.GetLength())])
                ewfms.extend(wfms[k] for k in range(wfms.GetLength()))
                ewf.append(delim)
                ewfa.append(delim)
                ewfms.append(delim)
            else:
                wf = tt.event.GetWaveform(j)
                ewf.extend([wf[k] for k in range(wf.GetLength())])
                ewf.append(delim)

        if is_ms:
            pyarrs['fWaveforms'].append(ewf)
            pyarrs['fAuxWaveforms'].append(ewfa)
            pyarrs['fMSWaveforms'].append(ewfms)
        else:
            pyarrs['fWaveforms'].append(ewf)

    return {name: awkward.fromiter(events) for name, events in pyarrs.items()}
def test_numba_getitem_tuple_slice_intarray(self):
    """Check slice + integer-array tuple indexing inside a numba function.

    The same compiled function is applied to a NumPy array and to the awkward
    array built from it; both must produce the same nested lists.
    """
    dense = numpy.arange(36).reshape(4, 3, 3)
    jagged = awkward.fromiter(dense)

    @numba.njit
    def slice_take(x, i):
        return x[1:3, i]

    expected = [
        [[12, 13, 14], [9, 10, 11], [15, 16, 17]],
        [[21, 22, 23], [18, 19, 20], [24, 25, 26]],
    ]
    for target in (dense, jagged):
        assert slice_take(target, numpy.array([1, 0, 2])).tolist() == expected
def test_physics_jetcleaning(self):
    """Flag jets that have no electron within delta-R 0.5 and select them."""
    make_vectors = uproot_methods.TLorentzVectorArray.from_ptetaphim

    jets = make_vectors(
        awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]]),     # pt
        awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]]),       # eta
        awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]]),      # phi
        awkward.fromiter([[60.0, 70.0, 80.0], [], [90.0, 100.0]]),    # mass
    )
    electrons = make_vectors(
        awkward.fromiter([[20.2, 50.5], [50.5], [50.5]]),             # pt
        awkward.fromiter([[-2.2, 0.0], [0.0], [1.1]]),                # eta
        awkward.fromiter([[0.1, 0.78], [0.78], [-0.77]]),             # phi
        0.000511,                                                     # mass
    )

    # One nested list of (jet, electron) pairs per jet.
    pairs = jets.cross(electrons, nested=True)
    no_electron_nearby = ~(pairs.i0.delta_r(pairs.i1) < 0.5).any()

    assert no_electron_nearby.tolist() == [[True, False, True], [], [True, False]]

    # The selection itself must evaluate without raising; its value is unused.
    jets[no_electron_nearby]
def test_issue367(self):
    """Regression check for issue 367: sub-array sizes of the first ``weights.second`` entry."""
    tree = uproot.open("tests/samples/issue367.root")["tree"]
    first_entry = awkward.fromiter(tree.array("weights.second"))[0]

    expected_counts = [
        1000, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 100, 100, 100, 1
    ]
    assert first_entry.counts.tolist() == expected_counts
def merge_npzs_to_ak(rawdir, outfile=None, nmax=None):
    """
    Collect the events of every .npz file in rawdir into one ak array,
    transpose it, and write it to a file.

    When outfile is not given, the result is written to 'merged.awkd' next to
    rawdir (in ``osp.dirname(rawdir)``).  nmax is forwarded to the .npz iterator.
    """
    target = osp.dirname(rawdir) + '/merged.awkd' if outfile is None else outfile
    outfile = target
    bbefp.logger.info(f'Merging {rawdir} --> {outfile}')
    stacked = ak.fromiter(_iter_npzs(rawdir, nmax))
    transposed = ak_transpose(stacked)
    ak.save(outfile, transposed)
def test_floating_point_vectors():
    """Clone a tree with float and double vector branches; check dtypes and values.

    The clone of ``tests/floating_point_vectors_tree_file.root`` is written to
    a temporary path in the current directory and removed afterwards.
    """
    original_file = uproot.open('tests/floating_point_vectors_tree_file.root')
    treename = 'tree'
    original_tree = original_file[treename]

    # mkstemp returns an already-open OS-level descriptor together with the
    # path.  Only the path is needed, so close the descriptor immediately
    # instead of leaking it for the lifetime of the test process.
    fd, new_filename = tempfile.mkstemp(suffix='.root', dir=os.getcwd())
    os.close(fd)
    try:
        clone_tree(original_tree, new_filename)
        new_file = uproot.open(new_filename)
        new_tree = new_file[treename]

        assert new_tree['float_vector_branch'].array()[0].dtype == np.dtype('float32')
        assert abs(new_tree['float_vector_branch'].array() - awkward.fromiter([[], [-31.31, 32.32, 33.33], [-47.47]])).max().max() < 1e-5
        assert new_tree['double_vector_branch'].array()[0].dtype == np.dtype('float64')
        assert abs(new_tree['double_vector_branch'].array() - awkward.fromiter([[], [-34.34, 35.35, 36.36], [-48.48]])).max().max() < 1e-5
    finally:
        if os.path.isfile(new_filename):
            os.remove(new_filename)
def test_jagged_cross_argnested(self):
    """Check ``cross``/``argcross`` with and without ``nested``, chained up to three arrays.

    The two-array checks are run once.  The three-array checks are run for two
    variants of the third array, which differ only in the last event.
    """
    a = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    b = awkward.fromiter([[100, 200], [300], [400]])

    # --- pairs of a and b ---------------------------------------------------
    assert a.cross(b).tolist() == [
        [(1.1, 100), (1.1, 200), (2.2, 100), (2.2, 200), (3.3, 100), (3.3, 200)],
        [],
        [(4.4, 400), (5.5, 400)],
    ]
    assert a.argcross(b).tolist() == [
        [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)],
        [],
        [(0, 0), (1, 0)],
    ]
    assert a.cross(b, nested=True).tolist() == [
        [[(1.1, 100), (1.1, 200)], [(2.2, 100), (2.2, 200)], [(3.3, 100), (3.3, 200)]],
        [],
        [[(4.4, 400)], [(5.5, 400)]],
    ]
    assert a.argcross(b, nested=True).tolist() == [
        [[(0, 0), (0, 1)], [(1, 0), (1, 1)], [(2, 0), (2, 1)]],
        [],
        [[(0, 0)], [(1, 0)]],
    ]

    # --- triples of a, b and c ----------------------------------------------
    # Expected triples of the first event; identical for both variants of c.
    first_flat = [
        (1.1, 100, 999), (1.1, 200, 999),
        (2.2, 100, 999), (2.2, 200, 999),
        (3.3, 100, 999), (3.3, 200, 999),
    ]
    first_grouped_by_a = [first_flat[0:2], first_flat[2:4], first_flat[4:6]]
    first_grouped_by_pair = [[triple] for triple in first_flat]

    for c_last_event in ([999, 888], [999, 888, 777]):
        c = awkward.fromiter([[999], [999], c_last_event])

        # Fully nested result, compared event by event against a brute-force build.
        fully_nested = a.cross(b, nested=True).cross(c, nested=True).tolist()
        for event in range(3):
            assert fully_nested[event] == [
                [[(ai, bi, ci) for ci in c[event]] for bi in b[event]]
                for ai in a[event]
            ]

        # Expected triples of the last event depend on the variant of c.
        last_grouped = [
            [(4.4, 400, ci) for ci in c_last_event],
            [(5.5, 400, ci) for ci in c_last_event],
        ]
        last_flat = last_grouped[0] + last_grouped[1]

        assert a.cross(b).cross(c).tolist() == [first_flat, [], last_flat]
        assert a.cross(b, nested=True).cross(c).tolist() == [first_grouped_by_a, [], last_grouped]
        assert a.cross(b).cross(c, nested=True).tolist() == [first_grouped_by_pair, [], last_grouped]
def test_jagged_zip(self):
    """Check ``JaggedArray.zip`` with keyword fields and with positional arguments.

    A jagged array is zipped with another jagged array, a flat NumPy array
    and a scalar; keyword zips are compared as records, positional zips as
    tuples.
    """
    a = awkward.fromiter([[1.1, 2.2, 3.3], [], [4.4, 5.5]])
    b = awkward.JaggedArray([1, 5, 5], [4, 5, 7], [999, 10, 20, 30, 999, 40, 50, 999])
    c = numpy.array([100, 200, 300])
    d = 1000

    def as_records(rows):
        # Turn rows of (x, y) tuples into rows of {"one": x, "two": y} dicts.
        return [[{"one": x, "two": y} for x, y in row] for row in rows]

    a_with_b = [[(1.1, 10), (2.2, 20), (3.3, 30)], [], [(4.4, 40), (5.5, 50)]]
    b_with_a = [[(10, 1.1), (20, 2.2), (30, 3.3)], [], [(40, 4.4), (50, 5.5)]]
    b_with_c = [[(10, 100), (20, 100), (30, 100)], [], [(40, 300), (50, 300)]]
    b_with_d = [[(10, 1000), (20, 1000), (30, 1000)], [], [(40, 1000), (50, 1000)]]
    cases = [(a, b, a_with_b), (b, a, b_with_a), (b, c, b_with_c), (b, d, b_with_d)]

    for left, right, pairs in cases:
        assert awkward.JaggedArray.zip(one=left, two=right).tolist() == as_records(pairs)

    for left, right, pairs in cases:
        assert left.zip(right).tolist() == pairs
def NestNestObjArrayToJagged(objarr):
    """Convert an object array of nested lists into a nested jagged array.

    uproot reads a ``vector<vector<number>>`` TBranch as an object array;
    this passes it through ``awkward.fromiter`` and returns the result.
    """
    # An earlier two-step variant (JaggedArray.fromiter on the outer level and
    # again on its content, rejoined with JaggedArray.fromoffsets) was
    # replaced by the single call below.
    jagged = awkward.fromiter(objarr)
    return jagged
def mean_fname(fname, i):
    """Compute per-chunk means and variances of every branch of one file.

    The file is iterated in chunks with ``uproot.iterate``.  For each chunk
    the arrays are passed through ``build_truth`` and ``transform``; every
    key listed in ``all_branches`` is then flattened, stripped of NaN and
    infinite values, and its mean and variance are appended to the result.

    When ``useramdisk`` is set, the file is first copied to the ramdisk and
    the copy is removed again on exit, including when an error is raised.

    :param fname: file to process
    :param i: unused in this function
    :return: ``{'means': ..., 'varis': ...}``, each mapping a branch key to a
        list with one value per chunk in which the key had data
    :raises ValueError: if the mean of a branch is NaN or infinite
    """
    if useramdisk:
        _cp_xrootd_to_ramdisk(fname)

    try:
        means = {key: [] for key in all_branches}
        varis = {key: [] for key in all_branches}

        for arrays in uproot.iterate(fname, treename, branches + other_branches,
                                     namedecode="utf-8", entrysteps=entrysteps):
            for key in arrays:
                # convert vector<vector<T>> (ObjectArray by default) into nested JaggedArray
                if isinstance(arrays[key], awkward.ObjectArray):
                    arrays[key] = awkward.fromiter(arrays[key])

            arrays, keep = build_truth(arrays, fname)
            for key in arrays:
                arrays[key] = arrays[key][keep]

            arrays = transform(arrays)

            for key in arrays:
                if key not in all_branches:
                    continue
                # skip jet_daughter since it was renamed
                if 'jet_daughter_' in key:
                    continue

                a = arrays[key]
                while isinstance(a, awkward.JaggedArray):
                    a = a.flatten()

                # protection against empty arrays
                if a.size == 0:
                    continue

                # remove NaNs, but should understand why they happen
                a = a[~np.isnan(a)]
                a = a[~np.isinf(a)]

                m = a.mean()
                v = a.var()

                if np.isnan(m):
                    logging.error(f'NaN found: {key}')
                    print(fname)
                    raise ValueError(f'NaN found: {key} ({fname})')
                if np.isinf(m):
                    logging.error(f'Inf found: {key}')
                    print(fname)
                    raise ValueError(f'Inf found: {key} ({fname})')

                means[key] += [m]
                varis[key] += [v]
    finally:
        # Always drop the ramdisk copy, so a failing file does not leave it behind.
        if useramdisk:
            _rm_ramdisk(fname)

    return {'means': means, 'varis': varis}
def test_physics_matching(self):
    """Match each generated particle to its closest reconstructed one in delta-R.

    Several intermediate expressions are evaluated only to make sure they do
    not raise; their values are not checked.
    """
    make_vectors = uproot_methods.TLorentzVectorArray.from_ptetaphim

    gen = make_vectors(
        awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]]),            # pt
        awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]]),              # eta
        awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]]),             # phi
        0.2,                                                                 # mass
    )
    reco = make_vectors(
        awkward.fromiter([[20.2, 10.1, 30.3, 50.5], [50.5], [50.5]]),        # pt
        awkward.fromiter([[-2.2, -3.3, 2.2, 0.0], [0.0], [1.1]]),            # eta
        awkward.fromiter([[0.1, -1.4, 1.4, 0.78], [0.78], [-0.77]]),         # phi
        0.2,                                                                 # mass
    )

    # Smoke check: the flat cross product must be computable.
    gen.cross(reco)

    # One nested list of (gen, reco) candidates per generated particle.
    pairing = gen.cross(reco, nested=True)
    metric = pairing.i0.delta_r(pairing.i1)

    index_of_minimized = metric.argmin()
    assert index_of_minimized.tolist() == [[[1], [0], [2]], [], [[0], [0]]]

    smallest_distance = metric[index_of_minimized]
    passes_cut = (smallest_distance < 0.5)
    assert passes_cut.tolist() == [[[True], [True], [True]], [], [[False], [True]]]

    best_pairings_that_pass_cut = pairing[index_of_minimized][passes_cut]
    genrecos = best_pairings_that_pass_cut.flatten(axis=1)

    assert genrecos.counts.tolist() == [3, 0, 1]
    assert gen.counts.tolist() == [3, 0, 2]

    # Smoke check: both sides of the matched pairs expose their pt.
    genrecos.i0.pt
    genrecos.i1.pt
def make_jets_and_electrons():
    """Build a small fixed sample of jets and electrons over three events.

    :return: ``(jets, electrons)``, both created with
        ``TLorentzVectorArray.from_ptetaphim``; the electrons use the
        constant mass 0.000511
    """
    make_vectors = uproot_methods.TLorentzVectorArray.from_ptetaphim

    jets = make_vectors(
        awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]]),     # pt
        awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]]),       # eta
        awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]]),      # phi
        awkward.fromiter([[60.0, 70.0, 80.0], [], [90.0, 100.0]]),    # mass
    )
    electrons = make_vectors(
        awkward.fromiter([[20.2, 50.5], [50.5], [50.5]]),             # pt
        awkward.fromiter([[-2.2, 0.0], [0.0], [1.1]]),                # eta
        awkward.fromiter([[0.1, 0.78], [0.78], [-0.77]]),             # phi
        0.000511,                                                     # mass
    )
    return jets, electrons
def __next__(self):
    """Return the next record batch as a dict of awkward arrays.

    :return: dict mapping each column name of the batch to
        ``awkward.fromiter`` of that column's values
    :raises Exception: if ``self._batches`` has not been set yet
    :raises StopIteration: once all batches were returned; the position is
        reset to 0 at that point
    """
    batches = self._batches
    if batches is None:
        raise Exception(
            'Please set entrysteps using set_entrysteps() function before use!'
        )

    position = self._current_index
    if position >= len(batches):
        self._current_index = 0
        raise StopIteration

    # A one-element slice keeps the list type that from_batches is given.
    sub_table = pa.Table.from_batches(batches[position:position + 1])
    self._current_index = position + 1

    return {
        name: awkward.fromiter(values)
        for name, values in sub_table.to_pydict().items()
    }
def test_new_vector_branch():
    """Clone a tree while adding four new vector branches; check old and new branches.

    The clone of ``tests/vectors_tree_file.root`` is written to a temporary
    path in the current directory and removed afterwards.
    """
    original_file = uproot.open('tests/vectors_tree_file.root')
    treename = 'tree'
    original_tree = original_file[treename]

    # mkstemp returns an already-open OS-level descriptor together with the
    # path.  Only the path is needed, so close the descriptor immediately
    # instead of leaking it for the lifetime of the test process.
    fd, new_filename = tempfile.mkstemp(suffix='.root', dir=os.getcwd())
    os.close(fd)
    try:
        new_branch_dictionary = {
            'new_int16_vector_branch': awkward.fromiter([[-1], [-2, 3], []]).astype(np.dtype('int16')),
            'new_int32_vector_branch': awkward.fromiter([[-4], [-5, 6], []]).astype(np.dtype('int32')),
            'new_float32_vector_branch': awkward.fromiter([[-7.7], [-8.8, 9.9], []]).astype(np.dtype('float32')),
            'new_float64_vector_branch': awkward.fromiter([[-10.10], [-11.11, 12.12], []]).astype(np.dtype('float64')),
        }
        clone_tree(original_tree, new_filename, new_branches=new_branch_dictionary)
        new_file = uproot.open(new_filename)
        new_tree = new_file[treename]

        # Branches that already existed in the original tree.
        assert new_tree['int_vector_branch'].array().tolist() == [[], [-1, 2, 3], [13]]
        assert abs(new_tree['float_vector_branch'].array() - awkward.fromiter([[], [-7.7, 8.8, 9.9], [15.15]])).max().max() < 1e-5
        assert abs(new_tree['double_vector_branch'].array() - awkward.fromiter([[], [-10.10, 11.11, 12.12], [16.16]])).max().max() < 1e-5

        # Newly added branches: dtype and content.
        assert new_tree['new_int16_vector_branch'].array()[0].dtype == np.dtype('int16')
        assert new_tree['new_int16_vector_branch'].array().tolist() == [[-1], [-2, 3], []]
        assert new_tree['new_int32_vector_branch'].array()[0].dtype == np.dtype('int32')
        assert new_tree['new_int32_vector_branch'].array().tolist() == [[-4], [-5, 6], []]
        assert new_tree['new_float32_vector_branch'].array()[0].dtype == np.dtype('float32')
        assert abs(new_tree['new_float32_vector_branch'].array() - awkward.fromiter([[-7.7], [-8.8, 9.9], []])).max().max() < 1e-5
        assert new_tree['new_float64_vector_branch'].array()[0].dtype == np.dtype('float64')
        assert abs(new_tree['new_float64_vector_branch'].array() - awkward.fromiter([[-10.10], [-11.11, 12.12], []])).max().max() < 1e-5
    finally:
        if os.path.isfile(new_filename):
            os.remove(new_filename)
def main():
    """
    Loop over all combinations of mass and width.

    Events are generated with ``run`` for every (mass, width) point of the
    grid described by the parsed arguments, collected into one awkward array,
    and saved to 'events.awkd'.
    """
    args = parse_input()

    def grid(lower, upper, step):
        # np.ceil returns a float, but np.linspace requires an integer number
        # of samples (a float raises TypeError on current NumPy).
        n_points = int(np.ceil((upper - lower) / step)) + 1
        return np.linspace(lower, upper, n_points)

    masses = grid(args.mass_min, args.mass_max, args.mass_step)
    widths = grid(args.width_min, args.width_max, args.width_step)

    def generator():
        with tqdm.tqdm(unit='event',
                       total=masses.size * widths.size * args.nevents,
                       desc='Generating') as pbar:
            for mass in masses:
                for width in widths:
                    yield from run(args.nevents, mass, width)
                    pbar.update(args.nevents)

    events = ak.fromiter(generator())
    ak.save('events.awkd', events, mode='w')
def gen_reco_TLV():
    """Build a small fixed sample of generated and reconstructed vectors.

    :return: ``(gen, reco)``, both created with
        ``TLorentzVectorArray.from_ptetaphim`` and the constant mass 0.2
    """
    make_vectors = uproot_methods.TLorentzVectorArray.from_ptetaphim

    gen = make_vectors(
        awkward.fromiter([[10.0, 20.0, 30.0], [], [40.0, 50.0]]),            # pt
        awkward.fromiter([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]]),              # eta
        awkward.fromiter([[-1.5, 0.0, 1.5], [], [0.78, -0.78]]),             # phi
        0.2,                                                                 # mass
    )
    reco = make_vectors(
        awkward.fromiter([[20.2, 10.1, 30.3, 50.5], [50.5], [60]]),          # pt
        awkward.fromiter([[-2.2, -3.3, 2.2, 0.0], [0.0], [1.1]]),            # eta
        awkward.fromiter([[0.1, -1.4, 1.4, 0.78], [0.78], [-0.77]]),         # phi
        0.2,                                                                 # mass
    )
    return (gen, reco)
def event(self, chunk):
    """Group the hit channels of every event into clusters and store the cluster sizes.

    A cluster is a run of entries in ``Hit_Chan`` in which each value is
    exactly one larger than the previous one.  The number of hits in each
    cluster is stored per event under ``self.out_var``.

    :param chunk: object whose ``tree`` provides ``arrays``, ``array`` and
        ``new_variable``
    :return: always True
    """
    # fetch both branches in one call
    hit_chan, hit_start_tick = chunk.tree.arrays(
        ['Hit_Chan', 'Hit_Start_Tick'], outputtype=tuple)

    # optional selection
    if self.mask:
        selection = chunk.tree.array(self.mask)
        hit_chan = hit_chan[selection]
        hit_start_tick = hit_start_tick[selection]

    hits_per_cluster_by_event = []

    # The start ticks are not used below; they are still zipped so that the
    # iteration length is unchanged.
    for chans, _starts in zip(hit_chan, hit_start_tick):
        print("Hit channels : ", chans)

        # split wherever two neighbouring channels are not consecutive
        break_points = np.where(np.diff(chans) != 1)[0] + 1
        clusters = np.split(chans, break_points)

        sizes = []
        for number, cluster in enumerate(clusters):
            print(number, cluster)
            sizes.append(len(cluster))

        hits_per_cluster_by_event.append(sizes)

    # attach the new per-event variable
    chunk.tree.new_variable(self.out_var, awkward.fromiter(hits_per_cluster_by_event))
    return True
def write_root_file(self, particle_events, file_handle):
    """Append the given events to the "EVENT_NTUPLE" entry of ``file_handle``.

    The events are converted with ``awkward.fromiter`` and the four columns
    ``pulse_height``, ``chan``, ``timestamp`` and ``hit_count`` are passed to
    ``extend``.

    :param particle_events: iterable of event records
    :param file_handle: object indexable by "EVENT_NTUPLE"
    :return: always 0
    """
    print("Getting AWkward..")
    events = awkward.fromiter(particle_events)
    columns = events.contents

    print("Writing ROOT file ...")
    branch_names = ("pulse_height", "chan", "timestamp", "hit_count")
    file_handle["EVENT_NTUPLE"].extend({name: columns[name] for name in branch_names})

    # Alternatives tried earlier: one newbasket() call per branch, and
    # DataFrame.to_root from root_pandas
    # (https://github.com/scikit-hep/root_pandas).
    return 0
def get_histograms(list_of_files_, variable_list_, cuts_to_apply_=None):
    """Book per-sample/per-tree histograms and loop over the input trees.

    Args:
        list_of_files_: mapping ``sample -> {'trees': [...], 'files': ...}``;
            both keys are read below.
        variable_list_: branch names handed to ``ur.tree.iterate`` as
            ``branches=``.
        cuts_to_apply_: accepted but never read in this function.

    Returns:
        ``hist``: nested ``OrderedDict`` indexed as
        ``hist[sample][tree_name][histogram_key]``.

    NOTE(review): every ``rnp.fill_hist`` call at the end of the event loop
    is commented out, so the returned histograms are never filled here.
    ``counts`` is populated with empty dicts but is neither filled nor
    returned.  ``rt``/``ur``/``aw``/``rnp`` are module aliases defined
    outside this block (presumably ROOT, uproot, awkward, root_numpy --
    confirm against the file's imports).
    """
    hist = OrderedDict()
    counts = OrderedDict()
    for sample in list_of_files_:
        hist[sample] = OrderedDict()
        counts[sample] = OrderedDict()
        for tree_name in list_of_files_[sample]['trees']:
            print('\nReserving Histograms for:', sample, tree_name)
            hist[sample][tree_name] = OrderedDict()
            counts[sample][tree_name] = OrderedDict()
            # Reserve histograms
            # ROOT object names embed sample and tree name so that they are
            # unique across the nested loops.
            hist[sample][tree_name]['MET'] = rt.TH1D(
                'MET_' + sample + '_' + tree_name, 'E_{T}^{miss} [GeV]', 500, 0, 1000)
            hist[sample][tree_name]['S_Flavor_jet'] = rt.TH1D(
                'S_Flavor_jet_' + sample + '_' + tree_name, 'Flavor S jets', 20, 0, 20)
            hist[sample][tree_name]['ISR_Flavor_jet'] = rt.TH1D(
                'ISR_Flavor_jet_' + sample + '_' + tree_name, 'Flavor ISR jets', 20, 0, 20)
            hist[sample][tree_name]['S_Flavor_lep'] = rt.TH1D(
                'S_Flavor_lep_' + sample + '_' + tree_name, 'Flavor S leps', 20, 0, 20)
            hist[sample][tree_name]['ISR_Flavor_lep'] = rt.TH1D(
                'ISR_Flavor_lep_' + sample + '_' + tree_name, 'Flavor ISR leps', 20, 0, 20)
            hist[sample][tree_name]['Lep_to_Charge'] = rt.TH2D(
                'Lep_to_Charge_' + sample + '_' + tree_name, 'lep Flavor to Charge', 20, 0, 20, 5, -2, 2)
            hist[sample][tree_name]['Lep_to_Lep'] = rt.TH2D(
                'Lep_to_Lep_' + sample + '_' + tree_name, '2leps to 2 opp leps', 2, 0, 2, 2, 0, 2)
            hist[sample][tree_name]['RISR'] = rt.TH1D(
                'risr_' + sample + '_' + tree_name, 'RISR', 500, 0, 2)
            hist[sample][tree_name]['PTISR'] = rt.TH1D(
                'ptisr_' + sample + '_' + tree_name, 'p_{T} ISR [GeV]', 500, 0, 1000)
            hist[sample][tree_name]['PTCM'] = rt.TH1D(
                'ptcm_' + sample + '_' + tree_name, 'p_{T} CM [GeV]', 500, 0, 1000)
            hist[sample][tree_name]['RISR_PTISR'] = rt.TH2D(
                'RISR_PTISR_' + sample + '_' + tree_name, 'RISR_PTISR', 500, 0, 2, 500, 0, 1000)
            hist[sample][tree_name]['RISR_PTCM'] = rt.TH2D(
                'RISR_PTCM_' + sample + '_' + tree_name, 'RISR_PTCM', 500, 0, 2, 500, 0, 1000)
            hist[sample][tree_name]['PTCM_div_PTISR'] = rt.TH1D(
                'PTCM_div_PTISR_' + sample + '_' + tree_name, 'PTCM_div_PTISR', 500, 0, 1)
            hist[sample][tree_name]['dphi_PTCM_div_PTISR'] = rt.TH2D(
                'dphi_PTCM_div_PTISR_' + sample + '_' + tree_name, 'dphi_PTCM_div_PTISR', 500, 0, np.pi, 500, 0, 1)
            hist[sample][tree_name]['dphi_PTCM'] = rt.TH2D(
                'dphi_PTCM_' + sample + '_' + tree_name, 'dphi_PTCM', 500, 0, np.pi, 500, 0, 1000)
            hist[sample][tree_name]['PTISR_PTCM'] = rt.TH2D(
                'PTISR_PTCM_' + sample + '_' + tree_name, 'PTISR_PTCM', 500, 0, 1000, 500, 0, 1000)
            hist[sample][tree_name]['S_ISR_N_jet'] = rt.TH2D(
                'S_ISR_N_jet_' + sample + '_' + tree_name, 'N jet, S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_lep'] = rt.TH2D(
                'S_ISR_N_lep_' + sample + '_' + tree_name, 'N lep, S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_loose_jet'] = rt.TH2D(
                'S_ISR_N_loose_jet_' + sample + '_' + tree_name, 'N loose S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_medium_jet'] = rt.TH2D(
                'S_ISR_N_medium_jet_' + sample + '_' + tree_name, 'N medium S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['S_ISR_N_tight_jet'] = rt.TH2D(
                'S_ISR_N_tight_jet_' + sample + '_' + tree_name, 'N tight S-ISR', 15, 0, 15, 15, 0, 15)
            hist[sample][tree_name]['RISR_N_jet'] = rt.TH2D(
                'RISR_N_jet_' + sample + '_' + tree_name, 'RISR N jet', 500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_lep'] = rt.TH2D(
                'RISR_N_lep_' + sample + '_' + tree_name, 'RISR N lep', 500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_S_jet'] = rt.TH2D(
                'RISR_N_S_jet_' + sample + '_' + tree_name, 'RISR N S jet', 500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_S_lep'] = rt.TH2D(
                'RISR_N_S_lep_' + sample + '_' + tree_name, 'RISR N S lep', 500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_ISR_jet'] = rt.TH2D(
                'RISR_N_ISR_jet_' + sample + '_' + tree_name, 'RISR N ISR jet', 500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['RISR_N_ISR_lep'] = rt.TH2D(
                'RISR_N_ISR_lep_' + sample + '_' + tree_name, 'RISR N ISR lep', 500, 0, 2, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_jet'] = rt.TH2D(
                'PTISR_N_jet_' + sample + '_' + tree_name, 'PTISR N jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_lep'] = rt.TH2D(
                'PTISR_N_lep_' + sample + '_' + tree_name, 'PTISR N lep', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_S_jet'] = rt.TH2D(
                'PTISR_N_S_jet_' + sample + '_' + tree_name, 'PTISR N S jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_S_lep'] = rt.TH2D(
                'PTISR_N_S_lep_' + sample + '_' + tree_name, 'PTISR N S lep', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_ISR_jet'] = rt.TH2D(
                'PTISR_N_ISR_jet_' + sample + '_' + tree_name, 'PTISR N ISR jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTISR_N_ISR_lep'] = rt.TH2D(
                'PTISR_N_ISR_lep_' + sample + '_' + tree_name, 'PTISR N ISR lep', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_jet'] = rt.TH2D(
                'PTCM_N_jet_' + sample + '_' + tree_name, 'PTCM N jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_lep'] = rt.TH2D(
                'PTCM_N_lep_' + sample + '_' + tree_name, 'PTCM N lep', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_S_jet'] = rt.TH2D(
                'PTCM_N_S_jet_' + sample + '_' + tree_name, 'PTCM N S jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_S_lep'] = rt.TH2D(
                'PTCM_N_S_lep_' + sample + '_' + tree_name, 'PTCM N S lep', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_ISR_jet'] = rt.TH2D(
                'PTCM_N_ISR_jet_' + sample + '_' + tree_name, 'PTCM N ISR jet', 500, 0, 1000, 20, 0, 20)
            hist[sample][tree_name]['PTCM_N_ISR_lep'] = rt.TH2D(
                'PTCM_N_ISR_lep_' + sample + '_' + tree_name, 'PTCM N ISR lep', 500, 0, 1000, 20, 0, 20)
        # NOTE(review): the original indentation was lost; this loop reads
        # `sample`, so it is placed inside the sample loop, after booking.
        i_entries = 0
        for itree, in_tree in enumerate(list_of_files_[sample]['trees']):
            for events in ur.tree.iterate(list_of_files_[sample]['files'], in_tree, branches=variable_list_, entrysteps=10000):
                # NOTE(review): `tree_name` is the value left over from the
                # booking loop (the last tree), not the tree being read
                # (`in_tree`) -- confirm which one is intended here and in
                # the commented-out fill calls below.
                print('\nGetting Histograms for:', sample, tree_name)
                print('tree: ', itree + 1)
                # Running counter assumes every chunk holds exactly 10000
                # entries; the last chunk of a tree may be shorter.
                i_entries += 10000
                print(i_entries)
                print(events)
                pt_jet = events[b'PT_jet']
                flavor_jet = events[b'Flavor_jet']
                isr_index_jet = events[b'index_jet_ISR']
                s_index_jet = events[b'index_jet_S']
                bjet_tag = events[b'Btag_jet']
                pt_lep = events[b'PT_lep']
                ch_lep = events[b'Charge_lep']
                id_lep = events[b'ID_lep']
                pdgid_lep = events[b'PDGID_lep']
                isr_index_lep = events[b'index_lep_ISR']
                s_index_lep = events[b'index_lep_S']
                met = events[b'MET']
                risr = aw.fromiter(events[b'RISR'])
                ptisr = events[b'PTISR']
                ptcm = events[b'PTCM']
                dphi = events[b'dphiCMI']
                weight = events[b'weight']
                # Per-event multiplicities from the jagged offsets.
                len_jet = pt_jet.stops - pt_jet.starts
                max_n_jets = np.amax(len_jet)
                # pt_jet = ([np.pad(jets, (0, max_n_jets - len(jets)), 'constant', constant_values=np.nan) for jets in pt_jet])
                # flavor_jet = ([np.pad(jets, (0, max_n_jets - len(jets)), 'constant', constant_values=np.nan) for jets in flavor_jet])
                # bjet_tag = ([np.pad(jets, (0, max_n_jets - len(jets)), 'constant', constant_values=np.nan) for jets in bjet_tag])
                len_lep = pt_lep.stops - pt_lep.starts
                max_n_leps = np.amax(len_lep)
                # pt_lep = ([np.pad(leps, (0, max_n_leps - len(leps)), 'constant', constant_values=np.nan) for leps in pt_lep])
                # ch_lep = ([np.pad(leps, (0, max_n_leps - len(leps)), 'constant', constant_values=0) for leps in ch_lep])
                # pdgid_lep = ([np.pad(leps, (0, max_n_leps - len(leps)), 'constant', constant_values=np.nan) for leps in pdgid_lep])
                only_2_leps = ([True if lep == 2 else False for lep in len_lep])
                # NOTE(review): this zips over `only_2_leps` (booleans), so
                # `lep == 2` can never be true and the list is all False;
                # `len_lep` looks like the intended input -- confirm.
                only_2_opp_leps = ([True if lep == 2 and len(charge[charge > 0]) > 0 and len(charge[charge < 0]) > 0 else False for lep, charge in zip(only_2_leps, ch_lep)])
                isr_index_jet = np.array(isr_index_jet)
                s_index_jet = np.array(s_index_jet)
                isr_index_lep = np.array(isr_index_lep)
                s_index_lep = np.array(s_index_lep)
                # Keep only entry 1 along the second axis of each of these.
                # Going by the commented names below, index 1 is presumably
                # the "lepV_jetA" configuration -- TODO confirm.
                risr = risr[:, 1]
                isr_index_jet = isr_index_jet[:, 1]
                s_index_jet = s_index_jet[:, 1]
                isr_index_lep = isr_index_lep[:, 1]
                s_index_lep = s_index_lep[:, 1]
                ptcm = ptcm.content[:, 1]
                dphi = dphi.content[:, 1]
                # risr_lepV_jetI = risr[:,0]
                # risr_lepV_jetA = risr[:,1]
                # risr_lepA_jetA = risr[:,2]
                print('\ncreating masks and weights')
                print('-> bjet masks')
                # Three thresholds on the b-tag value, named loose / medium /
                # tight (which tagger they belong to is not visible here).
                loose_mask = bjet_tag > 0.5426
                medium_mask = bjet_tag > 0.8484
                tight_mask = bjet_tag > 0.9535
                has_2_loose = ([True if len(mask[mask]) >= 2 else False for mask in loose_mask])
                has_2_medium = ([True if len(mask[mask]) >= 2 else False for mask in medium_mask])
                has_2_tight = ([True if len(mask[mask]) >= 2 else False for mask in tight_mask])
                print('-> S bjet masks')
                loose_s_mask = ([mask[index] for mask, index in zip(loose_mask, s_index_jet)])
                medium_s_mask = ([mask[index] for mask, index in zip(medium_mask, s_index_jet)])
                tight_s_mask = ([mask[index] for mask, index in zip(tight_mask, s_index_jet)])
                print('-> ISR bjet masks')
                loose_isr_mask = ([mask[index] for mask, index in zip(loose_mask, isr_index_jet)])
                medium_isr_mask = ([mask[index] for mask, index in zip(medium_mask, isr_index_jet)])
                tight_isr_mask = ([mask[index] for mask, index in zip(tight_mask, isr_index_jet)])
                print('-> event bjet masks')
                is_loose = ([np.any(event) for event in loose_mask])
                is_medium = ([np.any(event) for event in medium_mask])
                is_tight = ([np.any(event) for event in tight_mask])
                print('-> jet weights')
                # Repeat each event weight once per non-NaN jet in the event.
                jet_weight = ([np.array([np.float64(event)] * len(jets[~np.isnan(jets)])) for jets, event in zip(pt_jet, weight)])
                # jet_weight = ([np.pad(w, (0, max_n_jets - len(w)), 'constant', constant_values=np.nan) for w in jet_weight])
                s_jet_weight = ([jets[index] for jets, index in zip(jet_weight, s_index_jet)])
                isr_jet_weight = ([jets[index] for jets, index in zip(jet_weight, isr_index_jet)])
                pt_s_jet = ([jets[index] for jets, index in zip(pt_jet, s_index_jet)])
                pt_isr_jet = ([jets[index] for jets, index in zip(pt_jet, isr_index_jet)])
                flavor_s_jet = ([jets[index] for jets, index in zip(flavor_jet, s_index_jet)])
                flavor_isr_jet = ([jets[index] for jets, index in zip(flavor_jet, isr_index_jet)])
                print('-> lep weights')
                lep_weight = ([np.array([np.float64(event)] * len(leps[~np.isnan(leps)])) for leps, event in zip(pt_lep, weight)])
                # lep_weight = ([np.pad(w, (0, max_n_leps - len(w)), 'constant', constant_values=np.nan) for w in lep_weight])
                s_lep_weight = ([leps[index] for leps, index in zip(lep_weight, s_index_lep)])
                isr_lep_weight = ([leps[index] for leps, index in zip(lep_weight, isr_index_lep)])
                pt_s_lep = ([leps[index] for leps, index in zip(pt_lep, s_index_lep)])
                pt_isr_lep = ([leps[index] for leps, index in zip(pt_lep, isr_index_lep)])
                pdgid_s_lep = ([leps[index] for leps, index in zip(pdgid_lep, s_index_lep)])
                pdgid_isr_lep = ([leps[index] for leps, index in zip(pdgid_lep, isr_index_lep)])
                print('\napplying masks')
                print('-> jet pt')
                loose_pt_jet = ([jet[mask] for jet, mask in zip(pt_jet, loose_mask)])
                medium_pt_jet = ([jet[mask] for jet, mask in zip(pt_jet, medium_mask)])
                tight_pt_jet = ([jet[mask] for jet, mask in zip(pt_jet, tight_mask)])
                print('-> N S jets')
                n_s_jet = ([len(jets[~np.isnan(jets)]) for jets in pt_s_jet])
                n_s_loose_jet = ([len(jets[mask]) for jets, mask in zip(pt_s_jet, loose_s_mask)])
                n_s_medium_jet = ([len(jets[mask]) for jets, mask in zip(pt_s_jet, medium_s_mask)])
                n_s_tight_jet = ([len(jets[mask]) for jets, mask in zip(pt_s_jet, tight_s_mask)])
                print('-> N ISR jets')
                n_isr_jet = ([len(jets[~np.isnan(jets)]) for jets in pt_isr_jet])
                n_isr_loose_jet = ([len(jets[mask]) for jets, mask in zip(pt_isr_jet, loose_isr_mask)])
                n_isr_medium_jet = ([len(jets[mask]) for jets, mask in zip(pt_isr_jet, medium_isr_mask)])
                n_isr_tight_jet = ([len(jets[mask]) for jets, mask in zip(pt_isr_jet, tight_isr_mask)])
                print('-> N S leps')
                n_s_lep = ([len(leps[~np.isnan(leps)]) for leps in pt_s_lep])
                print('-> N ISR leps')
                n_isr_lep = ([len(leps[~np.isnan(leps)]) for leps in pt_isr_lep])
                print('-> Event variables')
                ptcm_div_ptisr = np.divide(ptcm, ptisr)
                print('-> jet weights')
                # loose_weight = weight[is_loose]
                # medium_weight = weight[is_medium]
                # tight_weight = weight[is_tight]
                #
                # loose_jet_weight = ([w[mask] for w, mask in zip(jet_weight, loose_mask)])
                # medium_jet_weight = ([w[mask] for w, mask in zip(jet_weight, medium_mask)])
                # tight_jet_weight = ([w[mask] for w, mask in zip(jet_weight, tight_mask)])
                #
                # loose_s_jet_weight = ([w[mask] for w, mask in zip(s_jet_weight, loose_s_mask)])
                # medium_s_jet_weight = ([w[mask] for w, mask in zip(s_jet_weight, medium_s_mask)])
                # tight_s_jet_weight = ([w[mask] for w, mask in zip(s_jet_weight, tight_s_mask)])
                #
                # loose_isr_jet_weight = (w[mask] for w, mask in zip(isr_jet_weight, loose_isr_mask)])
                # medium_isr_jet_weight = (w[mask] for w, mask in zip(isr_jet_weight, medium_isr_mask)])
                # tight_isr_jet_weight = (w[mask] for w, mask in zip(isr_jet_weight, tight_isr_mask)])
                print('-> Overall selection mask')
                # Event passes when it has exactly two leptons AND at least
                # one jet above the medium b-tag threshold.
                evt_selection_mask = ([True if np.all([lep_mask, b_mask]) else False for lep_mask, b_mask in zip(only_2_leps, is_medium)])
                # NOTE(review): `evt_selection_mask` is a Python list of
                # booleans.  Several operands below (lep_weight,
                # flavor_s_jet, n_s_jet, s_jet_weight, ...) are plain
                # Python lists built by the comprehensions above; indexing
                # a list with a list raises TypeError.  Confirm whether
                # these were meant to be numpy/awkward arrays.
                risr = risr[evt_selection_mask]
                ptisr = ptisr[evt_selection_mask]
                ptcm = ptcm[evt_selection_mask]
                met = met[evt_selection_mask]
                lep_weight = lep_weight[evt_selection_mask]
                pdgid_lep = pdgid_lep[evt_selection_mask]
                ch_lep = ch_lep[evt_selection_mask]
                flavor_jet = flavor_jet[evt_selection_mask]
                flavor_s_jet = flavor_s_jet[evt_selection_mask]
                flavor_isr_jet = flavor_isr_jet[evt_selection_mask]
                pdgid_s_lep = pdgid_s_lep[evt_selection_mask]
                pdgid_isr_lep = pdgid_isr_lep[evt_selection_mask]
                dphi = dphi[evt_selection_mask]
                ptcm_div_ptisr = ptcm_div_ptisr[evt_selection_mask]
                n_s_jet = n_s_jet[evt_selection_mask]
                n_s_loose_jet = n_s_loose_jet[evt_selection_mask]
                n_s_medium_jet = n_s_medium_jet[evt_selection_mask]
                n_s_tight_jet = n_s_tight_jet[evt_selection_mask]
                n_s_lep = n_s_lep[evt_selection_mask]
                n_isr_jet = n_isr_jet[evt_selection_mask]
                n_isr_loose_jet = n_isr_loose_jet[evt_selection_mask]
                n_isr_medium_jet = n_isr_medium_jet[evt_selection_mask]
                n_isr_tight_jet = n_isr_tight_jet[evt_selection_mask]
                n_isr_lep = n_isr_lep[evt_selection_mask]
                len_jet = len_jet[evt_selection_mask]
                len_lep = len_lep[evt_selection_mask]
                # Unmasked copy of the event weights, referenced only by the
                # commented-out 'Lep_to_Lep' fill.
                only_lep_weight = weight
                weight = weight[evt_selection_mask]
                s_jet_weight = s_jet_weight[evt_selection_mask]
                isr_jet_weight = isr_jet_weight[evt_selection_mask]
                s_lep_weight = s_lep_weight[evt_selection_mask]
                isr_lep_weight = isr_lep_weight[evt_selection_mask]
                print('done applying masks')
                print('\nfilling histograms')
                # Nothing selected in this chunk: move on to the next one.
                if not np.any(evt_selection_mask):
                    print('finished filling')
                    continue
                # NOTE(review): `div_dphi` and `div_weight` used in the
                # commented-out calls below are not defined in this function.
                # rnp.fill_hist(hist[sample][tree_name]['MET'], met, weight)
                # rnp.fill_hist(hist[sample][tree_name]['S_Flavor_jet'], flavor_s_jet, s_jet_weight)
                # rnp.fill_hist(hist[sample][tree_name]['ISR_Flavor_jet'], flavor_isr_jet, isr_jet_weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['S_Flavor_lep'], pdgid_s_lep, s_lep_weight)
                # rnp.fill_hist(hist[sample][tree_name]['ISR_Flavor_lep'], pdgid_isr_lep, isr_lep_weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['Lep_to_Charge'], np.swapaxes([pdgid_lep, ch_lep],0,1), lep_weight)
                # rnp.fill_hist(hist[sample][tree_name]['Lep_to_Lep'], np.swapaxes([only_2_leps, only_2_opp_leps],0,1), only_lep_weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['RISR'], risr, weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTISR'], ptisr, weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTCM'], ptcm, weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['RISR_PTCM'], np.swapaxes([risr,ptcm],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['RISR_PTISR'], np.swapaxes([risr,ptisr],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_PTCM'], np.swapaxes([ptisr,ptcm],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['dphi_PTCM'], np.swapaxes([dphi,ptcm],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['dphi_PTCM_div_PTISR'], np.swapaxes([div_dphi,ptcm_div_ptisr],0,1), div_weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_div_PTISR'], ptcm_div_ptisr, div_weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_jet'], np.swapaxes([n_s_jet,n_isr_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_lep'], np.swapaxes([n_s_lep,n_isr_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_loose_jet'], np.swapaxes([n_s_loose_jet,n_isr_loose_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_medium_jet'], np.swapaxes([n_s_medium_jet,n_isr_medium_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['S_ISR_N_tight_jet'], np.swapaxes([n_s_tight_jet,n_isr_tight_jet],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['RISR_N_jet'], np.swapaxes([risr,len_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['RISR_N_lep'], np.swapaxes([risr,len_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['RISR_N_S_jet'], np.swapaxes([risr,n_s_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['RISR_N_S_lep'], np.swapaxes([risr,n_s_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['RISR_N_ISR_jet'], np.swapaxes([risr,n_isr_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['RISR_N_ISR_lep'], np.swapaxes([risr,n_isr_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_N_jet'], np.swapaxes([ptisr,len_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_N_lep'], np.swapaxes([ptisr,len_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_N_S_jet'], np.swapaxes([ptisr,n_s_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_N_S_lep'], np.swapaxes([ptisr,n_s_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_N_ISR_jet'], np.swapaxes([ptisr,n_isr_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTISR_N_ISR_lep'], np.swapaxes([ptisr,n_isr_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_N_jet'], np.swapaxes([ptcm,len_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_N_lep'], np.swapaxes([ptcm,len_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_N_S_jet'], np.swapaxes([ptcm,n_s_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_N_S_lep'], np.swapaxes([ptcm,n_s_lep],0,1), weight)
                #
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_N_ISR_jet'], np.swapaxes([ptcm,n_isr_jet],0,1), weight)
                # rnp.fill_hist(hist[sample][tree_name]['PTCM_N_ISR_lep'], np.swapaxes([ptcm,n_isr_lep],0,1), weight)
                print('finished filling')
    return hist
def run(self):
    """Process every input file and accumulate histograms and cutflow.

    For each file in ``self._input_files`` the method

    1. reads the ``Events`` tree and loads the B-candidate, event, muon,
       track and generator branch groups into dictionaries of jagged
       arrays;
    2. fills the muon multiplicity / pT histograms;
    3. flags each B candidate as *tag* (one of its own muons is a
       triggering muon) and/or *probe* (the event holds at least one
       triggering muon that does not belong to the candidate);
    4. walks the generator mother links to build ``BToKMuMu_mcmatch``;
    5. flattens the candidate branches into a ``pandas.DataFrame``,
       builds the selection masks and fills ``self._histograms`` for the
       ``inclusive``, ``triggered``, ``tag`` and ``probe`` categories;
    6. adds the per-file yields to ``self._cutflow_counts``.

    Nothing is returned; results are left on ``self``.
    """
    # Candidate columns that are histogrammed unchanged: the same string
    # names the dataframe column and the histogram.
    plain_hist_vars = (
        'BToKMuMu_chi2',
        'BToKMuMu_eta',
        'BToKMuMu_fit_cos2D',
        'BToKMuMu_fit_eta',
        'BToKMuMu_fit_mass',
        'BToKMuMu_fit_phi',
        'BToKMuMu_fit_pt',
        'BToKMuMu_l_xy',
        'BToKMuMu_fit_l1_eta',
        'BToKMuMu_fit_l1_phi',
        'BToKMuMu_fit_l1_pt',
        'BToKMuMu_fit_l2_eta',
        'BToKMuMu_fit_l2_phi',
        'BToKMuMu_fit_l2_pt',
        'BToKMuMu_mass',
        'BToKMuMu_mll_fullfit',
        'BToKMuMu_mll_llfit',
        'BToKMuMu_mll_raw',
        'BToKMuMu_phi',
        'BToKMuMu_pt',
        'BToKMuMu_svprob',
        'BToKMuMu_charge',
        'BToKMuMu_fit_l_minpt',
    )

    print('[Bu2KJpsi2KMuMuAnalyzer::run] INFO: Running the analyzer...')
    self.print_timestamp()
    for ifile, filename in enumerate(self._input_files):
        print('[Bu2KJpsi2KMuMuAnalyzer::run] INFO: FILE: {}/{}. Getting branches from file...'.format(ifile, len(self._input_files)))
        tree = uproot.open(filename)['Events']

        self._bu_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._bu_branchnames).items()}
        self._event_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._event_branchnames).items()}
        self._muon_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._muon_branchnames).items()}
        self._track_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._track_branchnames).items()}
        self._gen_branches = {key: awkward.fromiter(branch) for key, branch in tree.arrays(self._gen_branchnames).items()}

        print('[Bu2KJpsi2KMuMuAnalyzer::run] INFO: FILE: {}/{}. Analyzing...'.format(ifile, len(self._input_files)))

        # Muon information
        self._muon_branches["Muon_isTriggeringBool"] = (self._muon_branches["Muon_isTriggering"] == 1)
        fill_hist(self._mu_histograms["nMuon"], self._muon_branches["Muon_pt"].count())
        fill_hist(self._mu_histograms["nMuon_isTrig"], self._muon_branches["Muon_pt"][self._muon_branches["Muon_isTriggeringBool"]].count())
        fill_hist(self._mu_histograms["Muon_pt"], self._muon_branches["Muon_pt"].flatten())
        fill_hist(self._mu_histograms["Muon_pt_isTrig"], self._muon_branches["Muon_pt"][self._muon_branches["Muon_isTriggeringBool"]].flatten())

        # Tag/probe determination
        isTrig_mu1 = self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l1Idx"]]  # shape=BToKMuMu
        isTrig_mu2 = self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l2Idx"]]  # shape=BToKMuMu
        bu_trig_count = isTrig_mu1 + isTrig_mu2  # shape=BToKMuMu
        total_trig_count = self._muon_branches["Muon_isTriggering"].sum()  # shape=Event simple array
        # Broadcast the per-event total onto the per-candidate shape.
        total_trig_count_bushape = bu_trig_count.ones_like() * total_trig_count
        # Triggering muons in the event that are NOT part of the candidate.
        tag_count = total_trig_count_bushape - bu_trig_count
        self._bu_branches["BToKMuMu_isTag"] = (isTrig_mu1 == 1) | (isTrig_mu2 == 1)
        self._bu_branches["BToKMuMu_isProbe"] = (tag_count >= 1)

        if ifile == 0:
            print("Muon debug info:")
            print(self._event_branches["nMuon"])
            print(self._muon_branches["Muon_pt"].count()[:6])
            print(self._muon_branches["Muon_pt"][:6])
            #print(self._muon_branches["Muon_isTriggering"])
            print(self._muon_branches["Muon_isTriggeringBool"][:6])
            print(self._muon_branches["Muon_pt"][self._muon_branches["Muon_isTriggeringBool"]][:6])
            print("BToKMuMu_l1Idx = ")
            print(self._bu_branches["BToKMuMu_l1Idx"][:6])
            print("BToKMuMu_l2Idx = ")
            print(self._bu_branches["BToKMuMu_l2Idx"][:6])
            print("Total_trig_count = ")
            print(self._muon_branches["Muon_isTriggering"].sum()[:6])
            print("isTrig_mu1 = ")
            print(self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l1Idx"]][:6])
            print("isTrig_mu2 = ")
            print(self._muon_branches["Muon_isTriggering"][self._bu_branches["BToKMuMu_l2Idx"]][:6])
            print("bu_trig_count = ")
            print(bu_trig_count[:6])
            print("total_trig_count_bushape = ")
            print(total_trig_count_bushape[:6])
            print("tag_count = ")
            print(tag_count[:6])
            # Parenthesised so the line parses under both Python 2 and 3,
            # like every other print in this method.
            print("isTag:")
            print(self._bu_branches["BToKMuMu_isTag"][:6])
            print("isProbe:")
            print(self._bu_branches["BToKMuMu_isProbe"][:6])

        # MC truth matching
        gen_mother = self._gen_branches["GenPart_genPartIdxMother"]
        gen_pdg_id = self._gen_branches["GenPart_pdgId"]

        self._bu_branches["BToKMuMu_l1_genIdx"] = self._muon_branches["Muon_genPartIdx"][self._bu_branches["BToKMuMu_l1Idx"]]
        self._bu_branches["BToKMuMu_l2_genIdx"] = self._muon_branches["Muon_genPartIdx"][self._bu_branches["BToKMuMu_l2Idx"]]
        # The kaon index lives with the other candidate branches
        # (previously read from the undefined-here ``self._branches``).
        self._bu_branches['BToKMuMu_k_genIdx'] = self._track_branches['ProbeTracks_genPartIdx'][self._bu_branches['BToKMuMu_kIdx']]

        # Follow the mother links one generation at a time; a negative
        # index means "no match" and is propagated as -1.
        for leg in ("l1", "l2", "k"):
            gen_idx = self._bu_branches["BToKMuMu_{}_genIdx".format(leg)]
            self._bu_branches["BToKMuMu_{}_genMotherIdx".format(leg)] = where(gen_idx >= 0, gen_mother[gen_idx], -1)
        for leg in ("l1", "l2"):
            mother_idx = self._bu_branches["BToKMuMu_{}_genMotherIdx".format(leg)]
            self._bu_branches["BToKMuMu_{}_genGrandmotherIdx".format(leg)] = where(mother_idx >= 0, gen_mother[mother_idx], -1)
        for leg in ("l1", "l2", "k"):
            mother_idx = self._bu_branches["BToKMuMu_{}_genMotherIdx".format(leg)]
            self._bu_branches["BToKMuMu_{}_genMotherPdgId".format(leg)] = where(mother_idx >= 0, gen_pdg_id[mother_idx], -1)
        for leg in ("l1", "l2"):
            grandmother_idx = self._bu_branches["BToKMuMu_{}_genGrandmotherIdx".format(leg)]
            self._bu_branches["BToKMuMu_{}_genGrandmotherPdgId".format(leg)] = where(grandmother_idx >= 0, gen_pdg_id[grandmother_idx], -1)

        # Both muons come from a J/psi (443) whose mother is a B+ (521),
        # the kaon comes directly from a B+, and all three point to the
        # same B.  The kaon's *mother* is the B, so it is compared with the
        # muons' *grandmother*.
        # NOTE(review): only positive PDG ids are accepted, so candidates
        # matched to id -521 are rejected -- confirm whether abs() is
        # wanted.
        self._bu_branches['BToKMuMu_mcmatch'] = (self._bu_branches['BToKMuMu_l1_genMotherPdgId'] == 443) \
            & (self._bu_branches['BToKMuMu_l2_genMotherPdgId'] == 443) \
            & (self._bu_branches['BToKMuMu_l1_genGrandmotherPdgId'] == 521) \
            & (self._bu_branches['BToKMuMu_l2_genGrandmotherPdgId'] == 521) \
            & (self._bu_branches['BToKMuMu_k_genMotherPdgId'] == 521) \
            & (self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'] == self._bu_branches['BToKMuMu_l2_genGrandmotherIdx']) \
            & (self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'] == self._bu_branches['BToKMuMu_k_genMotherIdx'])

        self._bu_branches["BToKMuMu_genPartIdx"] = where(self._bu_branches['BToKMuMu_mcmatch'], self._bu_branches['BToKMuMu_l1_genGrandmotherIdx'], -1)

        self._butruth_branches = {}
        # NOTE(review): "GenPart_pdg" is not read anywhere else in this
        # method (the other lookups use "GenPart_pdgId"); confirm the key
        # and the intended content of this branch.
        self._butruth_branches["TruthBToKMuMu_RecoIdx"] = self._gen_branches["GenPart_pdg"]

        # Add trigger decision to Bs candidates
        # (event-level flag repeated once per candidate in the event)
        self._bu_branches["BToKMuMu_{}".format(self._trigger)] = np.repeat(self._event_branches[self._trigger], self._event_branches["nBToKMuMu"])

        # Print out length of arrays
        #for branch, array in self._bu_branches.items():
        #    print("{}\t{}".format(len(array.flatten()), branch))

        # flatten the jagged arrays to a normal numpy array, turn the whole dictionary to pandas dataframe
        self._bu_branches = pd.DataFrame.from_dict({branch: array.flatten() for branch, array in self._bu_branches.items()})

        # Minimum lepton pT
        self._bu_branches["BToKMuMu_fit_l_minpt"] = np.minimum(self._bu_branches["BToKMuMu_fit_l1_pt"], self._bu_branches["BToKMuMu_fit_l2_pt"])

        # general selection
        trigger_selection = self._bu_branches['BToKMuMu_{}'.format(self._trigger)]
        tag_selection = self._bu_branches["BToKMuMu_isTag"] & trigger_selection
        probe_selection = self._bu_branches["BToKMuMu_isProbe"] & trigger_selection

        sv_selection = (self._bu_branches['BToKMuMu_fit_pt'] > 3.0) \
            & (np.abs(self._bu_branches['BToKMuMu_l_xy'] / self._bu_branches['BToKMuMu_l_xy_unc']) > 3.0) \
            & (self._bu_branches['BToKMuMu_svprob'] > 0.1) \
            & (self._bu_branches['BToKMuMu_fit_cos2D'] > 0.9)

        l1_selection = (self._bu_branches['BToKMuMu_fit_l1_pt'] > 1.5) \
            & (np.abs(self._bu_branches['BToKMuMu_fit_l1_eta']) < 2.4)
        l2_selection = (self._bu_branches['BToKMuMu_fit_l2_pt'] > 1.5) \
            & (np.abs(self._bu_branches['BToKMuMu_fit_l2_eta']) < 2.4)
        k_selection = (self._bu_branches['BToKMuMu_fit_k_pt'] > 0.5) \
            & (np.abs(self._bu_branches['BToKMuMu_fit_k_eta']) < 2.5)

        # Dimuon mass window of +-0.2 around JPSI_1S_MASS.
        jpsi_selection = (JPSI_1S_MASS - 0.2 < self._bu_branches['BToKMuMu_mll_fullfit']) & (self._bu_branches['BToKMuMu_mll_fullfit'] < JPSI_1S_MASS + 0.2)

        bu_selection = sv_selection & l1_selection & l2_selection & k_selection & jpsi_selection

        #print("N trigger_selection = {}".format(trigger_selection.sum()))
        #print("N tag_selection = {}".format(tag_selection.sum()))
        #print("N probe_selection = {}".format(probe_selection.sum()))
        #print("N sv_selection = {}".format(sv_selection.sum()))
        #print("N l1_selection = {}".format(l1_selection.sum()))
        #print("N l2_selection = {}".format(l2_selection.sum()))
        #print("N k_selection = {}".format(k_selection.sum()))
        #print("N jpsi_selection = {}".format(jpsi_selection.sum()))
        #print("N bu_selection = {}".format(bu_selection.sum()))

        for tag_type in ["inclusive", "triggered", "tag", "probe"]:
            # Copy so the in-place &= never touches bu_selection itself.
            this_selection = copy.deepcopy(bu_selection)
            if tag_type == "triggered":
                this_selection &= trigger_selection
            elif tag_type == "tag":
                this_selection &= tag_selection
            elif tag_type == "probe":
                this_selection &= probe_selection
            #print("tag_type {}".format(tag_type))
            #print("\tthis_selection.count = {}".format(this_selection.sum()))

            selected_branches = self._bu_branches[this_selection]
            for var in plain_hist_vars:
                fill_hist(self._histograms[tag_type][var], selected_branches[var].values)
            # Derived quantity: transverse flight-length significance.
            fill_hist(self._histograms[tag_type]['BToKMuMu_l_xy_sig'], selected_branches['BToKMuMu_l_xy'].values / selected_branches['BToKMuMu_l_xy_unc'].values)
        # End loop tag_Type

        # Debug absence of low-pT probes
        if ifile == 0:
            select_lowpt_probe = (self._bu_branches["BToKMuMu_fit_pt"] < 10.) & self._bu_branches["BToKMuMu_isProbe"]
            print("pT of Bus with pT<10 and isProbe")
            print(self._bu_branches["BToKMuMu_fit_pt"][select_lowpt_probe])

        # Cutflow
        cutflow_selection = np.ones_like(self._bu_branches["BToKMuMu_chi2"], dtype=int)
        self._cutflow_counts["Inclusive"] += cutflow_selection.sum()
        self._cutflow_counts[self._trigger] += trigger_selection.sum()

        # Inclusive branch
        cutflow_selection_inclusive = copy.deepcopy(cutflow_selection)
        cutflow_selection_inclusive &= sv_selection
        self._cutflow_counts["Inclusive SV"] += cutflow_selection_inclusive.sum()
        cutflow_selection_inclusive &= l1_selection & l2_selection & k_selection
        self._cutflow_counts["Inclusive mu-K"] += cutflow_selection_inclusive.sum()
        cutflow_selection_inclusive &= jpsi_selection
        self._cutflow_counts["Inclusive Jpsi"] += cutflow_selection_inclusive.sum()

        # Tag and probe branches apply the same cut sequence on top of
        # their own base selection.
        for label, base_selection in (("Tag", tag_selection), ("Probe", probe_selection)):
            branch_selection = cutflow_selection & base_selection
            self._cutflow_counts[label] += branch_selection.sum()
            branch_selection = branch_selection & sv_selection
            self._cutflow_counts[label + " SV"] += branch_selection.sum()
            branch_selection = branch_selection & l1_selection & l2_selection & k_selection
            self._cutflow_counts[label + " mu-K"] += branch_selection.sum()
            branch_selection = branch_selection & jpsi_selection
            self._cutflow_counts[label + " Jpsi"] += branch_selection.sum()
def main(args):
    """Evaluate a saved EdgeNet on the test slice and write summary plots.

    The function loads the ``single_photon`` dataset, selects the test
    subset, restores the model weights from ``args.model``, prints the
    metrics returned by ``test`` and finally writes a histogram of the
    predicted/truth energy-sum ratio (with two restricted fits) to
    ``test_plots.pdf`` in the current working directory.

    Relies on the module-level names ``batch_size``, ``hidden_dim``,
    ``n_iters`` and ``device``.

    Args:
        args: object exposing a ``model`` attribute that holds the path of
            the saved ``state_dict`` to evaluate.
    """
    directed = False
    path = osp.join(osp.dirname(osp.realpath(__file__)),
                    '..', 'training_data', 'single_photon')
    full_dataset = HitGraphDatasetG(path, directed=directed)
    fulllen = len(full_dataset)

    # Cumulative boundaries are [N - 2t, N - t, N] with t = ceil(10% of N);
    # the test subset is the index range [N - 2t, N - t).  The last t
    # samples are not used here.
    tv_frac = 0.10
    tv_num = math.ceil(fulllen * tv_frac)
    splits = np.cumsum([fulllen - 2 * tv_num, tv_num, tv_num])

    test_dataset = torch.utils.data.Subset(
        full_dataset, np.arange(start=splits[0], stop=splits[1]))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    test_samples = len(test_dataset)

    num_features = full_dataset.num_features
    model = EdgeNet(input_dim=num_features,
                    hidden_dim=hidden_dim,
                    n_iters=n_iters).to(device)
    model_fname = args.model
    print('Model: \n%s\nParameters: %i' % (model, sum(p.numel() for p in model.parameters())))
    print('Testing with %s samples'%test_samples)

    # map_location lets a checkpoint saved on another device (e.g. a GPU)
    # be restored onto whatever `device` this process is using.
    model.load_state_dict(torch.load(model_fname, map_location=device))

    # The accuracy returned by `test` is not reported, hence the placeholder.
    test_loss, _, test_eff, test_fp, test_fn, test_pur = test(model, test_loader, test_samples)
    print('Testing: Loss: {:.4f}, Eff.: {:.4f}, FalsePos: {:.4f}, FalseNeg: {:.4f}, Purity: {:,.4f}'.format(test_loss, test_eff, test_fp, test_fn, test_pur))

    # plotting:
    figs = []
    out = []
    y = []
    x = []
    edge_index = []
    #simmatched = []
    # Gradients are never needed for plotting, so skip autograd bookkeeping.
    with torch.no_grad():
        for data in test_loader:
            x.append(data.x.cpu().detach().numpy())
            y.append(data.y.cpu().detach().numpy())
            edge_index.append(data.edge_index.cpu().detach().numpy())
            #simmatched.append(data.sim_matched.cpu().detach().numpy())
            data = data.to(device)
            out.append(model(data).cpu().detach().numpy())

    print("Processing awkward arrays...")
    out = awkward.fromiter(out)
    x = awkward.fromiter(x)
    y = awkward.fromiter(y)
    #simmatched = awkward.fromiter(simmatched)
    edge_index = awkward.fromiter(edge_index)

    # Edges are classified by thresholding the score / label at 0.5.
    predicted_edge = (out > 0.5)
    truth_edge = (y > 0.5)

    # NOTE(review): feature column 4 is treated as the node energy, going by
    # the variable name -- confirm against the dataset definition.
    node_energy = x[:, :, 4]

    # Collect both endpoints of every selected edge, de-duplicate the node
    # indices per entry, then sum the energy of those nodes.
    predicted_connected_node_indices = awkward.JaggedArray.concatenate(
        [edge_index[:, 0][predicted_edge], edge_index[:, 1][predicted_edge]],
        axis=1)
    predicted_connected_node_indices = awkward.fromiter(
        map(np.unique, predicted_connected_node_indices))
    predicted_energy_sum = node_energy[predicted_connected_node_indices].sum()

    truth_connected_node_indices = awkward.JaggedArray.concatenate(
        [edge_index[:, 0][truth_edge], edge_index[:, 1][truth_edge]],
        axis=1)
    truth_connected_node_indices = awkward.fromiter(
        map(np.unique, truth_connected_node_indices))
    truth_energy_sum = node_energy[truth_connected_node_indices].sum()

    print ("Plotting...")
    # Entries with zero truth energy are dropped to avoid dividing by zero.
    nonzeromask = (truth_energy_sum != 0.0)
    energy_captured_ratio = predicted_energy_sum[nonzeromask] / truth_energy_sum[nonzeromask]

    fig, axes = plt.subplots(figsize=(12, 7))
    _, bins, _ = axes.hist(energy_captured_ratio, bins=100)
    axes.set_title("Ratio of energy sum for predicted hits/truth (preprocessed) hits")
    axes.set_ylabel("events (pos+neg)")
    axes.set_xlabel("Ratio")

    # NB: restricted fit -- only ratios within 2.5% of unity enter the fits.
    cut = energy_captured_ratio[(energy_captured_ratio > 0.975) & (energy_captured_ratio < 1.025)]
    (mu, sigma) = stats.norm.fit(cut)
    c_paras = stats.crystalball.fit(cut)

    lnspc = np.linspace(bins[0], bins[-1], len(bins))
    pdf_g = stats.norm.pdf(lnspc, mu, sigma)
    pdf_c = stats.crystalball.pdf(lnspc, *c_paras)
    # Rescale each curve so its sampled values sum to the number of entries
    # in the histogram, putting it on the same vertical scale as the counts.
    pdf_g = pdf_g / pdf_g.sum() * len(energy_captured_ratio)
    pdf_c = pdf_c / pdf_c.sum() * len(energy_captured_ratio)
    axes.plot(lnspc, pdf_g, label="Norm, restricted fit")
    axes.plot(lnspc, pdf_c, label="Crystalball, restricted fit")
    axes.legend(loc='upper left')
    figs.append(fig)

    idx = 0
    print("diagnostics:", x[idx].regular().shape, edge_index[idx].regular().shape, y[idx].shape )

    #uncomment for visualisation - warning: slow
    #idxs = [0]
    #for idx in tqdm.tqdm(idxs):
    #    fig = draw_sample(x[idx].regular(), edge_index[idx].regular()[0], edge_index[idx].regular()[1], y[idx], out[idx])
    #    figs.append(fig)

    from matplotlib.backends.backend_pdf import PdfPages

    # The context manager finalises and closes the PDF even if saving a
    # figure raises part-way through.
    with PdfPages("test_plots.pdf") as pdf:
        for fig in figs:
            pdf.savefig(fig)