def test_objfunc_jptshift(module, event, inputs, outputs):
    """Check that ``event.Jet_ptShift`` reproduces the expected jet pT shift.

    The jagged jet inputs are built from ``inputs`` and attached to
    ``event.Jet``; after ``module.begin(event)`` the result must keep the
    input starts/stops and match ``outputs["jptshift"]``.
    """
    event.nsig = inputs["nsig"]
    event.source = inputs["source"]

    starts, stops = inputs["starts"], inputs["stops"]
    event.Jet.pt = awk.JaggedArray(starts, stops, inputs["jpt"])
    for name, content in inputs["evvars"].items():
        setattr(event.Jet, name, awk.JaggedArray(starts, stops, content))

    module.begin(event)
    shifted = event.Jet_ptShift(event)

    # The jagged layout must be untouched; only the content is compared
    # numerically (NaNs are treated as equal).
    assert np.array_equal(shifted.starts, np.array(starts))
    assert np.array_equal(shifted.stops, np.array(stops))
    expected = np.array(outputs["jptshift"])
    assert np.allclose(shifted.content, expected, rtol=1e-6, equal_nan=True)
def test_objfunc_tptshift(module, event, inputs, outputs):
    """Check that ``event.Tau_ptShift`` reproduces the expected tau pT shift.

    Every jagged input is stored both as ``event.Tau.<name>`` and as the
    flat attribute ``event.Tau_<name>`` (the same object in both places).
    """
    event.nsig = inputs["nsig"]
    event.source = inputs["source"]

    starts, stops = inputs["starts"], inputs["stops"]

    def jagged_f32(values):
        # All tau inputs are converted to float32 content.
        return awk.JaggedArray(starts, stops, np.array(values, dtype=np.float32))

    pt = jagged_f32(inputs["tpt"])
    event.Tau_pt = pt
    event.Tau.pt = pt

    decay_mode = jagged_f32(inputs["tdm"])
    event.Tau_decayMode = decay_mode
    event.Tau.decayMode = decay_mode

    for name, values in inputs["evvars"].items():
        column = jagged_f32(values)
        setattr(event.Tau, name, column)
        setattr(event, "Tau_{}".format(name), column)

    module.begin(event)
    shifted = event.Tau_ptShift(event, event.source, event.nsig)

    assert np.array_equal(shifted.starts, np.array(starts))
    assert np.array_equal(shifted.stops, np.array(stops))
    expected = np.array(outputs["tptshift"])
    assert np.allclose(shifted.content, expected, rtol=1e-6, equal_nan=True)
def test_objfunc_yptshift(module, event, inputs, outputs):
    """Check that ``event.Photon_ptShift`` reproduces the expected photon pT shift.

    Extra event variables are stored both as ``event.Photon.<name>`` and as
    ``event.Photon_<name>`` (the same object in both places).
    """
    event.nsig = inputs["nsig"]
    event.source = inputs["source"]

    starts, stops = inputs["starts"], inputs["stops"]
    event.Photon.pt = awk.JaggedArray(starts, stops, inputs["ypt"])

    for name, values in inputs["evvars"].items():
        column = awk.JaggedArray(starts, stops, values)
        setattr(event.Photon, name, column)
        setattr(event, "Photon_{}".format(name), column)

    module.begin(event)
    shifted = event.Photon_ptShift(event)

    assert np.array_equal(shifted.starts, np.array(starts))
    assert np.array_equal(shifted.stops, np.array(stops))
    expected = np.array(outputs["yptshift"])
    assert np.allclose(shifted.content, expected, rtol=1e-6, equal_nan=True)
def do_jes_correction(self, event, source):
    """Attach the up/down JES corrections for ``source`` to ``event``.

    The rows of ``self.jesuncs`` belonging to ``source`` are matched to each
    jet by eta bin, the ``corr_up``/``corr_down`` columns are interpolated at
    ``event.Jet_ptJESOnly`` and the results are stored as
    ``event.Jet_JECjes<source>Up`` and ``event.Jet_JECjes<source>Down``
    (the latter with its sign flipped).
    """
    table = self.jesuncs[self.jesuncs["source"] == source]
    jet_eta = event.Jet_eta

    bins = get_bin_indices(
        [jet_eta.content],
        [table["eta_low"].values],
        [table["eta_high"].values],
        1,
    )[:, 0]

    def per_jet(column):
        # One table row per jet; each cell is expanded into an array row.
        return np.array(list(table.iloc[bins][column].values))

    pt_nodes = per_jet("pt")
    up_nodes = per_jet("corr_up")
    down_nodes = per_jet("corr_down")

    up = interpolate(event.Jet_ptJESOnly.content, pt_nodes, up_nodes)
    down = interpolate(event.Jet_ptJESOnly.content, pt_nodes, down_nodes)

    starts, stops = jet_eta.starts, jet_eta.stops
    setattr(
        event,
        "Jet_JECjes{}Up".format(source),
        awk.JaggedArray(starts, stops, up),
    )
    setattr(
        event,
        "Jet_JECjes{}Down".format(source),
        awk.JaggedArray(starts, stops, -1. * down),
    )
def objUnion(objs, sortBy=None):
    """Concatenate several jagged collections row by row into one union array.

    Args:
        objs: non-empty iterable of ``awkward.JaggedArray`` with equal
            ``shape`` (same number of rows).
        sortBy: optional attribute name or callable; when given, it is
            evaluated on every input and the merged rows are reordered by
            the ``argsort`` of the concatenated result.

    Returns:
        A jagged array whose content is an ``awkward.UnionArray`` over the
        contents of ``objs``; the union tag is the position of the source
        collection in ``objs``.

    Raises:
        AssertionError: if ``objs`` is empty, holds a non-JaggedArray, or the
            inputs do not share the same shape.
    """
    objs = tuple(objs)
    assert objs
    assert all(isinstance(obj, awkward.JaggedArray) for obj in objs)
    assert all(obj.shape == objs[0].shape for obj in objs)

    def like(obj, values):
        # Jagged array with the same row structure as ``obj``.
        return awkward.JaggedArray(obj.starts, obj.stops, values)

    # Tags and indices are per-item quantities, so both must be as long as
    # the *content* of each input (not its number of rows) for the
    # starts/stops of ``obj`` to address them correctly.
    tags = awkward.JaggedArray.concatenate(
        [
            like(obj, np.full(len(obj.content), i, dtype=np.intp))
            for i, obj in enumerate(objs)
        ],
        axis=1,
    )
    index = awkward.JaggedArray.concatenate(
        [
            like(obj, np.arange(len(obj.content), dtype=np.intp))
            for obj in objs
        ],
        axis=1,
    )

    if sortBy:
        if not callable(sortBy):
            sortBy = attrgetter(sortBy)
        shuf = awkward.JaggedArray.concatenate(
            list(map(sortBy, objs)), axis=1).argsort()
        tags = tags[shuf]
        index = index[shuf]

    return tags.copy(content=awkward.UnionArray(
        tags.content, index.content, [obj.content for obj in objs]))
def test_objfunc_eptshift(module, event, inputs, outputs):
    """Check that ``event.Electron_ptShift`` reproduces the expected electron pT shift.

    Extra event variables are stored both as ``event.Electron.<name>`` and
    as ``event.Electron_<name>`` (the same object in both places).
    """
    event.nsig = inputs["nsig"]
    event.source = inputs["source"]

    starts, stops = inputs["starts"], inputs["stops"]

    def jagged_f32(values):
        # All electron inputs are converted to float32 content.
        return awk.JaggedArray(starts, stops, np.array(values, dtype=np.float32))

    event.Electron.pt = jagged_f32(inputs["ept"])
    event.Electron.eta = jagged_f32(inputs["eeta"])

    for name, values in inputs["evvars"].items():
        column = jagged_f32(values)
        setattr(event.Electron, name, column)
        setattr(event, "Electron_{}".format(name), column)

    module.begin(event)
    shifted = event.Electron_ptShift(event, event.source, event.nsig)

    assert np.array_equal(shifted.starts, np.array(starts))
    assert np.array_equal(shifted.stops, np.array(stops))
    expected = np.array(outputs["eptshift"])
    assert np.allclose(shifted.content, expected, rtol=1e-6, equal_nan=True)
def fromroot(self, data, byteoffsets, local_entrystart, local_entrystop):
    """Interpret raw basket bytes as a jagged array for an entry range.

    Args:
        data: raw byte buffer for the basket.
        byteoffsets: per-entry byte offsets into ``data``; entry ``i`` spans
            ``byteoffsets[i]`` to ``byteoffsets[i + 1]``.
        local_entrystart: first entry (inclusive) to read.
        local_entrystop: last entry (exclusive) to read.

    Returns:
        An ``awkward.JaggedArray`` whose content is produced by
        ``self.content.fromroot``.
    """
    if local_entrystart == local_entrystop:
        # Empty entry range: build the result from the single offset [0].
        return awkward.JaggedArray.fromoffsets(
            [0],
            self.content.fromroot(data, None, local_entrystart,
                                  local_entrystop))
    else:
        if self.skipbytes == 0:
            # No per-entry header: byte offsets become item offsets by
            # dividing by the item size.
            # NOTE(review): the helper name suggests ``byteoffsets`` is
            # modified in place -- confirm before reusing it afterwards.
            offsets = _destructive_divide(byteoffsets, self.content.itemsize)
            starts = offsets[local_entrystart:local_entrystop]
            stops = offsets[local_entrystart + 1:local_entrystop + 1]
            content = self.content.fromroot(data, None, starts[0], stops[-1])
            return awkward.JaggedArray(starts, stops, content)
        else:
            # Each entry starts with ``skipbytes`` bytes that are not
            # payload: move every start past them.
            bytestarts = byteoffsets[
                local_entrystart:local_entrystop] + self.skipbytes
            bytestops = byteoffsets[local_entrystart + 1:local_entrystop + 1]

            # Build a byte mask: +1 at every payload start, -1 at every
            # payload stop (only positions inside ``data``), then a running
            # sum leaves non-zero values exactly on the payload bytes.
            mask = awkward.util.numpy.zeros(len(data),
                                            dtype=awkward.util.numpy.int8)
            mask[bytestarts[bytestarts < len(data)]] = 1
            awkward.util.numpy.add.at(mask, bytestops[bytestops < len(data)],
                                      -1)
            awkward.util.numpy.cumsum(mask, out=mask)

            # Keep only payload bytes, then interpret them as content.
            data = data[mask.view(awkward.util.numpy.bool_)]
            content = self.content.fromroot(data, None, 0, bytestops[-1])

            # Find the size in bytes of one innermost item by unwrapping
            # nested interpretations down to the one without ``content``.
            itemsize = 1
            sub = self.content
            while hasattr(sub, "content"):
                sub = sub.content
            if isinstance(sub, uproot.interp.numerical.asdtype):
                itemsize = sub.fromdtype.itemsize
            if isinstance(sub, uproot.interp.numerical.asstlbitset):
                itemsize = sub.numbytes + 4

            # Convert per-entry byte counts to item counts in place; a bit
            # shift is used when the item size is a power of two.
            counts = bytestops - bytestarts
            shift = math.log(itemsize, 2)
            if shift == round(shift):
                awkward.util.numpy.right_shift(counts, int(shift), out=counts)
            else:
                awkward.util.numpy.floor_divide(counts, itemsize, out=counts)

            # Item offsets are the cumulative counts, starting at zero.
            offsets = awkward.util.numpy.empty(
                len(counts) + 1, awkward.util.INDEXTYPE)
            offsets[0] = 0
            awkward.util.numpy.cumsum(counts, out=offsets[1:])
            return awkward.JaggedArray(offsets[:-1], offsets[1:], content)
def obj_drtrig(ev, source, nsig, coll, ref, ref_selection=None):
    """Per-object minimum ``DeltaR2`` between a collection and reference objects.

    Args:
        ev: event object exposing the collections ``coll`` and ``ref`` as
            attributes with jagged ``eta`` and ``phi``.
        source, nsig: forwarded to the ``ref_selection`` expression.
        coll: attribute name of the collection to evaluate.
        ref: attribute name of the reference collection.
        ref_selection: optional expression handed to ``Lambda``; its result
            is used as a jagged mask on the reference objects. When omitted
            every reference object is used.

    Returns:
        ``awk.JaggedArray`` with the layout of ``coll``; each value is the
        smallest ``DeltaR2`` to a selected reference object in the same
        event, or ``10 + pi`` when there is none.
    """
    @nb.njit([
        "float32[:](int64[:], int64[:], float32[:], float32[:], int64[:], int64[:], float32[:], float32[:])"
    ])
    def nb_dr_coll_ref(
        coll_starts, coll_stops, coll_eta, coll_phi,
        ref_starts, ref_stops, ref_eta, ref_phi,
    ):
        # maximally opposite in eta and phi
        coll_dr = (10. + np.pi) * np.ones_like(coll_eta, dtype=np.float32)

        for cstart, cstop, rstart, rstop in zip(
            coll_starts, coll_stops, ref_starts, ref_stops,
        ):
            for ic in range(cstart, cstop):
                coll_dr[ic] = min([
                    DeltaR2(
                        coll_eta[ic] - ref_eta[ir],
                        coll_phi[ic] - ref_phi[ir],
                    ) for ir in range(rstart, rstop)
                ] + [10. + np.pi])
        return coll_dr.astype(np.float32)

    ref_obj = getattr(ev, ref)
    ref_eta = ref_obj.eta
    if ref_selection is not None:
        mask = Lambda(ref_selection)(ev, source, nsig)
    else:
        # The select-everything mask must be boolean: ``ones_like`` on the
        # float eta content would otherwise yield a float array, which is
        # not a valid jagged mask.
        mask = awk.JaggedArray(
            ref_eta.starts, ref_eta.stops,
            np.ones_like(ref_eta.content, dtype=np.bool_),
        )

    # Apply the mask once per attribute instead of once per use.
    sel_eta = ref_eta[mask]
    sel_phi = ref_obj.phi[mask]

    coll_obj = getattr(ev, coll)
    starts, stops = coll_obj.eta.starts, coll_obj.eta.stops
    return awk.JaggedArray(
        starts, stops,
        nb_dr_coll_ref(
            starts, stops,
            coll_obj.eta.content, coll_obj.phi.content,
            sel_eta.starts, sel_eta.stops,
            sel_eta.content, sel_phi.content,
        ),
    )
def mergeArray(a1, a2):
    '''
    Merge two jagged arrays into one, e.g. electrons and muons.

    Items coming from ``a1`` are tagged 0 and items from ``a2`` are tagged 1
    in the resulting union content.
    '''
    import awkward

    tag_parts = []
    index_parts = []
    for tag, arr in enumerate((a1, a2)):
        n_items = len(arr.content)
        tag_parts.append(awkward.JaggedArray(
            arr.starts, arr.stops, np.full(n_items, tag, dtype=np.int64)))
        index_parts.append(awkward.JaggedArray(
            arr.starts, arr.stops, np.arange(n_items, dtype=np.int64)))

    # Row-wise concatenation keeps tags and indices aligned with each other.
    tags = awkward.JaggedArray.concatenate(tag_parts, axis=1)
    index = awkward.JaggedArray.concatenate(index_parts, axis=1)

    union = awkward.UnionArray(
        tags.content, index.content, [a1.content, a2.content])
    return awkward.JaggedArray(tags.starts, tags.stops, union)
def __init__(self):
    """Fill the object with fixed MET and jet values."""

    def jagged_jets(values):
        # Every jet attribute uses the same five-row layout.
        # NOTE(review): the largest stop is 8 while each content list below
        # has 5 values -- confirm this is intended.
        return awk.JaggedArray([0, 1, 3, 4, 7], [1, 3, 4, 7, 8], values)

    self.MET_pt = np.array([10., 20., 30., 40., 50.])
    self.MET_phi = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    self.Jet_pt = jagged_jets([60., 70., 80., 90., 100.])
    self.Jet_phi = jagged_jets([0.6, 0.7, 0.8, 0.9, 1.0])
def jes_corr(ev, source, nsig, jesuncs):
    """Return the per-jet JES correction for a systematic ``source``.

    When ``source`` is not one of ``ev.JesSources`` or ``nsig`` is zero, a
    float32 jagged array of ones with the layout of ``ev.Jet_pt`` is
    returned. Otherwise the ``Up`` (``nsig > 0``) or ``Down`` branch
    ``Jet_JEC<source><Up|Down>`` is returned; if the event does not have it
    yet, both the Up and Down branches are computed from ``jesuncs`` and
    attached to ``ev`` first.
    """
    if source not in ev.JesSources or nsig == 0.:
        jet_pt = ev.Jet_pt
        return awk.JaggedArray(
            jet_pt.starts,
            jet_pt.stops,
            np.ones_like(ev.Jet_pt.content, dtype=np.float32),
        )

    flavour = source + ("Up" if nsig > 0. else "Down")

    if not ev.hasbranch("Jet_JEC{}".format(flavour)):
        # The table is keyed without the leading "jes" of the source name.
        table_key = source[3:] if source.startswith("jes") else source
        table = jesuncs[jesuncs["source"] == table_key]

        bins = get_bin_indices(
            [ev.Jet_eta.content],
            [table["eta_low"].values],
            [table["eta_high"].values],
            1,
        )[:, 0]

        def per_jet(column):
            return np.array(list(table.iloc[bins][column].values))

        pt_nodes = per_jet("pt")
        up_nodes = per_jet("corr_up")
        down_nodes = per_jet("corr_down")

        up = interpolate(
            ev.Jet_ptJESOnly.content, pt_nodes, up_nodes).astype(np.float32)
        down = interpolate(
            ev.Jet_ptJESOnly.content, pt_nodes, down_nodes).astype(np.float32)

        starts, stops = ev.Jet_eta.starts, ev.Jet_eta.stops
        setattr(
            ev,
            "Jet_JEC{}Up".format(source),
            awk.JaggedArray(starts, stops, up),
        )
        setattr(
            ev,
            "Jet_JEC{}Down".format(source),
            awk.JaggedArray(starts, stops, -1. * down),
        )

    return getattr(ev, "Jet_JEC{}".format(flavour))
def test_objxclean_attr(module, event):
    """Check ``event.C1_XCleanMask`` against two mocked reference collections."""
    starts = [0, 1, 3]
    stops = [1, 3, 6]
    event.C1.eta = awk.JaggedArray(
        starts, stops, [0.3, 0.9, 1.2, 0.6, 1.5, 1.8])
    event.C1.phi = awk.JaggedArray(
        starts, stops, [0.15, 0.75, 1.05, 0.45, 1.35, 1.65])

    def reference(ref_starts, ref_stops, eta, phi):
        # Build the side effect of a mocked reference collection: called as
        # ``(ev, attr)`` and only answering for "eta" and "phi".
        def call(ev, attr):
            if attr == "eta":
                content = eta
            elif attr == "phi":
                content = phi
            else:
                assert False
            return awk.JaggedArray(ref_starts, ref_stops, content)
        return call

    event.RC1 = mock.Mock(side_effect=reference(
        [0, 0, 1], [0, 1, 2], [1.1, 2.5], [0.78, 1.65]))
    event.RC2 = mock.Mock(side_effect=reference(
        [0, 0, 0], [0, 0, 1], [1.45], [1.38]))

    module.begin(event)
    cleaned = event.C1_XCleanMask(event)

    assert np.array_equal(cleaned.starts, np.array(starts))
    assert np.array_equal(cleaned.stops, np.array(stops))
    assert np.array_equal(
        cleaned.content,
        np.array([True, False, False, True, False, True]),
    )
def test_gen_module(gen_module, event, starts, stops, flags, pdgs, ngtaul):
    """Check that ``gen_module.event`` fills ``event.nGenTauL`` as expected."""

    def jagged_i32(content):
        # Fresh int32 starts/stops for every attribute, as in the inputs.
        return awk.JaggedArray(
            np.array(starts, dtype=np.int32),
            np.array(stops, dtype=np.int32),
            np.array(content, dtype=np.int32),
        )

    event.size = len(starts)
    event.GenPart.statusFlags = jagged_i32(flags)
    event.GenPart.pdgId = jagged_i32(pdgs)

    gen_module.event(event)

    expected = np.array(ngtaul, dtype=np.int32)
    assert np.array_equal(event.nGenTauL, expected)
def do_jet_pt_resolution(self, event):
    """Compute the per-jet pT resolution and store it as ``event.Jet_ptResolution``.

    Each jet is matched to a row of ``self.jers`` by (eta, rho) bin; the jet
    pT is clamped to that row's ``[pt_low, pt_high]`` range and passed to
    ``jer_formula`` together with the row's four parameters.
    """
    jers = self.jers
    jet_eta = event.Jet_eta.content
    # Event-level rho expanded to one value per jet.
    jet_rho = event_to_object_var(
        event.fixedGridRhoFastjetAll,
        event.Jet_pt.starts,
        event.Jet_pt.stops,
    )

    bins = get_bin_indices(
        [jet_eta, jet_rho],
        [jers["eta_low"].values, jers["rho_low"].values],
        [jers["eta_high"].values, jers["rho_high"].values],
        1,
    )[:, 0]

    rows = jers.iloc[bins]
    params = rows[["param0", "param1", "param2", "param3"]].values
    ptbounds = rows[["pt_low", "pt_high"]].values

    raised = np.maximum(event.Jet_pt.content, ptbounds[:, 0])
    clamped_pt = np.minimum(raised, ptbounds[:, 1])

    resolution = jer_formula(
        clamped_pt,
        params[:, 0], params[:, 1], params[:, 2], params[:, 3],
    )
    event.Jet_ptResolution = awk.JaggedArray(
        event.Jet_pt.starts, event.Jet_pt.stops, resolution)
def extract_vector_data(events, branches, entrystop=10, progressbar=False):
    """Flatten jagged branches into a ``pandas.DataFrame``.

    Args:
        events: tree-like object providing ``array(branch, entrystop=...)``
            and supporting ``branch in events``.
        branches: names of the branches to read; the first one defines the
            jagged layout used to repeat the ``"event"`` branch per item.
        entrystop: forwarded to ``events.array``.
        progressbar: show a ``tqdm`` progress bar while reading.

    Returns:
        An empty dict when ``branches`` is empty; otherwise a DataFrame with
        an ``"event"`` column and one column per branch that was found.
        Branches missing from ``events`` are skipped with a printed warning.
    """
    def get(branch, flat=True):
        a = events.array(branch, entrystop=entrystop)
        return a.flatten() if flat else a

    if len(branches) == 0:
        return {}

    first_branch_jagged = get(branches[0], flat=False)
    first_branch_flat = first_branch_jagged.flatten()

    # Adding a jagged array of zeros broadcasts the per-event value to one
    # value per item of the first branch. ``np.int`` (an alias of the
    # builtin ``int``) was removed in NumPy 1.24, so ``int`` is used.
    event_jagged = get("event") + awkward.JaggedArray(
        first_branch_jagged.starts,
        first_branch_jagged.stops,
        np.zeros(len(first_branch_flat), dtype=int))

    data = {}
    data["event"] = event_jagged.flatten()
    data[branches[0]] = first_branch_flat

    for br in tqdm(branches, disable=not progressbar):
        if br == branches[0]:
            # Already stored above.
            continue
        if br in events:
            data[br] = get(br)
        else:
            print('Warning! Branch "' + br +
                  '" not found in input file and skipped.')
    return pd.DataFrame(data)
def jet_pt_res(ev, jers):
    """Return the per-jet pT resolution as a jagged array.

    Each jet is matched to a row of ``jers`` by (|eta|, rho) bin; the
    JES-only jet pT is clamped to that row's ``[pt_low, pt_high]`` range and
    passed to ``jer_formula`` together with the row's four parameters.
    """
    jet_pt = ev.Jet_ptJESOnly
    abs_eta = np.abs(ev.Jet_eta.content)
    # Event-level rho expanded to one value per jet.
    jet_rho = event_to_object_var(
        ev.fixedGridRhoFastjetAll, jet_pt.starts, jet_pt.stops)

    bins = get_bin_indices(
        [abs_eta, jet_rho],
        [jers["eta_low"].values, jers["rho_low"].values],
        [jers["eta_high"].values, jers["rho_high"].values],
        1,
    )[:, 0]

    rows = jers.iloc[bins]
    params = rows[["param0", "param1", "param2", "param3"]].values.astype(
        np.float32)
    ptbounds = rows[["pt_low", "pt_high"]].values

    raised = np.maximum(ev.Jet_ptJESOnly.content, ptbounds[:, 0])
    clamped_pt = np.minimum(raised, ptbounds[:, 1]).astype(np.float32)

    resolution = jer_formula(
        clamped_pt,
        params[:, 0], params[:, 1], params[:, 2], params[:, 3],
    )
    return awk.JaggedArray(
        ev.Jet_ptJESOnly.starts, ev.Jet_ptJESOnly.stops, resolution)
def tau_pt_shift(ev, source, nsig):
    """Return the tau pT shift for the ``"tauPtScale"`` systematic.

    The relative uncertainty depends on the tau decay mode; for any other
    ``source`` the relative shift handed to ``pt_shift_numba`` is zero.
    """
    # see https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendation13TeV#Tau_energy_scale
    # corrections summed in quad to uncertainties are still dominated by
    # uncertainties, so lets use the quad
    @nb.njit([
        "float32[:](float32[:],float32[:])",
        "float32[:](float32[:],int32[:])",
    ])
    def nb_tau_pt_err(tau_pt, tau_dm):
        errors = np.zeros_like(tau_pt, dtype=np.float32)
        for i in range(len(tau_pt)):
            dm = tau_dm[i]
            rel_err = 0.
            # Trailing comments keep the alternative values noted in the
            # original code.
            if (-0.5 < dm) and (dm < 0.5):
                rel_err = 0.012  # 0.010
            elif (0.5 < dm) and (dm < 2.5):
                rel_err = 0.010  # 0.009
            elif (9.5 < dm) and (dm < 10.5):
                rel_err = 0.011  # 0.011
            errors[i] = rel_err * tau_pt[i]
        return errors

    tau_pt_err = nb_tau_pt_err(
        ev.Tau_pt.content,
        ev.Tau_decayMode.content,
    )

    # Multiplying by the boolean switches the shift off for other sources.
    is_tau_scale = (source == "tauPtScale")
    shift = (is_tau_scale * tau_pt_err / ev.Tau_pt.content).astype(np.float32)

    shifted = pt_shift_numba(ev.Tau_pt.content, nsig, shift, -1. * shift)
    return awk.JaggedArray(ev.Tau_pt.starts, ev.Tau_pt.stops, shifted)
def clip_rr(array):
    """Clip the ``rr_u``, ``rr_v`` and ``rr_y`` entries of ``array`` to [0, 1e9].

    Each entry is replaced in place by a new jagged array with the same
    starts/stops and clipped content.
    """
    for plane in ['_u', '_v', '_y']:
        key = 'rr' + plane
        clipped = np.clip(array[key].content, 0, 1e9)
        array[key] = awkward.JaggedArray(
            starts=array[key].starts,
            stops=array[key].stops,
            content=clipped,
        )
def getitem_slice3(array, head, tail, advanced):
    """Apply the slice ``head`` to every row of a jagged ``array``.

    For each row the slice bounds are normalised against the row length,
    the selected content positions are gathered into ``index``, and the
    remaining ``tail`` of the index expression is applied to the gathered
    content via ``getitem_next``.

    Raises:
        ValueError: if ``head.step`` is zero.
    """
    if head.step == 0:
        raise ValueError
    # 999 is only a placeholder; every slot that is used gets overwritten.
    offsets = numpy.full(len(array.starts) + 1, 999, int)
    offsets[0] = 0
    index = numpy.full(len(array.content), 999, int)  # too big, but okay
    k = 0
    for i in range(len(array.starts)):
        length = array.stops[i] - array.starts[i]
        a, b, c = head.start, head.stop, head.step
        # Fill in defaults for missing step/start/stop and make negative
        # bounds relative to the end of the row.
        if c is None:
            c = 1
        if a is None and c > 0:
            a = 0
        elif a is None:
            a = length - 1
        elif a < 0:
            a += length
        if b is None and c > 0:
            b = length
        elif b is None:
            b = -1
        elif b < 0:
            b += length
        if c > 0:
            # Forward slice: empty when stop <= start, otherwise clamp both
            # bounds to [0, length].
            if b <= a:
                a, b = 0, 0
            if a < 0:
                a = 0
            elif a > length:
                a = length
            if b < 0:
                b = 0
            elif b > length:
                b = length
        else:
            # Backward slice: empty when start <= stop, otherwise clamp both
            # bounds to [-1, length - 1].
            if a <= b:
                a, b = 0, 0
            if a < -1:
                a = -1
            elif a >= length:
                a = length - 1
            if b < -1:
                b = -1
            elif b >= length:
                b = length - 1
        # Record the absolute content position of every selected item.
        for j in range(a, b, c):
            index[k] = array.starts[i] + j
            k += 1
        offsets[i + 1] = k
    starts = offsets[:-1]
    stops = offsets[1:]
    # Only the first k entries of ``index`` were filled.
    next = getitem_next(array.content[index[:k]], tail,
                        spread_advanced(starts, stops, advanced))
    return awkward.JaggedArray(starts, stops, next)
def getitem_enter(array, where):
    """Entry point for indexing ``array`` with the tuple-like ``where``.

    One-dimensional boolean arrays in ``where`` are converted to integer
    positions, and integers / length-1 arrays are broadcast to the common
    advanced-index length. ``array`` is then wrapped as a single-row jagged
    array so that ``getitem_next`` can process the whole expression.

    Args:
        array: the array being indexed.
        where: sequence of index items (ints, slices, numpy arrays, ...).

    Returns:
        ``array`` itself when ``where`` is empty, otherwise the indexed
        result unwrapped from the single-row jagged array.
    """
    if len(where) == 0:
        return array

    def is_1d(x):
        return isinstance(x, numpy.ndarray) and len(x.shape) == 1

    def is_mask(x):
        # ``numpy.bool`` was removed in NumPy 1.24 and raises AttributeError
        # there; the dtype type of a boolean array is ``numpy.bool_``, so
        # testing against that alone is sufficient.
        return is_1d(x) and issubclass(x.dtype.type, numpy.bool_)

    # Common length of all 1-D advanced indices (masks count their Trues).
    arraylen = 0
    for x in where:
        if is_mask(x):
            arraylen = max(arraylen, numpy.count_nonzero(x))
        elif is_1d(x):
            arraylen = max(arraylen, len(x))

    newwhere = []
    for x in where:
        if is_mask(x):
            newwhere.append(numpy.nonzero(x)[0])
        elif isinstance(x, int) and arraylen != 0:
            newwhere.append(numpy.full(arraylen, x, int))
        elif isinstance(x, numpy.ndarray) and x.shape == (1,):
            newwhere.append(numpy.full(arraylen, x, int))
        else:
            newwhere.append(x)

    fake = getitem_next(
        awkward.JaggedArray([0], [len(array)], array), newwhere, None)
    if isinstance(fake, numpy.ndarray):
        return fake[0]
    else:
        return fake.content[fake.starts[0]:fake.stops[-1]]
def getitem_intarray_none(array, head, tail, advanced):
    """Apply the integer index array ``head`` to every row of a jagged ``array``.

    Every row yields ``len(head)`` items. Negative entries of ``head`` count
    from the end of the row. The position within ``head`` is recorded for
    each gathered item and handed to ``getitem_next`` as the advanced index.

    Raises:
        IndexError: if an entry of ``head`` is out of bounds for some row.
    """
    n_rows = len(array.starts)
    # 999 is only a placeholder; every slot gets overwritten below.
    offsets = numpy.full(n_rows + 1, 999, int)
    offsets[0] = 0
    index = numpy.full(len(head) * n_rows, 999, int)
    nextadvanced = numpy.full(len(index), 999, int)

    filled = 0
    for row in range(n_rows):
        length = array.stops[row] - array.starts[row]
        for pos, norm in enumerate(head):
            if norm < 0:
                norm += length
            if norm < 0 or norm >= length:
                raise IndexError(
                    "advanced index is out of bounds in JaggedArray")
            index[filled] = array.starts[row] + norm
            nextadvanced[filled] = pos
            filled += 1
        offsets[row + 1] = filled

    starts, stops = offsets[:-1], offsets[1:]
    next = getitem_next(array.content[index], tail, nextadvanced)
    return awkward.JaggedArray(starts, stops, next)
def _normalize_arrays(arrays): length = None for i in range(len(arrays)): if isinstance(arrays[i], Iterable): if length is None: length = len(arrays[i]) break if length is None: raise TypeError( "cannot construct an array if all arguments are scalar") arrays = list(arrays) starts, stops = None, None for i in range(len(arrays)): if starts is None and isinstance(arrays[i], awkward.JaggedArray): starts, stops = arrays[i].starts, arrays[i].stops if not isinstance(arrays[i], Iterable): arrays[i] = awkward.util.numpy.full(length, arrays[i]) arrays[i] = awkward.util.toarray(arrays[i], awkward.util.numpy.float64) if starts is None: return arrays for i in range(len(arrays)): if not isinstance(arrays[i], awkward.JaggedArray) or not ( awkward.util.numpy.array_equal(starts, arrays[i].starts) and awkward.util.numpy.array_equal(stops, arrays[i].stops)): content = awkward.util.numpy.zeros( stops.max(), dtype=awkward.util.numpy.float64) arrays[i] = awkward.JaggedArray(starts, stops, content) + arrays[ i] # invoke jagged broadcasting to align arrays return arrays
def evaluate(tree, expression):
    """Evaluate ``expression`` with numexpr, resolving names through ``tree``.

    The result is wrapped in a ``JaggedArray`` using the adaptor's
    starts/stops when the adaptor has recorded them; otherwise the plain
    numexpr result is returned.
    """
    cleaned, aliases = preprocess_expression(expression)
    adaptor = TreeToDictAdaptor(tree, aliases)
    values = numexpr.evaluate(cleaned, local_dict=adaptor)
    if adaptor.starts is None:
        return values
    return awkward.JaggedArray(adaptor.starts, adaptor.stops, values)
def jet_selection(self, attr):
    """Return a fixed jagged array of jet ``phi`` values.

    Only ``attr == 'phi'`` is supported; anything else fails the assertion.
    """
    assert attr == 'phi'
    starts = np.array([0, 0, 1, 3, 6, 10], dtype=np.int32)
    stops = np.array([0, 1, 3, 6, 10, 15], dtype=np.int32)
    phi = np.array(
        [
            0.0,
            0.0, 0.5,
            0.0, 0.5, 1.0,
            0.0, 0.5, 1.0, 1.5,
            0.5, 1.0, 1.5, 2.0, 2.5,
            0.5, 1.0, 1.5, 2.0, 2.5, 3.0,
        ],
        dtype=np.float32,
    )
    return awk.JaggedArray(starts, stops, phi)
def ele_selection(self, attr):
    """Return a fixed jagged array of electron ``charge`` values.

    Only ``attr == 'charge'`` is supported; anything else fails the
    assertion.
    """
    assert attr == 'charge'
    starts = np.array([0, 0, 0, 1], dtype=np.int32)
    stops = np.array([0, 0, 1, 2], dtype=np.int32)
    charge = np.array([-1, 1], dtype=np.int32)
    return awk.JaggedArray(starts, stops, charge)
def ele_pt_shift(ev, source, nsig):
    """Return the electron pT shift for the ``"eleEnergyScale"`` systematic.

    The relative shift is the electron energy error divided by
    ``pt * cosh(eta)``; for any other ``source`` it is zero.
    """
    # Multiplying by the boolean switches the shift off for other sources.
    scaled_err = (source == "eleEnergyScale") * ev.Electron_energyErr.content
    energy = ev.Electron.pt.content * np.cosh(ev.Electron.eta.content)
    shift = (scaled_err / energy).astype(np.float32)

    shifted = pt_shift_numba(ev.Electron.pt.content, nsig, shift, -shift)
    return awk.JaggedArray(
        ev.Electron.pt.starts, ev.Electron.pt.stops, shifted)
def rc2_call(ev, attr):
    """Return the fixed ``eta`` or ``phi`` jagged array of the RC2 reference.

    Any other attribute fails the assertion.
    """
    if attr == "eta":
        values = [1.45]
    elif attr == "phi":
        values = [1.38]
    else:
        assert False
    return awk.JaggedArray(rc2_starts, rc2_stopys, values)
def fevaluate_skim(ev, evidx, nsig, source, name_, objname_):
    """Evaluate the combined cut list on ``ev`` with the object's jagged layout.

    The entries of ``cutlist`` are combined with ``+`` and the combined
    object is called on the event; its content is re-wrapped with the
    starts/stops of ``ev.<objname_>.pt``.
    """
    obj_pt = getattr(ev, objname_).pt
    starts, stops = obj_pt.starts, getattr(ev, objname_).pt.stops
    combined = reduce(operator.add, cutlist)
    return awk.JaggedArray(starts, stops, combined(ev).content)
def fphoton_pt_shift(ev, evidx, nsig, source):
    """Return the photon pT shift for the ``"photonEnergyScale"`` systematic.

    The relative shift is the photon energy error divided by pT; for any
    other ``source`` it is zero. NaN entries of the result are replaced by
    zero.
    """
    # Multiplying by the boolean switches the shift off for other sources.
    is_photon_scale = (source == "photonEnergyScale")
    shift = is_photon_scale * ev.Photon_energyErr.content / ev.Photon.pt.content

    shifted = pt_shift_numba(ev.Photon.pt.content, nsig, shift, -shift)
    result = awk.JaggedArray(
        ev.Photon.pt.starts, ev.Photon.pt.stops, shifted)

    nan_mask = np.isnan(result).content
    result.content[nan_mask] = 0.
    return result
def calibrateDedxExternal(self, array, plane_num):
    """Calibrate the dE/dx values of ``array`` for one plane.

    The values, their calibration parameters and the jagged layout come from
    ``self.prepareDedxWholeDataset``; the calibrated values are returned as
    a jagged array with that same layout.
    """
    prepared = self.prepareDedxWholeDataset(array, plane_num)
    dedx_values, parameters_values, starts, stops = prepared
    calibrated = self.applyCalibration(
        plane_num, dedx_values, parameters_values)
    return awkward.JaggedArray(
        content=calibrated, starts=starts, stops=stops)