def test_3D_jagged(wrapped_tree):
    fake_3d = [[np.arange(i + 1) + j for i in range(j % 3)]
               for j in range(len(wrapped_tree))]
    fake_3d = JaggedArray.fromiter(fake_3d)
    wrapped_tree.new_variable("Fake3D", fake_3d)
    assert isinstance(fake_3d.count(), JaggedArray)
    assert all((fake_3d.copy().count() == fake_3d.count()).all())

    aliased = expressions.evaluate(wrapped_tree, "Fake3D")
    assert (aliased == fake_3d).all().all().all()

    doubled = expressions.evaluate(wrapped_tree, "Fake3D * 2")
    assert (doubled == fake_3d * 2).all().all().all()
    assert len(doubled[0, :, :]) == 0
    assert doubled[1, 0, :] == [2]
    assert doubled[2, 0, :] == [4]
    assert all(doubled[2, 1, :] == [4, 6])

    doubled = expressions.evaluate(wrapped_tree, "Fake3D + Fake3D")
    assert (doubled == fake_3d * 2).all().all().all()
    assert len(doubled[0, :, :]) == 0
    assert doubled[1, 0, :] == [2]
    assert doubled[2, 0, :] == [4]
    assert all(doubled[2, 1, :] == [4, 6])

    fake_3d_2 = [[np.arange(i + 3) + j for i in range(j % 2)]
                 for j in range(len(wrapped_tree))]
    fake_3d_2 = JaggedArray.fromiter(fake_3d_2)
    wrapped_tree.new_variable("SecondFake3D", fake_3d_2)
    with pytest.raises(RuntimeError) as e:
        expressions.evaluate(wrapped_tree, "SecondFake3D + Fake3D")
    assert "different jaggedness" in str(e.value)
def NestNestObjArrayToJagged(objarr):
    """uproot reads a vector<vector<number>> TBranch as an ObjectArray;
    this function converts it to a doubly jagged JaggedArray
    """
    # jagged array of lists
    jaggedList = JaggedArray.fromiter(objarr)
    # flatten to one level
    _jagged = JaggedArray.fromiter(jaggedList.content)
    return JaggedArray.fromoffsets(jaggedList.offsets, _jagged)
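# A minimal sketch of the target layout (hypothetical values; assumes
# awkward0's JaggedArray, as used throughout this code): a doubly jagged
# array is an outer JaggedArray whose content is itself a JaggedArray.
import numpy as np
from awkward import JaggedArray

inner = JaggedArray.fromcounts(np.array([2, 1, 3]),
                               np.array([1., 2., 3., 4., 5., 6.]))
outer = JaggedArray.fromoffsets(np.array([0, 2, 3]), inner)
# two events: [[1, 2], [3]] and [[4, 5, 6]]
assert outer[0][1][0] == 3.0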
def __call__(self, *args):
    inputs = list(args)
    offsets = None
    # TODO: check can use offsets (this should always be true for striped)
    # Alternatively we can just use starts and stops
    for i in range(len(inputs)):
        if isinstance(inputs[i], JaggedArray):
            if offsets is not None:
                if type(offsets) is int:
                    raise Exception(
                        'Do not mix JaggedArrays and numpy arrays when '
                        'calling a derived class of lookup_base')
                elif type(offsets) is np.ndarray and offsets.base is not inputs[i].offsets.base:
                    raise Exception(
                        'All input jagged arrays must have a common structure (offsets)!')
            offsets = inputs[i].offsets
            inputs[i] = inputs[i].content
        elif isinstance(inputs[i], np.ndarray):
            if offsets is not None:
                if type(offsets) is np.ndarray:
                    raise Exception(
                        'Do not mix JaggedArrays and numpy arrays when '
                        'calling a derived class of lookup_base')
            offsets = -1
    retval = self._evaluate(*tuple(inputs))
    if offsets is not None and type(offsets) is not int:
        retval = JaggedArray.fromoffsets(offsets, retval)
    return retval
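# A minimal standalone sketch of the flatten/evaluate/re-wrap pattern used
# above (hypothetical data; assumes awkward0's JaggedArray): the lookup runs
# on the flat content, then the jagged structure is restored from the saved
# offsets.
import numpy as np
from awkward import JaggedArray

jagged = JaggedArray.fromcounts(np.array([2, 0, 1]), np.array([1., 4., 9.]))
flat_result = np.sqrt(jagged.content)                           # evaluate flat
wrapped = JaggedArray.fromoffsets(jagged.offsets, flat_result)  # re-wrap
assert wrapped.counts.tolist() == [2, 0, 1]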
def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
    # if it is a jagged array, save the offsets then flatten everything;
    # this is needed for the ternary conditions later
    offsets = None
    if isinstance(kpt, JaggedArray):
        offsets = kpt.offsets
        kpt = kpt.flatten()
        eta = eta.flatten()
        nl = nl.flatten()
        u = u.flatten()
    abseta = abs(eta)
    kData = self._kRes[s][m][1](abseta)  # type 1 is data
    kMC = self._kRes[s][m][0](abseta)    # type 0 is MC
    mask = kData > kMC
    x = np.zeros_like(kpt)
    sigma = self._sigma(kpt, eta, nl, s, m)
    # Rochester cbA = beta, cbN = m, as well as cbM (always 0?) = loc and
    # cbS = scale to transform y = (x - loc) / scale in the pdf method
    cbA = self._cbA[s][m](abseta, nl)
    cbN = self._cbN[s][m](abseta, nl)
    loc = np.zeros_like(u)
    cbS = self._cbS[s][m](abseta, nl)
    invcdf = doublecrystalball.ppf(u, cbA, cbA, cbN, cbN, loc, cbS)
    x[mask] = (np.sqrt(kData[mask] * kData[mask] - kMC[mask] * kMC[mask])
               * sigma[mask] * invcdf[mask])
    result = np.ones_like(kpt)
    result[(x > -1)] = 1.0 / (1.0 + x[x > -1])
    if offsets is not None:
        result = JaggedArray.fromoffsets(offsets, result)
    return result
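# A sketch of the masked in-place update used above (hypothetical numbers):
# only entries passing the boolean condition are rewritten, the rest keep
# their initialized value.
import numpy as np

x = np.array([-2.0, -0.5, 0.0, 3.0])
result = np.ones_like(x)
result[x > -1] = 1.0 / (1.0 + x[x > -1])  # entries with x <= -1 stay at 1.0
assert np.allclose(result, [1.0, 2.0, 1.0, 0.25])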
def test_jagged_nth_3D(jagged_1):
    fake_3d = [[np.arange(i + 1) + j for i in range(j % 3)] for j in range(5)]
    fake_3d = JaggedArray.fromiter(fake_3d)

    get_second = reductions.JaggedNth(1, np.nan)
    reduced = get_second(fake_3d)
    assert len(reduced[0]) == 0
    assert len(reduced[1]) == 1
    assert np.isnan(reduced[1])
    assert len(reduced[2]) == 2
    assert np.isnan(reduced[2][0])
    assert reduced[2][1] == 3
    assert len(reduced[3]) == 0
    assert len(reduced[4]) == 1
    assert np.isnan(reduced[4])

    get_first = reductions.JaggedNth(0, np.nan)
    reduced = get_first(fake_3d)
    assert len(reduced[0]) == 0
    assert len(reduced[1]) == 1
    assert reduced[1][0] == 1
    assert len(reduced[2]) == 2
    assert reduced[2][0] == 2
    assert reduced[2][1] == 2
    assert len(reduced[3]) == 0
    assert len(reduced[4]) == 1
    assert reduced[4] == 4
def passLooseJetSel(jet):
    outs = np.ones_like(jet.pt.content, dtype=np.bool)
    absEta = np.abs(jet.eta.content)
    etaVFor = (absEta <= 3.0)
    etaFor = (absEta <= 2.7)
    etaCen = (absEta <= 2.4)
    # forward jets
    outs[etaFor] &= ((jet.neuHadFrac.content[etaFor] < 0.99) &
                     (jet.neuEmFrac.content[etaFor] < 0.99) &
                     (jet.nParticles.content[etaFor] > 1))
    # central jets
    outs[etaCen] &= ((jet.chHadFrac.content[etaCen] > 0.0) &
                     (jet.nCharged.content[etaCen] > 0) &
                     (jet.chEmFrac.content[etaCen] < 0.99))
    # 2.7 < |eta| <= 3.0
    etaHE = etaVFor & ~etaFor
    outs[etaHE] &= ((jet.neuEmFrac.content[etaHE] > 0.01) &
                    (jet.neuHadFrac.content[etaHE] < 0.98) &
                    (jet.nNeutrals.content[etaHE] > 2))
    # |eta| > 3.0
    etaHF = ~etaVFor
    outs[etaHF] &= ((jet.neuEmFrac.content[etaHF] > 0.90) &
                    (jet.nNeutrals.content[etaHF] > 10))
    outs = JaggedArray.fromoffsets(jet.pt.offsets, outs)
    return outs
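# A sketch of the region-wise masked AND used in passLooseJetSel above
# (hypothetical values): cuts are ANDed only into the subset selected by an
# eta mask, leaving the other entries untouched.
import numpy as np

outs = np.ones(4, dtype=bool)
etaCen = np.array([True, True, False, False])
frac = np.array([0.5, 0.995, 0.5, 0.995])
outs[etaCen] &= (frac[etaCen] < 0.99)
assert outs.tolist() == [True, False, True, True]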
def getvar(events, name, default=None, parents="run"):
    if name in events:
        return events[name]
    if parents not in events:
        return None
    if isinstance(events[parents], np.ndarray):
        return np.full_like(events[parents], default)
    if isinstance(events[parents], JaggedArray):
        content = [default] * events[parents].flatten().shape[0]
        return JaggedArray.fromoffsets(events[parents].offsets, content)
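# A minimal usage sketch of getvar (a plain dict stands in for the events
# container; hypothetical values): a missing branch is filled with the
# default, shaped like the parent branch.
import numpy as np

events = {"run": np.array([1, 1, 2])}
filled = getvar(events, "weight", default=1, parents="run")
assert filled.tolist() == [1, 1, 1]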
def setup_gen_candidates(df):
    # Find the first ancestor with a different PDG ID
    # before defining the gen candidates
    mothers = JaggedArray.fromcounts(df['nGenPart'],
                                     df['GenPart_genPartIdxMother'])
    pdgids = JaggedArray.fromcounts(df['nGenPart'], df['GenPart_pdgId'])
    parent_index = find_first_parent(mothers, pdgids)
    gen = JaggedCandidateArray.candidatesfromcounts(
        df['nGenPart'],
        pt=df['GenPart_pt'],
        eta=df['GenPart_eta'],
        phi=df['GenPart_phi'],
        mass=df['GenPart_mass'],
        charge=df['GenPart_pdgId'],
        pdg=df['GenPart_pdgId'],
        status=df['GenPart_status'],
        flag=df['GenPart_statusFlags'],
        mother=df['GenPart_genPartIdxMother'],
        parentIndex=parent_index.flatten())
    return gen
def get_lepton_values(zl, key):
    val = np.zeros_like(zl.flatten(), dtype=float)
    if len(val) == 0:
        return JaggedArray.fromoffsets(zl.offsets, val)
    for i in range(2):
        mask = (i == zl.flatten())
        if key == 'pt':
            val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].pt
        elif key == 'eta':
            val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].eta
        elif key == 'phi':
            val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].phi
        elif key == 'mass':
            val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].mass
        else:
            val[mask] = z_cands[passZCand][str(i)].flatten()[mask][key]
    return JaggedArray.fromoffsets(zl.offsets, val)
def event(self, chunk):
    for output, expression, reduction, fill_missing in self._variables:
        branches = get_branches(expression, chunk.tree.allkeys())
        data = chunk.tree.pandas.df(branches)
        result = data.eval(expression)
        if reduction:
            groups = result.groupby(level=0)
            result = reduction(groups)
            array = result.values
        else:
            events = result.index.get_level_values(0).values
            events -= events[0]
            array = JaggedArray.fromparents(events, result.values)
        chunk.tree.new_variable(output, array)
    return True
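# A sketch of JaggedArray.fromparents as used above (hypothetical values):
# it groups a flat array into events according to a parent-index array.
import numpy as np
from awkward import JaggedArray

parents = np.array([0, 0, 1, 2, 2, 2])
values = np.array([1., 2., 3., 4., 5., 6.])
jagged = JaggedArray.fromparents(parents, values)
assert jagged.counts.tolist() == [2, 1, 3]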
def passJetTightLepVetoSel(jet):
    outs = np.ones_like(jet.pt.content, dtype=np.bool)
    absEta = np.abs(jet.eta.content)
    etaFor = (absEta <= 2.7)
    etaCen = (absEta <= 2.4)
    # forward jets
    outs[etaFor] &= ((jet.neuHadFrac.content[etaFor] < 0.90) &
                     (jet.neuEmFrac.content[etaFor] < 0.90) &
                     (jet.nParticles.content[etaFor] > 1) &
                     (jet.muonFrac.content[etaFor] < 0.8))
    # central jets
    outs[etaCen] &= ((jet.chHadFrac.content[etaCen] > 0.0) &
                     (jet.nCharged.content[etaCen] > 0) &
                     (jet.chEmFrac.content[etaCen] < 0.9))
    outs = JaggedArray.fromoffsets(jet.pt.offsets, outs)
    return outs
def test_rochester():
    rochester_data = lookup_tools.txt_converters.convert_rochester_file(
        'tests/samples/RoccoR2018.txt.gz', loaduncs=True)
    rochester = lookup_tools.rochester_lookup.rochester_lookup(rochester_data)

    # testing 1-to-1 agreement with the official Rochester tool would require
    # loading its C++ files; instead, preload the correct scales in the sample
    # directory (the script tests/samples/rochester/build_rochester.py
    # produces these)
    official_data_k = np.load('tests/samples/nano_dimuon_rochester.npy')
    official_data_err = np.load('tests/samples/nano_dimuon_rochester_err.npy')
    official_mc_k = np.load('tests/samples/nano_dy_rochester.npy')
    official_mc_err = np.load('tests/samples/nano_dy_rochester_err.npy')
    mc_rand = np.load('tests/samples/nano_dy_rochester_rand.npy')

    # test against nanoaod
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dimuon.root'))
    data_k = rochester.kScaleDT(events.Muon.charge, events.Muon.pt,
                                events.Muon.eta, events.Muon.phi)
    assert all(np.isclose(data_k.flatten(), official_data_k))
    data_err = rochester.kScaleDTerror(events.Muon.charge, events.Muon.pt,
                                       events.Muon.eta, events.Muon.phi)
    data_err = np.array(data_err.flatten(), dtype=float)
    assert all(np.isclose(data_err, official_data_err, atol=1e-8))

    # test against mc
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dy.root'))
    hasgen = ~np.isnan(events.Muon.matched_gen.pt.fillna(np.nan))
    mc_rand = JaggedArray.fromoffsets(hasgen.offsets, mc_rand)
    mc_kspread = rochester.kSpreadMC(events.Muon.charge[hasgen],
                                     events.Muon.pt[hasgen],
                                     events.Muon.eta[hasgen],
                                     events.Muon.phi[hasgen],
                                     events.Muon.matched_gen.pt[hasgen])
    mc_ksmear = rochester.kSmearMC(events.Muon.charge[~hasgen],
                                   events.Muon.pt[~hasgen],
                                   events.Muon.eta[~hasgen],
                                   events.Muon.phi[~hasgen],
                                   events.Muon.nTrackerLayers[~hasgen],
                                   mc_rand[~hasgen])
    mc_k = np.ones_like(events.Muon.pt.flatten())
    mc_k[hasgen.flatten()] = mc_kspread.flatten()
    mc_k[~hasgen.flatten()] = mc_ksmear.flatten()
    assert all(np.isclose(mc_k, official_mc_k))

    mc_errspread = rochester.kSpreadMCerror(events.Muon.charge[hasgen],
                                            events.Muon.pt[hasgen],
                                            events.Muon.eta[hasgen],
                                            events.Muon.phi[hasgen],
                                            events.Muon.matched_gen.pt[hasgen])
    mc_errsmear = rochester.kSmearMCerror(events.Muon.charge[~hasgen],
                                          events.Muon.pt[~hasgen],
                                          events.Muon.eta[~hasgen],
                                          events.Muon.phi[~hasgen],
                                          events.Muon.nTrackerLayers[~hasgen],
                                          mc_rand[~hasgen])
    mc_err = np.ones_like(events.Muon.pt.flatten())
    mc_err[hasgen.flatten()] = mc_errspread.flatten()
    mc_err[~hasgen.flatten()] = mc_errsmear.flatten()
    assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))
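# A sketch of the mask-merge used above (hypothetical values): corrections
# computed separately on the gen-matched and unmatched subsets are recombined
# back onto one flat array.
import numpy as np

hasgen = np.array([True, False, True])
k_spread = np.array([1.01, 0.99])   # one entry per gen-matched muon
k_smear = np.array([1.05])          # one entry per unmatched muon
k = np.ones_like(hasgen, dtype=float)
k[hasgen] = k_spread
k[~hasgen] = k_smear
assert np.allclose(k, [1.01, 1.05, 0.99])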
def getSubCorrections(self, **kwargs):
    """Returns the set of corrections for all input jets, broken down by level

    Use like:
        jecs = corrector.getSubCorrections(JetProperty1=jet.property1, ...)
    'jecs' will be formatted like [[jec_jet1 jec_jet2 ...] ...]
    """
    localargs = kwargs
    firstarg = localargs[self._signature[0]]
    cumulativeCorrection = 1.0
    offsets = None
    if isinstance(firstarg, JaggedArray):
        offsets = firstarg.offsets
        cumulativeCorrection = firstarg.ones_like().content
        for key in localargs.keys():
            localargs[key] = localargs[key].content
    else:
        cumulativeCorrection = np.ones_like(firstarg)
    corrVars = []
    if 'JetPt' in localargs.keys():
        corrVars.append('JetPt')
    if 'JetE' in localargs.keys():
        corrVars.append('JetE')
    if len(corrVars) == 0:
        raise Exception('No variable to correct, need JetPt or JetE in inputs!')
    corrections = []
    for i, func in enumerate(self._funcs):
        sig = func.signature
        args = []
        for input in sig:
            args.append(localargs[input])
        corr = func(*tuple(args))
        for var in corrVars:
            localargs[var] *= corr
        cumulativeCorrection *= corr
        corrections.append(cumulativeCorrection)
    if offsets is not None:
        for i in range(len(corrections)):
            corrections[i] = JaggedArray.fromoffsets(offsets, corrections[i])
    return corrections
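# A sketch of the cumulative composition performed above (hypothetical
# correction factors for two jets): each level's correction multiplies
# everything applied before it, and one cumulative snapshot is kept per level.
import numpy as np

level_corrections = [np.array([1.10, 0.95]), np.array([1.02, 1.01])]
cumulative = np.ones(2)
per_level = []
for corr in level_corrections:
    cumulative = cumulative * corr
    per_level.append(cumulative.copy())
assert np.allclose(per_level[-1], [1.122, 0.9595])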
def convert_effective_area_file(eaFilePath):
    ea_f = open(eaFilePath, 'r')
    layoutstr = ea_f.readline().strip().strip('{}')
    ea_f.close()

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception('First column of Effective Area File Header must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nBinColumns = 2 * nBinnedVars
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=True,
                         encoding='ascii')

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = JaggedArray.fromcounts(counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    # again this is only for one dimension of binning;
    # we can figure out a 2D EA when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        ea_name = '_'.join([name, columns[offset_name + i]])
        values = pars[columns[offset_name + i]]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)
    return wrapped_up
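# A sketch of the bin-edge reconstruction used above (hypothetical eta bins):
# per-row Min/Max columns are merged into a single sorted array of edges.
import numpy as np

etaMin = np.array([0.0, 1.0, 2.0])
etaMax = np.array([1.0, 2.0, 2.5])
edges = np.union1d(np.unique(etaMin), np.unique(etaMax))
assert edges.tolist() == [0.0, 1.0, 2.0, 2.5]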
def process(self, df):
    output = self.accumulator.identity()

    # ---- Define dataset ---- #
    dataset = df['dataset']  # coffea.processor.LazyDataFrame
    Dataset_info = df.available  # list of available columns in the LazyDataFrame (similar to 'Events->Show()' in ROOT)

    # ---- Get triggers from Dataset_info ---- #
    #triggers = [itrig for itrig in Dataset_info if 'HLT_PFHT' in itrig]
    #AK8triggers = [itrig for itrig in Dataset_info if 'HLT_AK8PFHT' in itrig]

    # ---- Find numeric values in trigger strings ---- #
    #triggers_cut1 = [sub.split('PFHT')[1] for sub in triggers]  # Remove string characters from left of number
    #triggers_cut2 = [sub.split('_')[0] for sub in triggers_cut1]  # Remove string characters from right of number
    #isTriggerValue = [val.isnumeric() for val in triggers_cut2]  # Boolean -> if string is only a number
    #triggers_cut2 = np.where(isTriggerValue, triggers_cut2, 0)  # If string is not a number, replace with 0
    #triggers_vals = [int(val) for val in triggers_cut2]  # Convert string numbers to integers
    #AK8triggers_cut1 = [sub.split('HT')[1] for sub in AK8triggers]
    #AK8triggers_cut2 = [sub.split('_')[0] for sub in AK8triggers_cut1]
    #isAK8TriggerValue = [val.isnumeric() for val in AK8triggers_cut2]
    #AK8triggers_cut2 = np.where(isAK8TriggerValue, AK8triggers_cut2, 0)
    #AK8triggers_vals = [int(val) for val in AK8triggers_cut2]

    # ---- Find largest and second largest values ---- #
    #triggers_vals.sort(reverse=True)
    #AK8triggers_vals.sort(reverse=True)
    #triggers_vals1 = str(triggers_vals[0])
    #triggers_vals2 = str(triggers_vals[1])
    #AK8triggers_vals1 = str(AK8triggers_vals[0])
    #AK8triggers_vals2 = str(AK8triggers_vals[1])

    # ---- Define strings for the selected triggers ---- #
    #HLT_trig1_str = [itrig for itrig in triggers if (triggers_vals1) in itrig][0]
    #HLT_trig2_str = [itrig for itrig in triggers if (triggers_vals2) in itrig][0]
    #HLT_AK8_trig1_str = [itrig for itrig in AK8triggers if (AK8triggers_vals1) in itrig][0]
    #HLT_AK8_trig2_str = [itrig for itrig in AK8triggers if (AK8triggers_vals2) in itrig][0]

    # ---- Define HLT triggers to be used ---- #
    #HLT_trig1 = df[HLT_trig1_str]
    #HLT_trig2 = df[HLT_trig2_str]
    #HLT_AK8_trig1 = df[HLT_AK8_trig1_str]
    #HLT_AK8_trig2 = df[HLT_AK8_trig2_str]

    # ---- Define AK8 jets as FatJets ---- #
    FatJets = JaggedCandidateArray.candidatesfromcounts(
        df['nFatJet'],
        pt=df['FatJet_pt'],
        eta=df['FatJet_eta'],
        phi=df['FatJet_phi'],
        mass=df['FatJet_mass'],
        area=df['FatJet_area'],
        msoftdrop=df['FatJet_msoftdrop'],
        jetId=df['FatJet_jetId'],
        tau1=df['FatJet_tau1'],
        tau2=df['FatJet_tau2'],
        tau3=df['FatJet_tau3'],
        tau4=df['FatJet_tau4'],
        n3b1=df['FatJet_n3b1'],
        btagDeepB=df['FatJet_btagDeepB'],
        btagCSVV2=df['FatJet_btagCSVV2'],
        deepTag_TvsQCD=df['FatJet_deepTag_TvsQCD'],
        deepTagMD_TvsQCD=df['FatJet_deepTagMD_TvsQCD'],
        subJetIdx1=df['FatJet_subJetIdx1'],
        subJetIdx2=df['FatJet_subJetIdx2'])

    # ---- Define AK4 jets as Jets ---- #
    Jets = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df['Jet_pt'],
        eta=df['Jet_eta'],
        phi=df['Jet_phi'],
        mass=df['Jet_mass'],
        area=df['Jet_area'])

    # ---- Define SubJets ---- #
    SubJets = JaggedCandidateArray.candidatesfromcounts(
        df['nSubJet'],
        pt=df['SubJet_pt'],
        eta=df['SubJet_eta'],
        phi=df['SubJet_phi'],
        mass=df['SubJet_mass'],
        btagDeepB=df['SubJet_btagDeepB'],
        btagCSVV2=df['SubJet_btagCSVV2'])

    # ---- Get event weights from dataset ---- #
    if 'JetHT' in dataset:  # if data is used...
        evtweights = np.ones(FatJets.size)  # set all "data weights" to one
    else:  # if a Monte Carlo dataset is used...
        evtweights = df["Generator_weight"].reshape(-1, 1).flatten()

    # ---- Show all events ---- #
    output['cutflow']['all events'] += FatJets.size

    # ---- Apply trigger(s) ---- #
    #FatJets = FatJets[HLT_AK8_trig1]
    #evtweights = evtweights[HLT_AK8_trig1]
    #Jets = Jets[HLT_AK8_trig1]
    #SubJets = SubJets[HLT_AK8_trig1]

    # ---- Jets that satisfy jet ID ---- #
    jet_id = (FatJets.jetId > 0)  # loose jet ID
    FatJets = FatJets[jet_id]
    output['cutflow']['jet id'] += jet_id.any().sum()

    # ---- Apply pT cut and rapidity window ---- #
    jetkincut_index = (FatJets.pt > self.ak8PtMin) & (np.abs(FatJets.p4.rapidity) < 2.4)
    FatJets = FatJets[jetkincut_index]
    output['cutflow']['jet kin'] += jetkincut_index.any().sum()

    # ---- Find two AK8 jets ---- #
    twoFatJetsKin = (FatJets.counts == 2)
    FatJets = FatJets[twoFatJetsKin]
    evtweights = evtweights[twoFatJetsKin]
    Jets = Jets[twoFatJetsKin]
    SubJets = SubJets[twoFatJetsKin]
    output['cutflow']['two FatJets and jet kin'] += twoFatJetsKin.sum()

    # ---- Apply HT cut ---- #
    hT = Jets.pt.sum()
    passhT = (hT > self.htCut)
    evtweights = evtweights[passhT]
    FatJets = FatJets[passhT]
    SubJets = SubJets[passhT]

    # ---- Randomly assign AK8 jets as ttbar candidates 0 and 1 ---- #
    if self.RandomDebugMode == True:  # 'pseudo' randomizer for consistent results
        highPhi = FatJets.phi[:, 0] > FatJets.phi[:, 1]
        highRandIndex = np.where(highPhi, 0, 1)
        index = JaggedArray.fromcounts(np.ones(len(FatJets), dtype='i'), highRandIndex)
    else:  # truly randomize
        index = JaggedArray.fromcounts(np.ones(len(FatJets), dtype='i'),
                                       self.prng.randint(2, size=len(FatJets)))
    jet0 = FatJets[index]  # J0
    jet1 = FatJets[1 - index]  # J1
    ttbarcands = jet0.cross(jet1)  # FatJets[:, 0:2].distincts()

    # ---- Make sure we have at least 1 ttbar candidate pair and re-broadcast the relevant arrays ---- #
    oneTTbar = (ttbarcands.counts >= 1)
    output['cutflow']['>= one oneTTbar'] += oneTTbar.sum()
    ttbarcands = ttbarcands[oneTTbar]
    evtweights = evtweights[oneTTbar]
    FatJets = FatJets[oneTTbar]
    SubJets = SubJets[oneTTbar]

    # ---- Apply delta phi cut for back-to-back topology ---- #
    dPhiCut = (ttbarcands.i0.p4.delta_phi(ttbarcands.i1.p4) > 2.1).flatten()
    output['cutflow']['dPhi > 2.1'] += dPhiCut.sum()
    ttbarcands = ttbarcands[dPhiCut]
    evtweights = evtweights[dPhiCut]
    FatJets = FatJets[dPhiCut]
    SubJets = SubJets[dPhiCut]

    # ---- Identify subjets according to subjet ID ---- #
    hasSubjets0 = ((ttbarcands.i0.subJetIdx1 > -1) & (ttbarcands.i0.subJetIdx2 > -1))
    hasSubjets1 = ((ttbarcands.i1.subJetIdx1 > -1) & (ttbarcands.i1.subJetIdx2 > -1))
    GoodSubjets = ((hasSubjets0) & (hasSubjets1)).flatten()
    ttbarcands = ttbarcands[GoodSubjets]
    SubJets = SubJets[GoodSubjets]
    evtweights = evtweights[GoodSubjets]
    SubJet01 = SubJets[ttbarcands.i0.subJetIdx1]  # FatJet i0 with subjet 1
    SubJet02 = SubJets[ttbarcands.i0.subJetIdx2]  # FatJet i0 with subjet 2
    SubJet11 = SubJets[ttbarcands.i1.subJetIdx1]  # FatJet i1 with subjet 1
    SubJet12 = SubJets[ttbarcands.i1.subJetIdx2]  # FatJet i1 with subjet 2

    # ---- Define rapidity regions ---- #
    cen = np.abs(ttbarcands.i0.p4.rapidity - ttbarcands.i1.p4.rapidity) < 1.0
    fwd = (~cen)

    # ---- CMS Top Tagger Version 2 (SD and tau32 cuts) ---- #
    tau32_i0 = np.where(ttbarcands.i0.tau2 > 0, ttbarcands.i0.tau3 / ttbarcands.i0.tau2, 0)
    tau32_i1 = np.where(ttbarcands.i1.tau2 > 0, ttbarcands.i1.tau3 / ttbarcands.i1.tau2, 0)
    taucut_i0 = tau32_i0 < self.tau32Cut
    taucut_i1 = tau32_i1 < self.tau32Cut
    mcut_i0 = (self.minMSD < ttbarcands.i0.msoftdrop) & (ttbarcands.i0.msoftdrop < self.maxMSD)
    mcut_i1 = (self.minMSD < ttbarcands.i1.msoftdrop) & (ttbarcands.i1.msoftdrop < self.maxMSD)
    ttag_i0 = (taucut_i0) & (mcut_i0)
    ttag_i1 = (taucut_i1) & (mcut_i1)

    # ---- Define "top tag" regions ---- #
    antitag = (~taucut_i0) & (mcut_i0)  # probe will always be ttbarcands.i1 (at)
    antitag_probe = np.logical_and(antitag, ttag_i1)  # antitag and t-tagged probe pair for the mistag rate (Pt)
    pretag = ttag_i0  # only jet0 (pret)
    ttag0 = (~ttag_i0) & (~ttag_i1)  # no tops tagged (0t)
    ttag1 = ttag_i0 ^ ttag_i1  # exclusively one top tagged (1t)
    ttagI = ttag_i0 | ttag_i1  # at least one top tagged ('I' for 'inclusive'; >=1t; 1t+2t)
    ttag2 = ttag_i0 & ttag_i1  # both jets top tagged (2t)
    Alltags = ttag0 | ttagI  # either no tag or at least one tag (0t+1t+2t)

    # ---- Pick the FatJet that passes the b-tag cut based on its subjet with the highest b-tag value ---- #
    btag_i0 = (np.maximum(SubJet01.btagCSVV2, SubJet02.btagCSVV2) > self.bdisc)
    btag_i1 = (np.maximum(SubJet11.btagCSVV2, SubJet12.btagCSVV2) > self.bdisc)

    # ---- Define "b tag" regions ---- #
    btag0 = (~btag_i0) & (~btag_i1)  # (0b)
    btag1 = btag_i0 ^ btag_i1  # (1b)
    btag2 = btag_i0 & btag_i1  # (2b)

    # ---- Get analysis categories ---- #
    # ---- They are (central, forward) cross (0b, 1b, 2b) cross (At, at, 0t, 1t, >=1t, 2t) ---- #
    regs = [cen, fwd]
    btags = [btag0, btag1, btag2]
    ttags = [antitag_probe, antitag, pretag, ttag0, ttag1, ttagI, ttag2, Alltags]
    cats = [(t & b & y).flatten() for t, b, y in itertools.product(ttags, btags, regs)]
    labels_and_categories = dict(zip(self.anacats, cats))

    # ---- Variables for kinematic histograms ---- #
    # ---- "i0" is the control jet, "i1" is the probe jet ---- #
    ttbarmass = ttbarcands.p4.sum().mass.flatten()
    jetpt = ttbarcands.i1.pt.flatten()
    jeteta = ttbarcands.i1.eta.flatten()
    jetphi = ttbarcands.i1.phi.flatten()
    jety = ttbarcands.i1.p4.rapidity.flatten()
    jetmass = ttbarcands.i1.p4.mass.flatten()
    SDmass = ttbarcands.i1.msoftdrop.flatten()
    jetdy = np.abs(ttbarcands.i0.p4.rapidity.flatten() - ttbarcands.i1.p4.rapidity.flatten())
    Tau32 = (ttbarcands.i1.tau3 / ttbarcands.i1.tau2).flatten()

    # ---- Variables for deep tagger analysis ---- #
    deepTag = ttbarcands.i1.deepTag_TvsQCD.flatten()
    deepTagMD = ttbarcands.i1.deepTagMD_TvsQCD.flatten()

    weights = evtweights.flatten()

    # ---- Define the SumW2 for MC datasets ---- #
    output['cutflow']['sumw'] += np.sum(weights)
    output['cutflow']['sumw2'] += np.sum(weights**2)

    # ---- Define momentum p of the probe jet as the mistag-rate variable; M(p) ---- #
    # ---- Transverse momentum pT can also be used instead; M(pT) ---- #
    pT = ttbarcands.i1.pt.flatten()
    eta = ttbarcands.i1.eta.flatten()
    pz = np.sinh(eta) * pT
    p = np.absolute(np.sqrt(pT**2 + pz**2))

    # ---- Define the numerator and denominator for the mistag rate ---- #
    numerator = np.where(antitag_probe, p, -1)  # if no antitag and tagged probe, move event to a useless bin
    denominator = np.where(antitag, p, -1)  # if no antitag, move event to a useless bin
    df = pd.DataFrame({"momentum": p})  # used for finding values in lookup tables

    for ilabel, icat in labels_and_categories.items():
        ### ---------------- Mistag Scaling ---------------- ###
        if self.UseLookUpTables == True:
            # ---- Weight ttbar M.C. and data by mistag from data (corresponding to its year) ---- #
            if 'TTbar_' in dataset:
                file_df = self.lu['JetHT' + dataset[-4:] + '_Data']['at' + str(ilabel[-5:])]  # pick out the proper JetHT year mistag for TTbar sim.
            elif dataset == 'TTbar':
                file_df = self.lu['JetHT']['at' + str(ilabel[-5:])]  # all JetHT years mistag for TTbar sim.
            else:
                file_df = self.lu[dataset]['at' + str(ilabel[-5:])]  # get the mistag (lookup) filename for 'at'
            bin_widths = file_df['p'].values  # collect bins as written in the .csv file
            mtr = file_df['M(p)'].values  # collect the mistag rate as a function of p as written in the file
            wgts = mtr  # define weights based on mistag rates
            BinKeys = np.arange(bin_widths.size)  # use as label for the BinNumber column in the new dataframe
            #Bins = pd.interval_range(start=0, periods=100, freq=100, closed='left')  # recreate the momentum bins from file_df as something readable for pd.cut()
            Bins = np.array(manual_bins)
            df['BinWidth'] = pd.cut(p, bins=Bins)  # new dataframe column
            df['BinNumber'] = pd.cut(p, bins=Bins, labels=BinKeys)
            BinNumber = df['BinNumber'].values  # collect the bin numbers into a numpy array
            BinNumber = BinNumber.astype('int64')  # ensures the bin numbers are integers
            WeightMatching = wgts[BinNumber]  # match 'wgts' with the corresponding p bin using the bin number
            Weights = weights * WeightMatching  # include 'wgts' with the previously defined 'weights'
        else:
            Weights = weights  # no mistag rates, no change to weights
        ### ------------------------------------------------ ###

        ### --------------- Mod-mass Procedure -------------- ###
        if self.ModMass == True:
            QCD_unweighted = util.load('TTbarResCoffea_QCD_unweighted_output.coffea')

            # ---- Extract event counts from the QCD MC hist in the signal region ---- #
            QCD_hist = QCD_unweighted['jetmass'].integrate('anacat', '2t' + str(ilabel[-5:])).integrate('dataset', 'QCD')
            data = QCD_hist.values()  # dictionary of values
            QCD_data = [i for i in data.values()][0]  # place every element of the dictionary into a numpy array

            # ---- Re-create bins from QCD_hist as a numpy array ---- #
            bins = np.arange(510)  # re-make bins from the jetmass axis starting with the appropriate range
            QCD_bins = bins[::10]  # finish re-making bins by ensuring exactly 50 bins like the jetmass axis

            # ---- Define the mod-mass distribution ---- #
            ModMass_hist_dist = ss.rv_histogram([QCD_data, QCD_bins])
            jet1_modp4 = copy.copy(jet1.p4)  # J1's Lorentz four-vector that can be safely modified
            jet1_modp4["fMass"] = ModMass_hist_dist.rvs(size=jet1_modp4.size)  # replace J1's mass with a random value from the mod-mass hist
            ttbarcands_modmass = jet0.p4.cross(jet1_modp4)  # J0's four-vector x modified J1's four-vector

            # ---- Apply the necessary selections to the new mod-mass version ---- #
            ttbarcands_modmass = ttbarcands_modmass[oneTTbar]
            ttbarcands_modmass = ttbarcands_modmass[dPhiCut]
            ttbarcands_modmass = ttbarcands_modmass[GoodSubjets]

            # ---- Manually sum the mod-mass p4 candidates (coffea technicality) ---- #
            ttbarcands_modmass_p4_sum = (ttbarcands_modmass.i0 + ttbarcands_modmass.i1)

            # ---- Re-define mass variables for the mod-mass procedure (pt, eta, phi are redundant to change) ---- #
            ttbarmass = ttbarcands_modmass_p4_sum.flatten().mass
            jetmass = ttbarcands_modmass.i1.mass.flatten()
        ### ------------------------------------------------ ###

        output['cutflow'][ilabel] += np.sum(icat)
        output['ttbarmass'].fill(dataset=dataset, anacat=ilabel,
                                 ttbarmass=ttbarmass[icat], weight=Weights[icat])
        output['jetpt'].fill(dataset=dataset, anacat=ilabel,
                             jetpt=jetpt[icat], weight=Weights[icat])
        output['probept'].fill(dataset=dataset, anacat=ilabel,
                               jetpt=pT[icat], weight=Weights[icat])
        output['probep'].fill(dataset=dataset, anacat=ilabel,
                              jetp=p[icat], weight=Weights[icat])
        output['jeteta'].fill(dataset=dataset, anacat=ilabel,
                              jeteta=jeteta[icat], weight=Weights[icat])
        output['jetphi'].fill(dataset=dataset, anacat=ilabel,
                              jetphi=jetphi[icat], weight=Weights[icat])
        output['jety'].fill(dataset=dataset, anacat=ilabel,
                            jety=jety[icat], weight=Weights[icat])
        output['jetdy'].fill(dataset=dataset, anacat=ilabel,
                             jetdy=jetdy[icat], weight=Weights[icat])
        output['numerator'].fill(dataset=dataset, anacat=ilabel,
                                 jetp=numerator[icat], weight=Weights[icat])
        output['denominator'].fill(dataset=dataset, anacat=ilabel,
                                   jetp=denominator[icat], weight=Weights[icat])
        output['jetmass'].fill(dataset=dataset, anacat=ilabel,
                               jetmass=jetmass[icat], weight=Weights[icat])
        output['SDmass'].fill(dataset=dataset, anacat=ilabel,
                              jetmass=SDmass[icat], weight=Weights[icat])
        output['tau32'].fill(dataset=dataset, anacat=ilabel,
                             tau32=Tau32[icat], weight=Weights[icat])
        output['tau32_2D'].fill(dataset=dataset, anacat=ilabel,
                                jetpt=pT[icat], tau32=Tau32[icat],
                                weight=Weights[icat])
        output['deepTag_TvsQCD'].fill(dataset=dataset, anacat=ilabel,
                                      jetpt=pT[icat], tagger=deepTag[icat],
                                      weight=Weights[icat])
        output['deepTagMD_TvsQCD'].fill(dataset=dataset, anacat=ilabel,
                                        jetpt=pT[icat], tagger=deepTagMD[icat],
                                        weight=Weights[icat])
    return output
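# A minimal sketch of the lookup-table bin matching used in the mistag
# weighting above (hypothetical bins and rates): pd.cut assigns each momentum
# to a bin label, and the label indexes into the per-bin mistag rates.
import numpy as np
import pandas as pd

p = np.array([50., 250., 900.])           # probe-jet momenta
Bins = np.array([0., 100., 500., 1000.])  # momentum bin edges
mtr = np.array([0.10, 0.05, 0.01])        # one mistag rate per bin
BinKeys = np.arange(mtr.size)
BinNumber = np.asarray(pd.cut(p, bins=Bins, labels=BinKeys)).astype('int64')
assert np.allclose(mtr[BinNumber], [0.10, 0.05, 0.01])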
def process(self, df):
    output = self.accumulator.identity()

    datasetFull = df['dataset']
    dataset = datasetFull.replace('_2016', '')
    isData = 'Data' in dataset

    year = 2016
    yearStr = "2016"
    muTrigger = df['HLT_IsoMu24'] | df['HLT_IsoTkMu24']
    eleTrigger = df['HLT_Ele27_WPTight_Gsf']
    photonBitMapName = 'Photon_cutBased'

    #### These are already applied in the skim
    # filters = (df['Flag_goodVertices'] &
    #            df['Flag_globalSuperTightHalo2016Filter'] &
    #            df['Flag_HBHENoiseFilter'] &
    #            df['Flag_HBHENoiseIsoFilter'] &
    #            df['Flag_EcalDeadCellTriggerPrimitiveFilter'] &
    #            df['Flag_BadPFMuonFilter']
    #            )
    # if year > 2016:
    #     filters = (filters &
    #                df['Flag_ecalBadCalibFilterV2']
    #                )

    muons = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'],
        eta=df['Muon_eta'],
        phi=df['Muon_phi'],
        mass=df['Muon_mass'],
        charge=df['Muon_charge'],
        relIso=df['Muon_pfRelIso04_all'],
        tightId=df['Muon_tightId'],
        isPFcand=df['Muon_isPFcand'],
        isTracker=df['Muon_isTracker'],
        isGlobal=df['Muon_isGlobal'],
    )
    electrons = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'],
        eta=df['Electron_eta'],
        phi=df['Electron_phi'],
        mass=df['Electron_mass'],
        charge=df['Electron_charge'],
        cutBased=df['Electron_cutBased'],
        d0=df['Electron_dxy'],
        dz=df['Electron_dz'],
    )
    jets = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df['Jet_pt'],
        eta=df['Jet_eta'],
        phi=df['Jet_phi'],
        mass=df['Jet_mass'],
        jetId=df['Jet_jetId'],
        btag=df['Jet_btagDeepB'],
        hadFlav=df['Jet_hadronFlavour'] if not isData else np.ones_like(df['Jet_jetId']),
        genIdx=df['Jet_genJetIdx'] if not isData else np.ones_like(df['Jet_jetId']),
    )
    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        phi=df['Photon_phi'],
        mass=np.zeros_like(df['Photon_pt']),
        isEE=df['Photon_isScEtaEE'],
        isEB=df['Photon_isScEtaEB'],
        photonId=df[photonBitMapName],
        passEleVeto=df['Photon_electronVeto'],
        pixelSeed=df['Photon_pixelSeed'],
        sieie=df['Photon_sieie'],
        chIso=df['Photon_pfRelIso03_chg'] * df['Photon_pt'],
        vidCuts=df['Photon_vidNestedWPBitmap'],
        genFlav=df['Photon_genPartFlav'] if not isData else np.ones_like(df['Photon_electronVeto']),
        genIdx=df['Photon_genPartIdx'] if not isData else np.ones_like(df['Photon_electronVeto']),
    )
    if not isData:
        genPart = JaggedCandidateArray.candidatesfromcounts(
            df['nGenPart'],
            pt=df['GenPart_pt'],
            eta=df['GenPart_eta'],
            phi=df['GenPart_phi'],
            mass=df['GenPart_mass'],
            pdgid=df['GenPart_pdgId'],
            motherIdx=df['GenPart_genPartIdxMother'],
            status=df['GenPart_status'],
            statusFlags=df['GenPart_statusFlags'],
        )
        genmotherIdx = genPart.motherIdx
        genpdgid = genPart.pdgid

    ## TTbar vs TTGamma overlap removal (work in progress, still buggy)
    doOverlapRemoval = False
    if 'TTbar' in dataset:
        doOverlapRemoval = True
        overlapPt = 10.
        overlapEta = 5.
        overlapDR = 0.1
    if re.search("^W[1234]jets$", dataset):
        doOverlapRemoval = True
        overlapPt = 10.
        overlapEta = 2.5
        overlapDR = 0.05
    if 'DYjetsM' in dataset:
        doOverlapRemoval = True
        overlapPt = 15.
        overlapEta = 2.6
        overlapDR = 0.05

    if doOverlapRemoval:
        overlapPhoSelect = ((genPart.pt >= overlapPt) &
                            (abs(genPart.eta) < overlapEta) &
                            (genPart.pdgid == 22) &
                            (genPart.status == 1))
        OverlapPhotons = genPart[overlapPhoSelect]

        idx = OverlapPhotons.motherIdx
        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)

        isNonPrompt = (maxParent > 37).any()

        finalGen = genPart[((genPart.status == 1) | (genPart.status == 71)) &
                           ~((abs(genPart.pdgid) == 12) |
                             (abs(genPart.pdgid) == 14) |
                             (abs(genPart.pdgid) == 16))]

        genPairs = OverlapPhotons['p4'].cross(finalGen['p4'], nested=True)
        ## remove the case where the cross product is the gen photon with itself
        genPairs = genPairs[~(genPairs.i0 == genPairs.i1)]
        dRPairs = genPairs.i0.delta_r(genPairs.i1)
        isOverlap = ((dRPairs.min() > overlapDR) & (maxParent < 37)).any()
        passOverlapRemoval = ~isOverlap
    else:
        passOverlapRemoval = np.ones_like(df['event']) == 1

    muonSelectTight = ((muons.pt > 30) &
                       (abs(muons.eta) < 2.4) &
                       (muons.tightId) &
                       (muons.relIso < 0.15))
    muonSelectLoose = ((muons.pt > 15) &
                       (abs(muons.eta) < 2.4) &
                       ((muons.isPFcand) & (muons.isTracker | muons.isGlobal)) &
                       (muons.relIso < 0.25) &
                       np.invert(muonSelectTight))

    eleEtaGap = (abs(electrons.eta) < 1.4442) | (abs(electrons.eta) > 1.566)
    elePassD0 = ((abs(electrons.eta) < 1.479) & (abs(electrons.d0) < 0.05) |
                 (abs(electrons.eta) > 1.479) & (abs(electrons.d0) < 0.1))
    elePassDZ = ((abs(electrons.eta) < 1.479) & (abs(electrons.dz) < 0.1) |
                 (abs(electrons.eta) > 1.479) & (abs(electrons.dz) < 0.2))

    electronSelectTight = ((electrons.pt > 35) &
                           (abs(electrons.eta) < 2.1) &
                           eleEtaGap &
                           (electrons.cutBased >= 4) &
                           elePassD0 & elePassDZ)
    electronSelectLoose = ((electrons.pt > 15) &
                           (abs(electrons.eta) < 2.4) &
                           eleEtaGap &
                           (electrons.cutBased >= 1) &
                           elePassD0 & elePassDZ &
                           np.invert(electronSelectTight))

    tightMuon = muons[muonSelectTight]
    looseMuon = muons[muonSelectLoose]
    tightElectron = electrons[electronSelectTight]
    looseElectron = electrons[electronSelectLoose]

    oneMuon = (tightMuon.counts == 1)
    muVeto = (tightMuon.counts == 0)
    oneEle = (tightElectron.counts == 1)
    eleVeto = (tightElectron.counts == 0)
    looseMuonSel = (looseMuon.counts == 0)
    looseElectronSel = (looseElectron.counts == 0)

    #### Calculate deltaR between photon and nearest muon
    ####### make combination pairs
    phoMu = photons['p4'].cross(tightMuon['p4'], nested=True)
    ####### check delta R of each combination; if the min is > 0.4 it is okay, or if there are no tight muons it passes
    dRphomu = (phoMu.i0.delta_r(phoMu.i1) > 0.4).all() | (tightMuon.counts == 0)
    phoEle = photons['p4'].cross(tightElectron['p4'], nested=True)
    dRphoele = ((phoEle.i0.delta_r(phoEle.i1)).min() > 0.4) | (tightElectron.counts == 0)

    # photon selection (no ID requirement used here)
    photonSelect = ((photons.pt > 20) &
                    (abs(photons.eta) < 1.4442) &
                    (photons.isEE | photons.isEB) &
                    (photons.passEleVeto) &
                    np.invert(photons.pixelSeed) &
                    dRphomu & dRphoele)

    # split out the ID requirement, enabling Iso and SIEIE to be inverted for control regions
    photonID = photons.photonId >= 2

    # parse VID cuts, define loose photons (not used yet)
    photon_MinPtCut = (photons.vidCuts >> 0 & 3) >= 2
    photon_PhoSCEtaMultiRangeCut = (photons.vidCuts >> 2 & 3) >= 2
    photon_PhoSingleTowerHadOverEmCut = (photons.vidCuts >> 4 & 3) >= 2
    photon_PhoFull5x5SigmaIEtaIEtaCut = (photons.vidCuts >> 6 & 3) >= 2
    photon_ChIsoCut = (photons.vidCuts >> 8 & 3) >= 2
    photon_NeuIsoCut = (photons.vidCuts >> 10 & 3) >= 2
    photon_PhoIsoCut = (photons.vidCuts >> 12 & 3) >= 2

    photonID_NoChIsoSIEIE = (photon_MinPtCut &
                             photon_PhoSCEtaMultiRangeCut &
                             photon_PhoSingleTowerHadOverEmCut &
                             photon_PhoFull5x5SigmaIEtaIEtaCut &
                             photon_NeuIsoCut &
                             photon_PhoIsoCut)

    tightPhotons = photons[photonSelect & photonID]
    loosePhotons = photons[photonSelect & photonID_NoChIsoSIEIE & photon_PhoFull5x5SigmaIEtaIEtaCut]
    loosePhotonsSideband = photons[photonSelect & photonID_NoChIsoSIEIE & (photons.sieie > 0.012)]

    ## medium jet ID cut
    jetIDbit = 1
    if year > 2016:
        jetIDbit = 2

    ## check dR jet,lepton & jet,photon
    jetMu = jets['p4'].cross(tightMuon['p4'], nested=True)
    dRjetmu = ((jetMu.i0.delta_r(jetMu.i1)).min() > 0.4) | (tightMuon.counts == 0)
    jetEle = jets['p4'].cross(tightElectron['p4'], nested=True)
    dRjetele = ((jetEle.i0.delta_r(jetEle.i1)).min() > 0.4) | (tightElectron.counts == 0)
    jetPho = jets['p4'].cross(tightPhotons['p4'], nested=True)
    dRjetpho = ((jetPho.i0.delta_r(jetPho.i1)).min() > 0.1) | (tightPhotons.counts == 0)

    jetSelect = ((jets.pt > 30) &
                 (abs(jets.eta) < 2.4) &
                 ((jets.jetId >> jetIDbit & 1) == 1) &
                 dRjetmu & dRjetele & dRjetpho)

    tightJets = jets[jetSelect]

    bTagWP = 0.6321  # 2016 DeepCSV working point
    btagged = tightJets.btag > bTagWP
    bJets = tightJets[btagged]

    ## Define M3, the mass of the 3-jet combination with the highest pT
    triJet = tightJets['p4'].choose(3)
    triJetPt = (triJet.i0 + triJet.i1 + triJet.i2).pt
    triJetMass = (triJet.i0 + triJet.i1 + triJet.i2).mass
    M3 = triJetMass[triJetPt.argmax()]

    leadingMuon = tightMuon[::1]
    leadingElectron = tightElectron[::1]

    leadingPhoton = tightPhotons[:, :1]
    leadingPhotonLoose = loosePhotons[:, :1]
    leadingPhotonSideband = loosePhotonsSideband[:, :1]

    # egammaMass = (leadingElectron['p4'] + leadingPhoton['p4']).mass
    egamma = leadingElectron['p4'].cross(leadingPhoton['p4'])
    mugamma = leadingMuon['p4'].cross(leadingPhoton['p4'])
    egammaMass = (egamma.i0 + egamma.i1).mass
    mugammaMass = (mugamma.i0 + mugamma.i1).mass

    if not isData:
        #### Photon categories, using the genIdx branch
        # reco photons really generated as electrons
        idx = leadingPhoton.genIdx
        matchedPho = (genpdgid[idx] == 22).any()
        isMisIDele = (abs(genpdgid[idx]) == 11).any()

        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)
        hadronicParent = maxParent > 25

        isGenPho = matchedPho & ~hadronicParent
        isHadPho = matchedPho & hadronicParent
        isHadFake = ~(isMisIDele | isGenPho | isHadPho) & (leadingPhoton.counts == 1)

        # define an integer encoding for the photon category axis
        phoCategory = 1 * isGenPho + 2 * isMisIDele + 3 * isHadPho + 4 * isHadFake

        isMisIDeleLoose = (leadingPhotonLoose.genFlav == 13).any()
        matchedPhoLoose = (leadingPhotonLoose.genFlav == 1).any()

        # look through the parentage to find whether there are any hadrons in the gen photon's parent history
        idx = leadingPhotonLoose.genIdx
        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)
        hadronicParent = maxParent > 25

        isGenPhoLoose = matchedPhoLoose & ~hadronicParent
        isHadPhoLoose = matchedPhoLoose & hadronicParent
        isHadFakeLoose = ~(isMisIDeleLoose | isGenPhoLoose | isHadPhoLoose) & (leadingPhotonLoose.counts == 1)

        # define an integer encoding for the photon category axis
        phoCategoryLoose = 1 * isGenPhoLoose + 2 * isMisIDeleLoose + 3 * isHadPhoLoose + 4 * isHadFakeLoose

        isMisIDeleSideband = (leadingPhotonSideband.genFlav == 13).any()
        matchedPhoSideband = (leadingPhotonSideband.genFlav == 1).any()

        # look through the parentage to find whether there are any hadrons in the gen photon's parent history
        idx = leadingPhotonSideband.genIdx
        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)
        hadronicParent = maxParent > 25

        isGenPhoSideband = matchedPhoSideband & ~hadronicParent
        isHadPhoSideband = matchedPhoSideband & hadronicParent
        isHadFakeSideband = ~(isMisIDeleSideband | isGenPhoSideband | isHadPhoSideband) & (leadingPhotonSideband.counts == 1)

        # define an integer encoding for the photon category axis
        phoCategorySideband = 1 * isGenPhoSideband + 2 * isMisIDeleSideband + 3 * isHadPhoSideband + 4 * isHadFakeSideband
    else:
        phoCategory = np.ones_like(df['event'])
        phoCategoryLoose = np.ones_like(df['event'])
        phoCategorySideband = np.ones_like(df['event'])

    ### remove filter selection
    ### This is already applied in the skim, and is causing data to fail for some reason
    ### (the flag branches are duplicated in NanoAOD for data, is it causing problems???)
    # mu_noLoose = (muTrigger & filters & passOverlapRemoval &
    #               oneMuon & eleVeto &
    #               looseMuonSel & looseElectronSel)
    # ele_noLoose = (eleTrigger & filters & passOverlapRemoval &
    #                oneEle & muVeto &
    #                looseMuonSel & looseElectronSel)
    mu_noLoose = (muTrigger & passOverlapRemoval &
                  oneMuon & eleVeto &
                  looseMuonSel & looseElectronSel)
    ele_noLoose = (eleTrigger & passOverlapRemoval &
                   oneEle & muVeto &
                   looseMuonSel & looseElectronSel)
    lep_noLoose = mu_noLoose | ele_noLoose

    lep_jetSel = (lep_noLoose & (tightJets.counts >= 4) & (bJets.counts >= 1))
    lep_zeropho = (lep_jetSel & (tightPhotons.counts == 0))
    lep_phosel = (lep_jetSel & (tightPhotons.counts == 1))
    lep_phoselLoose = (lep_jetSel & (loosePhotons.counts == 1))
    lep_phoselSideband = (lep_jetSel & (loosePhotonsSideband.counts == 1))
    lep_phosel_3j0t = (lep_noLoose & (tightJets.counts >= 3) &
                       (bJets.counts == 0) & (tightPhotons.counts == 1))

    lepFlavor = -0.5 * ele_noLoose + 0.5 * mu_noLoose

    evtWeight = np.ones_like(df['event'], dtype=np.float64)

    if not 'Data' in dataset:
        nMCevents = self.mcEventYields[datasetFull]
        xsec = crossSections[dataset]
        evtWeight *= xsec * lumis[year] / nMCevents

        # btag key name:
        #   name / working point / type / systematic / jetType
        #   ... / 0-loose 1-medium 2-tight / comb,mujets,iterativefit / central,up,down / 0-b 1-c 2-udcsg
        bJetSF_b = self.evaluator['btag%iDeepCSV_1_comb_central_0' % year](
            tightJets[tightJets.hadFlav == 5].eta,
            tightJets[tightJets.hadFlav == 5].pt,
            tightJets[tightJets.hadFlav == 5].btag)
        bJetSF_c = self.evaluator['btag%iDeepCSV_1_comb_central_1' % year](
            tightJets[tightJets.hadFlav == 4].eta,
            tightJets[tightJets.hadFlav == 4].pt,
            tightJets[tightJets.hadFlav == 4].btag)
        bJetSF_udcsg = self.evaluator['btag%iDeepCSV_1_incl_central_2' % year](
            tightJets[tightJets.hadFlav == 0].eta,
            tightJets[tightJets.hadFlav == 0].pt,
            tightJets[tightJets.hadFlav == 0].btag)

        bJetSF = JaggedArray(content=np.ones_like(tightJets.pt.content, dtype=np.float64),
                             starts=tightJets.starts,
                             stops=tightJets.stops)
        bJetSF.content[(tightJets.hadFlav == 5).content] = bJetSF_b.content
        bJetSF.content[(tightJets.hadFlav == 4).content] = bJetSF_c.content
        bJetSF.content[(tightJets.hadFlav == 0).content] = bJetSF_udcsg.content

        ## mc efficiency lookup; data efficiency is eff * scale factor
        btagEfficiencies = taggingEffLookup(datasetFull,
                                            tightJets.hadFlav,
                                            tightJets.pt,
                                            tightJets.eta)
        btagEfficienciesData = btagEfficiencies * bJetSF

        ## probability is the product of all efficiencies of tagged jets,
        ## times the product of (1 - eff) for all untagged jets
        ## https://twiki.cern.ch/twiki/bin/view/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
        pMC = btagEfficiencies[btagged].prod() * (1. - btagEfficiencies[np.invert(btagged)]).prod()
        pData = btagEfficienciesData[btagged].prod() * (1. - btagEfficienciesData[np.invert(btagged)]).prod()

        btagWeight = pData / pMC
        btagWeight[pData == 0] = 0
        evtWeight *= btagWeight

        eleID = self.ele_id_sf(tightElectron.eta, tightElectron.pt)
        eleIDerr = self.ele_id_err(tightElectron.eta, tightElectron.pt)
        eleRECO = self.ele_reco_sf(tightElectron.eta, tightElectron.pt)
        eleRECOerr = self.ele_reco_err(tightElectron.eta, tightElectron.pt)

        eleSF = (eleID * eleRECO).prod()
        eleSFup = ((eleID + eleIDerr) * (eleRECO + eleRECOerr)).prod()
        eleSFdo = ((eleID - eleIDerr) * (eleRECO - eleRECOerr)).prod()
        evtWeight *= eleSF

        muID = self.mu_id_sf(tightMuon.eta, tightMuon.pt)
        muIDerr = self.mu_id_err(tightMuon.eta, tightMuon.pt)
        muIso = self.mu_iso_sf(tightMuon.eta, tightMuon.pt)
        muIsoerr = self.mu_iso_err(tightMuon.eta, tightMuon.pt)
        muTrig = self.mu_iso_sf(abs(tightMuon.eta), tightMuon.pt)
        muTrigerr = self.mu_iso_err(abs(tightMuon.eta), tightMuon.pt)

        muSF = (muID * muIso * muTrig).prod()
        muSF_up = ((muID + muIDerr) * (muIso + muIsoerr) * (muTrig + muTrigerr)).prod()
        muSF_down = ((muID - muIDerr) * (muIso - muIsoerr) * (muTrig - muTrigerr)).prod()
        evtWeight *= muSF

    output['photon_pt'].fill(dataset=dataset,
                             pt=tightPhotons.p4.pt[:, :1][lep_phosel].flatten(),
                             category=phoCategory[lep_phosel].flatten(),
                             lepFlavor=lepFlavor[lep_phosel],
                             weight=evtWeight[lep_phosel].flatten())
    output['photon_eta'].fill(dataset=dataset,
                              eta=tightPhotons.eta[:, :1][lep_phosel].flatten(),
                              category=phoCategory[lep_phosel].flatten(),
                              lepFlavor=lepFlavor[lep_phosel],
                              weight=evtWeight[lep_phosel].flatten())
    output['photon_chIsoSideband'].fill(dataset=dataset,
                                        chIso=loosePhotonsSideband.chIso[:, :1][lep_phoselSideband].flatten(),
                                        category=phoCategorySideband[lep_phoselSideband].flatten(),
                                        lepFlavor=lepFlavor[lep_phoselSideband],
                                        weight=evtWeight[lep_phoselSideband].flatten())
    output['photon_chIso'].fill(dataset=dataset,
                                chIso=loosePhotons.chIso[:, :1][lep_phoselLoose].flatten(),
                                category=phoCategoryLoose[lep_phoselLoose].flatten(),
                                lepFlavor=lepFlavor[lep_phoselLoose],
                                weight=evtWeight[lep_phoselLoose].flatten())
    output['photon_lepton_mass'].fill(dataset=dataset,
                                      mass=egammaMass[lep_phosel & ele_noLoose].flatten(),
                                      category=phoCategory[lep_phosel & ele_noLoose].flatten(),
                                      lepFlavor=lepFlavor[lep_phosel & ele_noLoose],
                                      weight=evtWeight[lep_phosel & ele_noLoose].flatten())
    output['photon_lepton_mass'].fill(dataset=dataset,
                                      mass=mugammaMass[lep_phosel & mu_noLoose].flatten(),
                                      category=phoCategory[lep_phosel & mu_noLoose].flatten(),
                                      lepFlavor=lepFlavor[lep_phosel & mu_noLoose],
                                      weight=evtWeight[lep_phosel & mu_noLoose].flatten())
    output['photon_lepton_mass_3j0t'].fill(dataset=dataset,
                                           mass=egammaMass[lep_phosel_3j0t & ele_noLoose].flatten(),
                                           category=phoCategory[lep_phosel_3j0t & ele_noLoose].flatten(),
                                           lepFlavor=lepFlavor[lep_phosel_3j0t & ele_noLoose],
                                           weight=evtWeight[lep_phosel_3j0t & ele_noLoose].flatten())
    output['photon_lepton_mass_3j0t'].fill(dataset=dataset,
                                           mass=mugammaMass[lep_phosel_3j0t & mu_noLoose].flatten(),
                                           category=phoCategory[lep_phosel_3j0t & mu_noLoose].flatten(),
                                           lepFlavor=lepFlavor[lep_phosel_3j0t & mu_noLoose],
                                           weight=evtWeight[lep_phosel_3j0t & mu_noLoose].flatten())
    output['M3'].fill(dataset=dataset,
                      M3=M3[lep_phosel].flatten(),
                      category=phoCategoryLoose[lep_phosel].flatten(),
                      lepFlavor=lepFlavor[lep_phosel],
                      weight=evtWeight[lep_phosel].flatten())
    output['M3Presel'].fill(dataset=dataset,
                            M3=M3[lep_zeropho].flatten(),
                            lepFlavor=lepFlavor[lep_zeropho],
                            weight=evtWeight[lep_zeropho].flatten())

    output['EventCount'] = len(df['event'])

    return output
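# A sketch of the b-tag event-weight method (1a) applied above (hypothetical
# per-jet numbers): the event weight is the data/MC ratio of the event's
# tagging probability, per the BTagSFMethods TWiki linked in the code.
import numpy as np

eff_mc = np.array([0.70, 0.60, 0.10])  # per-jet MC tag efficiencies
sf = np.array([0.95, 1.05, 1.00])      # per-jet data/MC scale factors
tagged = np.array([True, False, False])
eff_data = eff_mc * sf
pMC = eff_mc[tagged].prod() * (1. - eff_mc[~tagged]).prod()
pData = eff_data[tagged].prod() * (1. - eff_data[~tagged]).prod()
btagWeight = pData / pMC
assert 0 < btagWeight < 1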
def convert_jec_txt_file(jecFilePath):
    jec_f = open(jecFilePath, 'r')
    layoutstr = jec_f.readline().strip().strip('{}')
    jec_f.close()

    name = jecFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception('First column of JEC descriptor must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nBinColumns = 2 * nBinnedVars
    nEvalVars = int(layout[nBinnedVars + 1])
    formula = layout[nBinnedVars + nEvalVars + 2]
    nParms = 0
    while formula.count('[%i]' % nParms):
        formula = formula.replace('[%i]' % nParms, 'p%i' % nParms)
        nParms += 1
    #protect function names that contain template variable letters
    funcs_to_cap = ['max', 'exp']
    for f in funcs_to_cap:
        formula = formula.replace(f, f.upper())
    templatevars = ['x', 'y', 'z', 'w', 't', 's']
    varnames = [layout[i + nBinnedVars + 2] for i in range(nEvalVars)]
    for find, replace in zip(templatevars, varnames):
        formula = formula.replace(find, replace)
    #restore the protected function names
    for f in funcs_to_cap:
        formula = formula.replace(f.upper(), f)
    nFuncColumns = 2 * nEvalVars + nParms
    nTotColumns = nFuncColumns + 1

    #parse the columns
    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    columns.append('NVars')
    dtypes.append('<i8')
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    for i in range(nParms):
        columns.append('p%i' % i)
        dtypes.append('<f8')

    pars = np.genfromtxt(jecFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=True,
                         encoding='ascii')

    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = JaggedArray.fromcounts(counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    binshapes = tuple([bins[thebin].size - 1 for thebin in bin_order])
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=np.int)
    if len(bin_order) > 1:
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0)  # need counts-1 since we only care about Nbins
    for i in range(nEvalVars):
        clamp_mins[layout[i + offset_name]] = JaggedArray.fromcounts(
            jagged_counts, np.atleast_1d(pars[columns[i + offset_col]]))
        clamp_maxs[layout[i + offset_name]] = JaggedArray.fromcounts(
            jagged_counts, np.atleast_1d(pars[columns[i + offset_col + 1]]))
        var_order.append(layout[i + offset_name])
        offset_col += 1

    #now get the parameters, which we will look up with the clamps
    parms = []
    parm_order = []
    offset_col = 2 * nBinnedVars + 1 + 2 * nEvalVars
    for i in range(nParms):
        parms.append(JaggedArray.fromcounts(jagged_counts, pars[columns[i + offset_col]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jet_energy_corrector')] = (formula,
                                                  (bins, bin_order),
                                                  (clamp_mins, clamp_maxs, var_order),
                                                  (parms, parm_order))
    return wrapped_up
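# A sketch of the formula rewriting performed above (hypothetical JEC
# formula): parameter slots '[i]' become named parameters, and template
# variables map onto the real variable names, with 'max'/'exp' protected so
# their letters are not rewritten.
formula = "max(0.0001,[0]+[1]/x)"
nParms = 0
while formula.count('[%i]' % nParms):
    formula = formula.replace('[%i]' % nParms, 'p%i' % nParms)
    nParms += 1
for f in ['max', 'exp']:
    formula = formula.replace(f, f.upper())
for find, replace in zip(['x', 'y'], ['JetPt', 'JetEta']):
    formula = formula.replace(find, replace)
for f in ['max', 'exp']:
    formula = formula.replace(f.upper(), f)
assert formula == "max(0.0001,p0+p1/JetPt)"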
def jagged_1():
    boundaries = [0, 3, 5, 6, 9, 12, 12]
    return JaggedArray(
        boundaries[:-1], boundaries[1:],
        [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0, 11.0])
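# A sketch of why the starts/stops construction above works (hypothetical
# content; assumes awkward0): because the boundaries are contiguous, the same
# structure can be built from offsets directly.
import numpy as np
from awkward import JaggedArray

boundaries = np.array([0, 3, 5, 6, 9, 12, 12])
content = np.arange(12, dtype=float)
a = JaggedArray(boundaries[:-1], boundaries[1:], content)
b = JaggedArray.fromoffsets(boundaries, content)
assert all((x == y).all() for x, y in zip(a, b))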
loose_muons = muons[loose_muon_selection]
loose_photons = photons[loose_photon_selection]
selected_taus = taus[tau_selection]
selected_jets = jets[jet_selection]
# end selection

# clean leptons
e_combinations = loose_electrons.p4.cross(selected_jets.p4, nested=True)
mask = (e_combinations.i0.delta_r(e_combinations.i1) < 0.3).any()
clean_electrons = loose_electrons[~mask]

m_combinations = loose_muons.p4.cross(selected_jets.p4, nested=True)
mask = (m_combinations.i0.delta_r(m_combinations.i1) < 0.3).any()
clean_muons = loose_muons[~mask]

clean_leptons = JaggedArray.fromiter([clean_electrons, clean_muons])
# once merge is done
# mask = loose_electrons.p4.match(selected_jets.p4, 0.3)
# clean_electrons = loose_electrons[~mask]
# end cleaning

# weights evaluation
e_counts = clean_electrons.counts
e_sfTrigg = np.ones(clean_electrons.size)
e_sfTrigg[e_counts > 0] = 1 - evaluator["hEffEtaPt"](
    clean_electrons.eta[e_counts > 0, 0],
    clean_electrons.pt[e_counts > 0, 0])
e_sfTrigg[e_counts > 1] = 1 - (1 - evaluator["hEffEtaPt"](
    clean_electrons.eta[e_counts > 1, 0],
    clean_electrons.pt[e_counts > 1, 0])) * (
def process(self, events):
    logging.debug('starting process')
    output = self.accumulator.identity()

    dataset = events.metadata['dataset']
    self._isData = dataset in [
        'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma',
        'MuonEG'
    ]

    selection = processor.PackedSelection()

    # TODO: instead of cutflow, use processor.PackedSelection
    output['cutflow']['all events'] += events.size

    logging.debug('applying lumi mask')
    if self._isData:
        lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
        events['passLumiMask'] = lumiMask(np.array(events.run),
                                          np.array(events.luminosityBlock))
    else:
        events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
    passLumiMask = events.passLumiMask
    selection.add('lumiMask', passLumiMask)

    logging.debug('adding trigger')
    self._add_trigger(events)
    passHLT = events.passHLT
    selection.add('trigger', passHLT)
    output['cutflow']['pass trigger'] += passHLT.sum()

    # if nothing passes the trigger, return early
    if passHLT.sum() == 0:
        return output

    # require one good vertex
    logging.debug('checking vertices')
    passGoodVertex = (events.PV.npvsGood > 0)
    output['cutflow']['good vertex'] += passGoodVertex.sum()
    selection.add('goodVertex', passGoodVertex)

    # run rochester muon momentum corrections
    rochester = self._rochester
    _muon_offsets = events.Muon.pt.offsets
    _charge = events.Muon.charge
    _pt = events.Muon.pt
    _eta = events.Muon.eta
    _phi = events.Muon.phi
    if self._isData:
        _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
        # _kErr = rochester.kScaleDTerror(_charge, _pt, _eta, _phi)
    else:
        # default correction if a matched gen muon is present
        _gpt = events.Muon.matched_gen.pt
        # backup correction without gen: smear using tracker layers
        _nl = events.Muon.nTrackerLayers
        _u = JaggedArray.fromoffsets(_muon_offsets,
                                     np.random.rand(*_pt.flatten().shape))
        _hasgen = (_gpt.fillna(-1) > 0)
        _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                       _eta[_hasgen], _phi[_hasgen],
                                       _gpt[_hasgen])
        _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                     _eta[~_hasgen], _phi[~_hasgen],
                                     _nl[~_hasgen], _u[~_hasgen])
        _k = np.ones_like(_pt.flatten())
        _k[_hasgen.flatten()] = _kspread.flatten()
        _k[~_hasgen.flatten()] = _ksmear.flatten()
        _k = JaggedArray.fromoffsets(_muon_offsets, _k)
        # _kErrspread = rochester.kSpreadMCerror(_charge[_hasgen],
        #                                        _pt[_hasgen], _eta[_hasgen],
        #                                        _phi[_hasgen], _gpt[_hasgen])
        # _kErrsmear = rochester.kSmearMCerror(_charge[~_hasgen],
        #                                      _pt[~_hasgen], _eta[~_hasgen],
        #                                      _phi[~_hasgen], _nl[~_hasgen],
        #                                      _u[~_hasgen])
        # _kErr = np.ones_like(_pt.flatten())
        # _kErr[_hasgen.flatten()] = _kErrspread.flatten()
        # _kErr[~_hasgen.flatten()] = _kErrsmear.flatten()
        # _kErr = JaggedArray.fromoffsets(_muon_offsets, _kErr)

    # only correct muons below 200 GeV
    mask = _pt.flatten() < 200
    rochester_pt = _pt.flatten()
    rochester_pt[mask] = (_k * _pt).flatten()[mask]
    events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets, rochester_pt)

    logging.debug('adding muon id')
    self._add_muon_id(events.Muon)
    logging.debug('adding electron id')
    self._add_electron_id(events.Electron)

    logging.debug('selecting muons')
    muonId = (events.Muon.passId > 0)
    muons = events.Muon[muonId]

    logging.debug('selecting electrons')
    electronId = (events.Electron.passId > 0)
    electrons = events.Electron[electronId]

    passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
    output['cutflow']['two leptons'] += passTwoLeptons.sum()
    selection.add('twoLeptons', passTwoLeptons)

    # build z candidates: remake the leptons so both flavors have the same
    # columns (pt eta phi mass charge pdgId)
    logging.debug('rebuilding leptons')

    def rebuild(leptons):
        return JaggedCandidateArray.candidatesfromoffsets(
            leptons.offsets,
            pt=leptons.pt.flatten(),
            eta=leptons.eta.flatten(),
            phi=leptons.phi.flatten(),
            mass=leptons.mass.flatten(),
            charge=leptons.charge.flatten(),
            pdgId=leptons.pdgId.flatten(),
            # needed for electron SF
            etaSC=(leptons.etaSC.flatten()
                   if hasattr(leptons, 'etaSC') else leptons.eta.flatten()),
        )

    newMuons = rebuild(muons)
    newElectrons = rebuild(electrons)

    logging.debug('building 2 leptons')
    ee_cands = newElectrons.choose(2)
    mm_cands = newMuons.choose(2)

    # combine the flavors
    z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

    def bestcombination(zcands):
        good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
        # this keeps the first z cand in each event;
        # it should instead sort the best first
        # TODO: select best
        zcands = zcands[good_charge][:, :1]
        return zcands

    logging.debug('selecting best combinations')
    z_cands = bestcombination(z_cands)

    # index of the higher-pt (z1) and lower-pt (z2) lepton in each candidate
    z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
    z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
    swap = (z_cands['0']['p4'].pt.flatten() < z_cands['1']['p4'].pt.flatten())
    z1[swap] = 1
    z2[swap] = 0
    z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
    z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)

    passZCand = (z_cands.counts > 0)
    output['cutflow']['z cand'] += passZCand.sum()
    selection.add('zCand', passZCand)

    passMassWindow = (passZCand
                      & (z_cands[(z_cands.p4.mass > 60)
                                 & (z_cands.p4.mass < 120)].counts > 0))
    output['cutflow']['mass window'] += passMassWindow.sum()
    selection.add('massWindow', passMassWindow)

    # I'm sure there is a better way, but for now just do this
    def get_lepton_values(zl, key):
        val = np.zeros_like(zl.flatten(), dtype=float)
        if len(val) == 0:
            return JaggedArray.fromoffsets(zl.offsets, val)
        for i in range(2):
            mask = (i == zl.flatten())
            if key == 'pt':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].pt
            elif key == 'eta':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].eta
            elif key == 'phi':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].phi
            elif key == 'mass':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].mass
            else:
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask][key]
        return JaggedArray.fromoffsets(zl.offsets, val)

    z1pt = get_lepton_values(z1, 'pt')
    z2pt = get_lepton_values(z2, 'pt')
    passPt = ((z1pt > 30) & (z2pt > 20)).sum() > 0
    output['cutflow']['pt threshold'] += passPt.sum()
    selection.add('ptThreshold', passPt)

    chanSels = {}
    z1pdg = get_lepton_values(z1, 'pdgId')
    z2pdg = get_lepton_values(z2, 'pdgId')
    for chan in ['ee', 'mm']:
        if chan == 'ee':
            pdgIds = (11, 11)
        elif chan == 'mm':
            pdgIds = (13, 13)
        chanSels[chan] = ((abs(z1pdg) == pdgIds[0])
                          & (abs(z2pdg) == pdgIds[1]))

    weights = processor.Weights(events.run.size)
    if self._isData:
        output['sumw'][dataset] = 0  # always set to 0 for data
    else:
        output['sumw'][dataset] += events.genWeight.sum()
        weights.add('genWeight', events.genWeight)
        weights.add(
            'pileupWeight',
            self._corrections['pileupWeight'](events.Pileup.nPU),
            self._corrections['pileupWeightUp'](events.Pileup.nPU),
            self._corrections['pileupWeightDown'](events.Pileup.nPU),
        )
        zls = [z1, z2]

        # electron SF
        for ei, zl in enumerate(zls):
            ei = str(ei)
            eta = get_lepton_values(zl, 'etaSC')
            pt = get_lepton_values(zl, 'pt')
            electronRecoSF = self._corrections['electron_reco'](eta, pt)
            electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
            electronSF = np.ones_like(electronRecoSF.prod())
            if ei in ['0', '1']:
                chans = ['ee']
            else:
                chans = []
            for chan in chans:
                # turns empty arrays into 0's, nonempty into 1's
                chanSel = (chanSels[chan].ones_like().sum() > 0)
                electronSF[chanSel] *= electronRecoSF[chanSel].prod()
                electronSF[chanSel] *= electronIdSF[chanSel].prod()
            weights.add('electronSF' + ei, electronSF)

        # muon SF
        for mi, zl in enumerate(zls):
            mi = str(mi)
            eta = get_lepton_values(zl, 'eta')
            pt = get_lepton_values(zl, 'pt')
            if self._year == '2016':
                idSF = self._corrections['muon_id_MediumID'](eta, pt)
                isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                    eta, pt)
            else:
                idSF = self._corrections['muon_id_MediumPromptID'](
                    pt, abs(eta))
                isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                    pt, abs(eta))
            muonSF = np.ones_like(idSF.prod())
            if mi in ['0', '1']:
                chans = ['mm']
            else:
                chans = []
            for chan in chans:
                # turns empty arrays into 0's, nonempty into 1's
                chanSel = (chanSels[chan].ones_like().sum() > 0)
                muonSF[chanSel] *= idSF[chanSel].prod()
                muonSF[chanSel] *= isoSF[chanSel].prod()
            weights.add('muonSF' + mi, muonSF)

    logging.debug('filling')
    for sel in self._selections:
        if sel == 'massWindow':
            cut = selection.all('lumiMask', 'trigger', 'goodVertex',
                                'twoLeptons', 'zCand', 'massWindow',
                                'ptThreshold')
            for chan in ['ee', 'mm']:
                chanSel = chanSels[chan]
                weight = chanSel.astype(float) * weights.weight()
                output[sel + '_zmass'].fill(
                    dataset=dataset,
                    channel=chan,
                    mass=z_cands[cut].p4.mass.flatten(),
                    weight=weight[cut].flatten(),
                )
                output[sel + '_met'].fill(
                    dataset=dataset,
                    channel=chan,
                    met=events.MET.pt[cut],
                    weight=weight[cut].flatten(),
                )
                output[sel + '_pileup'].fill(
                    dataset=dataset,
                    channel=chan,
                    npvs=events.PV.npvs[cut],
                    weight=weight[cut].flatten(),
                )
    return output
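# For context, a minimal driver along these lines can exercise process() end
# to end with coffea 0.6-era APIs. This is a hedged sketch: the fileset paths,
# the processor class name (ZZProcessor), its constructor arguments, and the
# executor options are placeholders, not part of the original code.
from coffea import processor

fileset = {
    'SingleMuon': ['SingleMuon_2018A.root'],  # hypothetical input files
    'DYJetsToLL': ['DYJetsToLL_M-50.root'],
}

output = processor.run_uproot_job(
    fileset,
    treename='Events',
    processor_instance=ZZProcessor(corrections, year='2018'),  # assumed ctor
    executor=processor.iterative_executor,
    executor_args={'nano': True},  # interpret inputs as NanoEvents
    chunksize=500000,
)
print(output['cutflow'])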
        treename, branches=branches, namedecode='utf-8',
        entrysteps=200000)):
    charge = arrays['Muon_charge']
    pt = arrays['Muon_pt']
    eta = arrays['Muon_eta']
    phi = arrays['Muon_phi']
    if not isData:
        # for default if gen present
        gid = arrays['Muon_genPartIdx']
        gpt = arrays['GenPart_pt']
        # for backup w/o gen
        nl = arrays['Muon_nTrackerLayers']
        u = np.random.rand(*pt.flatten().shape)
        u = JaggedArray.fromoffsets(pt.offsets, u)
        fullu += [u]
    for ie in range(len(pt)):
        subres = []
        suberr = []
        for im in range(len(pt[ie])):
            if isData:
                subres += [
                    roccor.kScaleDT(int(charge[ie][im]), float(pt[ie][im]),
                                    float(eta[ie][im]), float(phi[ie][im]))
                ]
                suberr += [
                    roccor.kScaleDTerror(int(charge[ie][im]),
                                         float(pt[ie][im]),
                                         float(eta[ie][im]),
                                         float(phi[ie][im]))
def jagged_1():
    return JaggedArray.fromiter([[0.0, 1.1, 2.2], [3.3, 4.4], [5.5],
                                 [6.6, 7.7, 8.8], [9.9, 10.0, 11.0], []])
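# A quick sanity check of the helper above, relying only on standard
# awkward-0.x JaggedArray semantics. A hedged sketch: it assumes jagged_1 can
# be called directly; if it is registered as a pytest fixture, request it as a
# test argument instead of calling it.
arr = jagged_1()
assert arr.counts.tolist() == [3, 2, 1, 3, 3, 0]  # per-event lengths
assert arr[1].tolist() == [3.3, 4.4]              # second event
# flatten() drops event boundaries; fromoffsets() restores them
rebuilt = JaggedArray.fromoffsets(arr.offsets, arr.flatten())
assert (rebuilt == arr).all().all()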
def _build_standard_jme_lookup(name, layout, pars, nBinnedVars, nBinColumns,
                               nEvalVars, formula, nParms, columns, dtypes,
                               interpolatedFunc=False):
    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = JaggedArray.fromcounts(counts,
                                                                   allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    binshapes = tuple([bins[thebin].size - 1 for thebin in bin_order])
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=np.int)
    if len(bin_order) > 1:
        #need counts-1 since we only care about Nbins
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0)
    for i in range(nEvalVars):
        var_order.append(layout[i + offset_name])
        if not interpolatedFunc:
            clamp_mins[layout[i + offset_name]] = JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[columns[i + offset_col]]))
            clamp_maxs[layout[i + offset_name]] = JaggedArray.fromcounts(
                jagged_counts,
                np.atleast_1d(pars[columns[i + offset_col + 1]]))
            offset_col += 1

    #now get the parameters, which we will look up with the clamped values
    parms = []
    parm_order = []
    offset_col = 2 * nBinnedVars + 1 + (interpolatedFunc == False) * 2 * nEvalVars
    for i in range(nParms):
        parms.append(JaggedArray.fromcounts(jagged_counts,
                                            pars[columns[i + offset_col]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jme_standard_function')] = (formula,
                                                   (bins, bin_order),
                                                   (clamp_mins, clamp_maxs,
                                                    var_order),
                                                   (parms, parm_order))
    return wrapped_up
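# To make the jagged-bin bookkeeping above concrete, here is a toy version of
# the inner-variable binning: for each outer bin, the de-duplicated inner bin
# edges are recovered with np.union1d and packed into a JaggedArray, mirroring
# the else branch above. All names and numbers are illustrative only.
import numpy as np
from awkward import JaggedArray  # awkward 0.x

# toy parameter table: outer bin edges repeat once per inner-bin row,
# mimicking the min/max column pairs read out of a JEC text file
outer_min = np.array([0.0, 0.0, 1.3, 1.3, 1.3])
inner_min = np.array([10., 50., 10., 30., 60.])
inner_max = np.array([50., 100., 30., 60., 100.])

counts = np.zeros(0, dtype=int)
allBins = np.zeros(0, dtype=float)
for binMin in np.unique(outer_min):
    sel = outer_min == binMin
    theBins = np.union1d(inner_min[sel], inner_max[sel])  # unique edges
    allBins = np.append(allBins, theBins)
    counts = np.append(counts, theBins.size)

inner_bins = JaggedArray.fromcounts(counts, allBins)
print(inner_bins[0].tolist())  # [10.0, 50.0, 100.0]        -> 2 bins in outer bin 0
print(inner_bins[1].tolist())  # [10.0, 30.0, 60.0, 100.0]  -> 3 bins in outer bin 1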