def test_IndexedOptionArray():
    """Check min/argmin on option-type data, both flat and jagged."""
    # Five floats viewed through an option index; the -1 entries come back
    # as None in the tolist() check below.
    values = awkward1.Array([1.1, 2.2, 3.3, 4.4, 5.5]).layout
    pointers = awkward1.layout.Index64(numpy.array([4, 2, -1, -1, 1, 0, 1]))
    option_layout = awkward1.layout.IndexedOptionArray64(pointers, values)
    array = awkward1.Array(option_layout)

    assert array.tolist() == [5.5, 3.3, None, None, 2.2, 1.1, 2.2]
    assert awkward1.min(array, axis=0) == 1.1
    assert awkward1.argmin(array, axis=0) == 5

    # (nested data containing None, reduction axis, expected argmin)
    jagged_cases = (
        ([[2.2, 1.1], [None, 3.3], [2.2, 1.1]], -1, [1, 1, 1]),
        ([[2.2, 1.1], [None, 3.3], [2.2, None, 1.1]], -1, [1, 1, 2]),
        ([[2.2, 1.1], [3.3, None], [2.2, None, 1.1]], -1, [1, 0, 2]),
        ([[2.2, 1.1, 0.0], [], [None, 0.5], [2, 1]], 0, [3, 2, 0]),
        ([[2.2, 1.1, 0.0], [], [0.5, None], [2, 1]], 0, [2, 3, 0]),
        ([[2.2, 1.1, 0.0], [0.5, None], [], [2, 1]], 0, [1, 3, 0]),
    )
    for data, axis, expected in jagged_cases:
        positions = awkward1.argmin(awkward1.Array(data), axis=axis)
        assert positions.tolist() == expected
def test_jagged_axis0():
    """Reduce a jagged array along axis=0 with min and argmin."""
    jagged = awkward1.Array([[1.1, 5.5], [4.4], [2.2, 3.3, 0.0, -10]])

    # Column-wise minimum across sublists of unequal length.
    smallest = awkward1.min(jagged, axis=0)
    assert smallest.tolist() == [1.1, 3.3, 0, -10]

    # Index of the sublist that supplies each column minimum.
    where = awkward1.argmin(jagged, axis=0)
    assert where.tolist() == [0, 2, 2, 2]
def embed_crossref(source, idx_name, dest, dest_name):
    """Embed a cross-reference

    For every index stored in ``source[idx_name]`` the matching element of
    ``dest`` (taken from the same outer entry) is looked up, and the result is
    attached to ``source`` as the new field ``dest_name``.  ``source`` is
    modified in place; nothing is returned.

    Parameters
    ----------
        source : ak.Array
            any array with shape N * var * {record}
        idx_name : str
            A field in the source record
        dest : ak.Array
            any array with shape N * var * {record}, where:
            ``ak.max(source[idx_name], axis=1) < ak.num(dest)`` and
            ``ak.min(source[idx_name], axis=1) >= 0``
        dest_name : str
            Name of the field that is created (or overwritten) on ``source``
            to hold the referenced ``dest`` elements

    Raises
    ------
        AssertionError
            If any index in ``source[idx_name]`` falls outside its ``dest``
            entry.
    """
    local_idx = source[idx_name]

    # Validate the per-entry index range (the leftover debug prints of these
    # same quantities were removed).
    assert ak.all(ak.max(local_idx, axis=1) < ak.num(dest)), (
        "source[%r] contains indices beyond the length of dest" % (idx_name,)
    )
    assert ak.all(ak.min(local_idx, axis=1) >= 0), (
        "source[%r] contains negative indices" % (idx_name,)
    )

    # Shift each entry's local indices by where that entry starts inside the
    # flattened ``dest``, giving positions into ``ak.flatten(dest)``.
    id_global = ak.flatten(
        local_idx + np.asarray(dest.layout.starts), axis=None
    )

    # Re-wrap the gathered elements with the two levels of list offsets taken
    # from ``source`` itself; this requires both ``source.layout`` and
    # ``source.layout.content`` to expose ``.offsets``.
    source[dest_name] = ak.Array(
        ak.layout.ListOffsetArray64(
            source.layout.offsets,
            ak.layout.ListOffsetArray64(
                source.layout.content.offsets,
                ak.flatten(dest)[id_global].layout,
            ),
        )
    )
def process(self, events):
    """Fill the analysis histograms for one chunk of events.

    The chunk is pre-filtered to events with more than two jets, physics
    objects are built with the project helpers, the dilepton baseline
    selection is applied, an ONNX neural network is evaluated on the
    selected events and all histograms in the accumulator are filled.

    Parameters
    ----------
    events
        Event array of the current chunk. ``events.metadata['dataset']``
        must hold the dataset name; the fields read here include ``Jet``,
        ``MET``, ``PV`` and, for datasets not matching the data-stream
        pattern below, ``GenL``, ``GenPart``, ``weight`` and the PU weights.

    Returns
    -------
    The filled accumulator obtained from ``self.accumulator.identity()``.
    """
    output = self.accumulator.identity()

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2

    ev = events[presel]
    dataset = ev.metadata['dataset']

    # Evaluate the dataset-name test once instead of recompiling the regex at
    # every use.  NOTE(review): datasets matching these names are presumably
    # collision-data streams without generator information - confirm.
    has_gen_info = not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset)

    # load the config - probably not needed anymore
    cfg = loadConfig()

    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)

    if has_gen_info:
        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

    ## Muons
    muon = Collections(ev, "Muon", "tightSSTTH").get()
    vetomuon = Collections(ev, "Muon", "vetoTTH").get()
    dimuon = choose(muon, 2)
    SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)  # not used further in this method
    leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
    leading_muon = muon[leading_muon_idx]  # not used further in this method

    ## Electrons
    electron = Collections(ev, "Electron", "tightSSTTH").get()
    vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
    dielectron = choose(electron, 2)
    SSelectron = ak.any((dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)  # not used further in this method
    leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
    leading_electron = electron[leading_electron_idx]  # not used further in this method

    ## Merge electrons and muons - this should work better now in ak1
    dilepton = cross(muon, electron)
    SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0, axis=1)  # not used further in this method

    lepton = ak.concatenate([muon, electron], axis=1)
    leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
    leading_lepton = lepton[leading_lepton_idx]
    trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
    trailing_lepton = lepton[trailing_lepton_idx]

    dilepton_mass = (leading_lepton + trailing_lepton).mass
    dilepton_pt = (leading_lepton + trailing_lepton).pt
    dilepton_dR = delta_r(leading_lepton, trailing_lepton)  # not used further in this method

    lepton_pdgId_pt_ordered = ak.fill_none(
        ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True),
        0,
    )  # not used further in this method

    if has_gen_info:
        n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
        n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

    mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
    min_mt_lep_met = ak.min(mt_lep_met, axis=1)

    ## Tau and other stuff
    tau = getTaus(ev)
    track = getIsoTracks(ev)

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]  # need to sort wrt smeared and recorrected jet pt
    jet = jet[~match(jet, muon, deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~match(jet, electron, deltaRCut=0.4)]  # remove jets that overlap with electrons

    central = jet[(abs(jet.eta) < 2.4)]
    btag = getBTagsDeepFlavB(jet, year=self.year)  # should study working point for DeepJet
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)
    fwd_noPU = getFwdJet(light, puId=False)  # not used further in this method

    high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

    bl = cross(lepton, high_score_btag)
    bl_dR = delta_r(bl['0'], bl['1'])
    min_bl_dR = ak.min(bl_dR, axis=1)

    ## forward jets
    j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]  # highest momentum spectator

    jf = cross(j_fwd, jet)
    mjf = (jf['0'] + jf['1']).mass
    j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1']  # this is the jet that forms the largest invariant mass with j_fwd
    delta_eta = abs(j_fwd2.eta - j_fwd.eta)

    ## MET -> can switch to puppi MET
    met_pt = ev.MET.pt
    met_phi = ev.MET.phi  # not used further in this method

    ## other variables
    ht = ak.sum(jet.pt, axis=1)
    st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

    # define the weight
    weight = Weights(len(ev))

    if has_gen_info:
        # lumi weight
        weight.add("weight", ev.weight * cfg['lumi'][self.year])
        #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)

        # PU weight - not in the babies...
        weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

        # b-tag SFs
        weight.add("btag", self.btagSF.Method1a(btag, light))

        # lepton SFs
        weight.add("lepton", self.leptonSF.get(electron, muon))

    cutflow = Cutflow(output, ev, weight=weight)

    sel = Selection(
        dataset=dataset,
        events=ev,
        year=self.year,
        ele=electron,
        ele_veto=vetoelectron,
        mu=muon,
        mu_veto=vetomuon,
        jet_all=jet,
        jet_central=central,
        jet_btag=btag,
        jet_fwd=fwd,
        met=ev.MET,
    )

    BL = sel.dilep_baseline(cutflow=cutflow, SS=True)

    weight_BL = weight.weight()[BL]
    leading_lepton_BL = leading_lepton[BL]

    # define the inputs to the NN: one row per input variable, transposed
    # below so that each row corresponds to one selected event
    NN_inputs = np.stack([
        ak.to_numpy(ak.num(jet[BL])),
        ak.to_numpy(ak.num(tau[BL])),
        ak.to_numpy(ak.num(track[BL])),
        ak.to_numpy(st[BL]),
        ak.to_numpy(ev.MET[BL].pt),
        ak.to_numpy(ak.max(mjf[BL], axis=1)),
        ak.to_numpy(pad_and_flatten(delta_eta[BL])),
        ak.to_numpy(pad_and_flatten(leading_lepton[BL].pt)),
        ak.to_numpy(pad_and_flatten(leading_lepton[BL].eta)),
        ak.to_numpy(pad_and_flatten(trailing_lepton[BL].pt)),
        ak.to_numpy(pad_and_flatten(trailing_lepton[BL].eta)),
        ak.to_numpy(pad_and_flatten(dilepton_mass[BL])),
        ak.to_numpy(pad_and_flatten(dilepton_pt[BL])),
        ak.to_numpy(pad_and_flatten(j_fwd[BL].pt)),
        ak.to_numpy(pad_and_flatten(j_fwd[BL].p)),
        ak.to_numpy(pad_and_flatten(j_fwd[BL].eta)),
        ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].pt)),
        ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].pt)),
        ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].eta)),
        ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].eta)),
        ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].pt)),
        ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].pt)),
        ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].eta)),
        ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].eta)),
        ak.to_numpy(min_bl_dR[BL]),
        ak.to_numpy(min_mt_lep_met[BL]),
    ])

    NN_inputs = np.moveaxis(NN_inputs, 0, 1)

    model, scaler = load_onnx_model('v8')

    try:
        NN_inputs_scaled = scaler.transform(NN_inputs)
        NN_pred = predict_onnx(model, NN_inputs_scaled)
        best_score = np.argmax(NN_pred, axis=1)
    except ValueError:
        # Raised for empty NN_inputs; fall back to empty predictions so the
        # fills below still run.
        NN_pred = np.array([])
        best_score = np.array([])
        NN_inputs_scaled = NN_inputs

    #k.clear_session()

    has_pred = np.shape(NN_pred)[0] > 0

    output['node'].fill(dataset=dataset, multiplicity=best_score, weight=weight_BL)
    output['node0_score_incl'].fill(
        dataset=dataset,
        score=NN_pred[:, 0] if has_pred else np.array([]),
        weight=weight_BL,
    )

    # One score histogram per output node, filled only with the events whose
    # highest-scoring node is that node.
    for node in range(5):
        in_node = (best_score == node)
        output['node%d_score' % node].fill(
            dataset=dataset,
            score=NN_pred[in_node][:, node] if has_pred else np.array([]),
            weight=weight_BL[in_node],
        )

    SR_sel_pp = ((best_score == 0) & ak.flatten((leading_lepton_BL.pdgId < 0)))
    SR_sel_mm = ((best_score == 0) & ak.flatten((leading_lepton_BL.pdgId > 0)))

    output['lead_lep_SR_pp'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
        weight=weight_BL[SR_sel_pp],
    )

    output['lead_lep_SR_mm'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
        weight=weight_BL[SR_sel_mm],
    )

    del model
    del scaler
    del NN_inputs, NN_inputs_scaled, NN_pred

    # first, make a few super inclusive plots
    output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
    output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
    output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
    output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL)
    output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
    output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
    # Fixed copy-paste bug: the muon multiplicity was filled from the electrons.
    output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight_BL)
    output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL)
    output['ST'].fill(dataset=dataset, pt=st[BL], weight=weight_BL)
    output['HT'].fill(dataset=dataset, pt=ht[BL], weight=weight_BL)

    if has_gen_info:
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
        output['chargeFlip_vs_nonprompt'].fill(
            dataset=dataset,
            n1=n_chargeflip[BL],
            n2=n_nonprompt[BL],
            n_ele=ak.num(electron)[BL],
            weight=weight_BL,
        )

    output['MET'].fill(
        dataset=dataset,
        pt=ev.MET[BL].pt,
        phi=ev.MET[BL].phi,
        weight=weight_BL,
    )

    if has_gen_info:
        output['lead_gen_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight=weight_BL,
        )

        output['trail_gen_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight=weight_BL,
        )

    output['lead_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
        weight=weight_BL,
    )

    output['trail_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
        weight=weight_BL,
    )

    output['j1'].fill(
        dataset=dataset,
        pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
        eta=ak.flatten(jet.eta[:, 0:1][BL]),
        phi=ak.flatten(jet.phi[:, 0:1][BL]),
        weight=weight_BL,
    )

    output['j2'].fill(
        dataset=dataset,
        pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
        eta=ak.flatten(jet[:, 1:2][BL].eta),
        phi=ak.flatten(jet[:, 1:2][BL].phi),
        weight=weight_BL,
    )

    output['j3'].fill(
        dataset=dataset,
        pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
        eta=ak.flatten(jet[:, 2:3][BL].eta),
        phi=ak.flatten(jet[:, 2:3][BL].phi),
        weight=weight_BL,
    )

    output['fwd_jet'].fill(
        dataset=dataset,
        pt=ak.flatten(j_fwd[BL].pt),
        eta=ak.flatten(j_fwd[BL].eta),
        phi=ak.flatten(j_fwd[BL].phi),
        weight=weight_BL,
    )

    output['high_p_fwd_p'].fill(dataset=dataset, p=ak.flatten(j_fwd[BL].p), weight=weight_BL)

    return output
def test_jagged_axis1():
    """Check min/argmin along axis=1 for every pairing of jagged sublists.

    Each array under test is ``[first, second]``.  ``first`` takes three
    shapes (with and without an empty list in it) and ``second`` takes nine;
    all 27 combinations are checked, min first and then argmin, in the same
    order as the hand-written version.
    """
    # (first sublist, expected argmin for that sublist); its min is always
    # [1, 2, 3.3].
    firsts = (
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [4, 3, 2]),
        ([[], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [5, 4, 3]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [], [999, 2.0], [1.0]],
         [5, 4, 2]),
    )

    # (second sublist, expected min, expected argmin)
    seconds = (
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [4, 3, 2]),
        ([[], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [5, 4, 3]),
        ([[], [], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [6, 5, 4]),
        ([[1.1], [1.1, 2.2], [], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [5, 4, 3]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [], [999, 2.0], [1.0]],
         [1, 2, 3.3], [5, 4, 2]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [], [1.0]],
         [1, 2, 3.3], [5, 3, 2]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0], []],
         [1, 2, 3.3], [4, 3, 2]),
        ([[1.1, 999, 999], [1.1, 2.2, 999], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [4, 3, 2]),
        ([[1.1, 999, 999, 999], [1.1, 2.2, 999], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3, 999], [4, 3, 2, 0]),
    )

    for first, first_argmin in firsts:
        for second, second_min, second_argmin in seconds:
            array = awkward1.Array([first, second])
            assert awkward1.min(array, axis=1).tolist() == [[1, 2, 3.3], second_min]
            assert awkward1.argmin(array, axis=1).tolist() == [first_argmin, second_argmin]
def process(self, events):
    """Count three-jet events passing a scan of kinematic thresholds.

    Events are required to pass the trigger selection for the dataset and to
    contain at least three jets with pt > 30, |eta| < 2.5 and ``isTight``.
    For the three leading such jets, thresholds on the minimum jet pt, the
    largest jet |eta|, the largest pairwise dEta and the smallest pairwise dR
    are scanned, and the number of passing events is added to
    ``output["N_<variable>_cut<threshold>"][dataset_name]``.

    Parameters
    ----------
    events
        Event array of the current chunk; ``events.metadata['dataset']``
        must hold the dataset name.

    Returns
    -------
    The filled accumulator obtained from ``self._accumulator.identity()``.
    """
    output = self._accumulator.identity()
    dataset_name = events.metadata['dataset']
    output["total_events"][dataset_name] += len(events)

    # HLT selection
    # NOTE(review): `year` is not defined in this method and must come from an
    # enclosing scope.  If it matches none of the branches below, HLT_mask
    # stays an empty list and the `&` with event_mask will not be a meaningful
    # selection - confirm that cannot happen.
    HLT_mask = []
    if year == "2016":
        if "SingleMuon" in dataset_name:
            if "2016B2" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
            else:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
        else:  # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
            if "2016B2" in dataset_name:
                HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
            elif "2016H" in dataset_name:
                HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
            else:
                HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
    if year == "2017":
        if "SingleMuon" in dataset_name:
            if "2017B" in dataset_name:
                HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
            else:
                HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
        else:
            HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
    if year == "2018":
        if "SingleMuon" in dataset_name:
            HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
        else:
            HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

    # Require 3 jets
    jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
    event_mask = (awk.sum(jet_mask, axis=1) >= 3)
    event_mask = event_mask & HLT_mask
    events_3j = events[event_mask]

    # Reduce jet mask to only events with 3 good jets
    jet_mask = jet_mask[event_mask]

    # Array of the jets to consider for trijet resonance
    selected_jets = events_3j.Jet[jet_mask][:, :3]

    # Pairs of jets
    pairs = [(0, 1), (1, 2), (2, 0)]
    jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]

    m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
    dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
    dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)

    max_dR = awk.max(dR_ij, axis=1)
    max_dEta = awk.max(dEta_ij, axis=1)
    min_dR = awk.min(dR_ij, axis=1)
    min_dEta = awk.min(dEta_ij, axis=1)
    min_pT = awk.min(selected_jets.pt, axis=1)
    # Largest |eta| among the selected jets.  The absolute value has to be
    # taken per jet before the reduction: abs(max(eta)) under-reports events
    # whose most forward jet has negative eta (e.g. eta = [-2.4, 0.1, 0.2]).
    max_eta = awk.max(abs(selected_jets.eta), axis=1)

    # The quantities from here down to m_20_overM are computed but not
    # consumed by any of the scans below.
    jet_k = [2, 0, 1]
    dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] + selected_jets[:, jet_k])
    dEta_i_jk = abs(selected_jets[:, jet_i].eta - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).eta)
    dPhi_i_jk = abs(selected_jets[:, jet_i].phi - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).phi)
    dPt_i_jk = abs(selected_jets[:, jet_i].pt - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).pt)

    max_dPhi_jjj = awk.max(dPhi_i_jk, axis=1)

    m3j = selected_jets.sum().mass

    pt_i_overM = selected_jets.pt / m3j
    max_pt_overM = awk.max(pt_i_overM, axis=1)
    min_pt_overM = awk.min(pt_i_overM, axis=1)
    m_01_overM = m_ij[:, 0] / m3j
    m_12_overM = m_ij[:, 1] / m3j
    m_20_overM = m_ij[:, 2] / m3j

    def count_passing(cut_label, mask):
        """Return how many three-jet events satisfy ``mask``."""
        selection = PackedSelection()
        selection.add(cut_label, mask)
        sel_mask = selection.require(
            **{name: True for name in selection.names})
        return len(events_3j[sel_mask])

    for pt_cut in range(30, 1150, 5):
        output[f"N_min_pT_cut{pt_cut}"][dataset_name] += count_passing(
            "MinJetPt_cut", min_pT > pt_cut)

    for eta_cut in np.arange(0, 2.5, 0.05):
        output[f"N_max_eta_cut{eta_cut}"][dataset_name] += count_passing(
            "MaxJetEta_cut", max_eta < eta_cut)

    for dEta_max_cut in np.arange(0, 5, 0.1):
        output[f"N_dEta_jj_max_cut{dEta_max_cut}"][dataset_name] += count_passing(
            "MaxJJdEta_cut", max_dEta < dEta_max_cut)

    for dR_min_cut in np.arange(0, 5, 0.1):
        output[f"N_dR_jj_min_cut{dR_min_cut}"][dataset_name] += count_passing(
            "MinJJdR_cut", min_dR > dR_min_cut)

    # TODO: scans on the max/min dPhi(j, jj) variable ("N_dPhi_jjj_max_min_cut*")
    # were present here only as commented-out code and have been dropped.  As
    # written they could not run: they iterated with range(0, 6, 0.1) (range
    # does not accept a float step) and cut on min_dR instead of a dPhi
    # quantity.  Re-add them with np.arange and max_dPhi_jjj if needed.

    return output
def process(self, events):
    """Select three-jet events and fill the kinematic histograms.

    An event is kept when it passes the HLT mask chosen from ``year`` and
    the dataset name, and has at least three jets with pt > 30,
    |eta| < 2.5 and ``isTight``. The first three such jets are used to
    build pair (``ij``) and jet-versus-dijet (``i_jk``) quantities, which
    are filled into the accumulator histograms for every entry of
    ``selection_masks``.

    Parameters
    ----------
    events
        Event collection exposing ``metadata['dataset']``, ``HLT`` and
        ``Jet`` (presumably a coffea NanoEvents array -- confirm).

    Returns
    -------
    The accumulator created by ``self._accumulator.identity()`` after
    all fills.
    """
    output = self._accumulator.identity()
    dataset_name = events.metadata['dataset']
    output["total_events"][dataset_name] += len(events)

    # HLT selection: OR of the trigger paths for this year / dataset.
    # NOTE(review): `year` is not defined in this method; it is read from an
    # enclosing scope. If it matches none of the branches below, HLT_mask
    # stays an empty list and the `&` further down will most likely fail.
    HLT_mask = []
    if year == "2016":
        if "SingleMuon" in dataset_name:
            # this does not work, as the name of file which is under
            # processing is unknown
            if "2016B2" in dataset_name:
                HLT_mask = (events.HLT.IsoMu24 | events.HLT.IsoTkMu24
                            | events.HLT.Mu50)
            else:
                HLT_mask = (events.HLT.IsoMu24 | events.HLT.IsoTkMu24
                            | events.HLT.Mu50 | events.HLT.TkMu50)
        else:
            # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
            if "2016B2" in dataset_name:
                HLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                            | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
            elif "2016H" in dataset_name:
                HLT_mask = (events.HLT.PFHT900 | events.HLT.AK8PFJet450
                            | events.HLT.AK8PFJet500 | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
            else:
                HLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                            | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500
                            | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
    if year == "2017":
        if "SingleMuon" in dataset_name:
            if "2017B" in dataset_name:
                HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
            else:
                HLT_mask = (events.HLT.IsoMu27 | events.HLT.Mu50
                            | events.HLT.OldMu100 | events.HLT.TkMu100)
        else:
            HLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                        | events.HLT.AK8PFJet550
                        | events.HLT.CaloJet500_NoJetID
                        | events.HLT.CaloJet550_NoJetID
                        | events.HLT.PFJet500)
    if year == "2018":
        if "SingleMuon" in dataset_name:
            HLT_mask = (events.HLT.IsoMu24 | events.HLT.Mu50
                        | events.HLT.OldMu100 | events.HLT.TkMu100)
        else:
            HLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                        | events.HLT.AK8PFJet550
                        | events.HLT.CaloJet500_NoJetID
                        | events.HLT.CaloJet550_NoJetID
                        | events.HLT.PFJet500)

    # Require 3 jets
    jet_mask = ((events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5)
                & (events.Jet.isTight))
    event_mask = (awk.sum(jet_mask, axis=1) >= 3)
    event_mask = event_mask & HLT_mask
    events_3j = events[event_mask]

    # Reduce jet mask to only events with 3 good jets
    jet_mask = jet_mask[event_mask]

    # Array of the jets to consider for trijet resonance
    selected_jets = events_3j.Jet[jet_mask][:, :3]

    # Pairs of jets: (0, 1), (1, 2), (2, 0)
    pairs = [(0, 1), (1, 2), (2, 0)]
    jet_i, jet_j = zip(*pairs)  # (0, 1, 2), (1, 2, 0)

    m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
    dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
    dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)

    # Jet i against the dijet system built from jets j and k.
    # NOTE(review): with jet_i=(0,1,2), jet_j=(1,2,0), jet_k=(2,0,1) the
    # "dijet" contains jet j + jet k, i.e. (1+2), (2+0), (0+1).
    jet_k = [2, 0, 1]
    dijet_jk = selected_jets[:, jet_j] + selected_jets[:, jet_k]
    dR_i_jk = selected_jets[:, jet_i].delta_r(dijet_jk)
    dEta_i_jk = abs(selected_jets[:, jet_i].eta - dijet_jk.eta)
    # NOTE(review): this is a plain |phi_i - phi_jk|, not wrapped into
    # [0, pi]; confirm that this is intended.
    dPhi_i_jk = abs(selected_jets[:, jet_i].phi - dijet_jk.phi)

    m3j = selected_jets.sum().mass

    pt_i_overM = selected_jets.pt / m3j
    m_01_overM = m_ij[:, 0] / m3j
    m_12_overM = m_ij[:, 1] / m3j
    m_20_overM = m_ij[:, 2] / m3j
    dPtoverM_0_12 = abs(selected_jets[:, 0].pt
                        - (selected_jets[:, 1] + selected_jets[:, 2]).pt) / m3j
    dPtoverM_1_20 = abs(selected_jets[:, 1].pt
                        - (selected_jets[:, 2] + selected_jets[:, 0]).pt) / m3j
    dPtoverM_2_01 = abs(selected_jets[:, 2].pt
                        - (selected_jets[:, 0] + selected_jets[:, 1]).pt) / m3j

    # Event selection masks (the HLT requirement is already part of
    # event_mask above, so only the dummy pre-selection is registered).
    selection_masks = {}
    selection = PackedSelection()
    selection.add("Dummy", m3j > 0)
    sel_mask = selection.require(**{name: True for name in selection.names})
    selection_masks["Pre-selection"] = sel_mask

    # Fill histograms
    for selection_name, selection_mask in selection_masks.items():
        common = {"dataset": dataset_name, "selection": selection_name}

        output["mjjj"].fill(**common, mjjj=m3j[selection_mask])
        output["m_ij"].fill(**common,
                            m_01=m_ij[:, 0][selection_mask],
                            m_12=m_ij[:, 1][selection_mask],
                            m_20=m_ij[:, 2][selection_mask])
        output["dR_ij"].fill(**common,
                             dR_01=dR_ij[:, 0][selection_mask],
                             dR_12=dR_ij[:, 1][selection_mask],
                             dR_20=dR_ij[:, 2][selection_mask])
        output["dEta_ij"].fill(**common,
                               dEta_01=dEta_ij[:, 0][selection_mask],
                               dEta_12=dEta_ij[:, 1][selection_mask],
                               dEta_20=dEta_ij[:, 2][selection_mask])
        output["moverM_ij"].fill(**common,
                                 moverM_01=m_01_overM[selection_mask],
                                 moverM_12=m_12_overM[selection_mask],
                                 moverM_20=m_20_overM[selection_mask])
        output["pt_i"].fill(**common,
                            pt_0=selected_jets[:, 0][selection_mask].pt,
                            pt_1=selected_jets[:, 1][selection_mask].pt,
                            pt_2=selected_jets[:, 2][selection_mask].pt)
        output["eta_i"].fill(**common,
                             eta_0=selected_jets[:, 0][selection_mask].eta,
                             eta_1=selected_jets[:, 1][selection_mask].eta,
                             eta_2=selected_jets[:, 2][selection_mask].eta)
        output["ptoverM_i"].fill(**common,
                                 ptoverM_0=pt_i_overM[:, 0][selection_mask],
                                 ptoverM_1=pt_i_overM[:, 1][selection_mask],
                                 ptoverM_2=pt_i_overM[:, 2][selection_mask])
        output["dR_i_jk"].fill(**common,
                               dR_0_12=dR_i_jk[:, 0][selection_mask],
                               dR_1_20=dR_i_jk[:, 1][selection_mask],
                               dR_2_01=dR_i_jk[:, 2][selection_mask])
        output["dEta_i_jk"].fill(**common,
                                 dEta_0_12=dEta_i_jk[:, 0][selection_mask],
                                 dEta_1_20=dEta_i_jk[:, 1][selection_mask],
                                 dEta_2_01=dEta_i_jk[:, 2][selection_mask])
        output["dPhi_i_jk"].fill(**common,
                                 dPhi_0_12=dPhi_i_jk[:, 0][selection_mask],
                                 dPhi_1_20=dPhi_i_jk[:, 1][selection_mask],
                                 dPhi_2_01=dPhi_i_jk[:, 2][selection_mask])
        output["dPtoverM_i_jk"].fill(
            **common,
            dPtoverM_0_12=dPtoverM_0_12[selection_mask],
            dPtoverM_1_20=dPtoverM_1_20[selection_mask],
            dPtoverM_2_01=dPtoverM_2_01[selection_mask])

        # Per-event quantities restricted to the current selection.
        pt_overM_sel = pt_i_overM[selection_mask]
        dR_ij_sel = dR_ij[selection_mask]
        dEta_ij_sel = dEta_ij[selection_mask]
        dR_i_jk_sel = dR_i_jk[selection_mask]
        dEta_i_jk_sel = dEta_i_jk[selection_mask]
        dPhi_i_jk_sel = dPhi_i_jk[selection_mask]
        jets_sel = selected_jets[selection_mask]

        # The three dPtoverM values are per-event scalars held in separate
        # arrays. Stack them into an (n_events, 3) table and reduce along
        # axis 1 instead of looping over events in Python. Values are
        # widened to float64, matching what the element-wise loop produced.
        # (Unlike Python's max/min, NumPy propagates NaN.)
        dPtoverM_sel = np.stack(
            [np.array(values[selection_mask], dtype=np.float64)
             for values in (dPtoverM_0_12, dPtoverM_1_20, dPtoverM_2_01)],
            axis=1)

        # Histogram name == fill keyword for all of these. Missing values
        # from the awkward reductions are replaced by the sentinel -99.
        extremes = {
            "max_dR": awk.fill_none(awk.max(dR_ij_sel, axis=1), -99),
            "max_dEta": awk.fill_none(awk.max(dEta_ij_sel, axis=1), -99),
            "min_dR": awk.fill_none(awk.min(dR_ij_sel, axis=1), -99),
            "min_dEta": awk.fill_none(awk.min(dEta_ij_sel, axis=1), -99),
            "min_pt": awk.fill_none(awk.min(jets_sel.pt, axis=1), -99),
            "max_eta": awk.fill_none(awk.max(abs(jets_sel.eta), axis=1), -99),
            "max_ptoverM": awk.fill_none(awk.max(pt_overM_sel, axis=1), -99),
            "min_ptoverM": awk.fill_none(awk.min(pt_overM_sel, axis=1), -99),
            "max_dR_j_jj": awk.fill_none(awk.max(dR_i_jk_sel, axis=1), -99),
            "max_dEta_j_jj": awk.fill_none(awk.max(dEta_i_jk_sel, axis=1), -99),
            "max_dPhi_j_jj": awk.fill_none(awk.max(dPhi_i_jk_sel, axis=1), -99),
            "max_dPtoverM_j_jj": dPtoverM_sel.max(axis=1),
            "min_dR_j_jj": awk.fill_none(awk.min(dR_i_jk_sel, axis=1), -99),
            "min_dEta_j_jj": awk.fill_none(awk.min(dEta_i_jk_sel, axis=1), -99),
            "min_dPhi_j_jj": awk.fill_none(awk.min(dPhi_i_jk_sel, axis=1), -99),
            "min_dPtoverM_j_jj": dPtoverM_sel.min(axis=1),
        }
        for hist_name, values in extremes.items():
            output[hist_name].fill(**common, **{hist_name: values})

    return output
def process(self, events):
    """Select three-jet events and append their kinematics to column accumulators.

    An event is kept when it passes the HLT mask chosen from ``year`` and
    the dataset name, and has at least three jets with pt > 30,
    |eta| < 2.5 and ``isTight``. The first three such jets are used to
    build per-jet, pair (``ij``) and jet-versus-dijet (``i_jk``)
    quantities, which are appended to the ``*_final`` column accumulators
    under the key ``dataset_name``.

    Parameters
    ----------
    events
        Event collection exposing ``metadata['dataset']``, ``HLT`` and
        ``Jet`` (presumably a coffea NanoEvents array -- confirm).

    Returns
    -------
    The accumulator created by ``self._accumulator.identity()`` after
    all columns have been appended.
    """
    output = self._accumulator.identity()
    dataset_name = events.metadata['dataset']
    output["total_events"][dataset_name] += len(events)

    # Initialize dict accumulators, if have not been initialized
    column_keys = []
    for jet in (0, 1, 2):
        column_keys += [f"eta_{jet}_final", f"ptoverM_{jet}_final"]
    for i, j in ((0, 1), (1, 2), (2, 0)):
        column_keys += [f"dEta_{i}{j}_final", f"dR_{i}{j}_final",
                        f"moverM_{i}{j}_final"]
    for i, j, k in ((0, 1, 2), (1, 2, 0), (2, 0, 1)):
        column_keys += [f"dR_{i}_{j}{k}_final", f"dEta_{i}_{j}{k}_final",
                        f"Phi_{i}_{j}{k}_final", f"dPtoverM_{i}_{j}{k}_final"]
    column_keys += [
        "ptoverM_max_final", "ptoverM_min_final",
        "eta_max_final",
        "dR_max_final", "dR_min_final",
        "dEta_max_final", "dEta_min_final",
        "dR_j_jj_max_final", "dR_j_jj_min_final",
        "dEta_j_jj_max_final", "dEta_j_jj_min_final",
        "dPhi_j_jj_max_final", "dPhi_j_jj_min_final",
        "dPtoverM_j_jj_max_final", "dPtoverM_j_jj_min_final",
    ]
    for key in column_keys:
        if dataset_name not in output[key]:
            output[key][dataset_name] = processor.column_accumulator(np.array([]))

    # HLT selection: OR of the trigger paths for this year / dataset.
    # NOTE(review): `year` is not defined in this method; it is read from an
    # enclosing scope. If it matches none of the branches below, HLT_mask
    # stays an empty list and the `&` further down will most likely fail.
    HLT_mask = []
    if year == "2016":
        if "SingleMuon" in dataset_name:
            # this does not work, as the name of file which is under
            # processing is unknown
            if "2016B2" in dataset_name:
                HLT_mask = (events.HLT.IsoMu24 | events.HLT.IsoTkMu24
                            | events.HLT.Mu50)
            else:
                HLT_mask = (events.HLT.IsoMu24 | events.HLT.IsoTkMu24
                            | events.HLT.Mu50 | events.HLT.TkMu50)
        else:
            # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
            if "2016B2" in dataset_name:
                HLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                            | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
            elif "2016H" in dataset_name:
                HLT_mask = (events.HLT.PFHT900 | events.HLT.AK8PFJet450
                            | events.HLT.AK8PFJet500 | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
            else:
                HLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                            | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500
                            | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
    if year == "2017":
        if "SingleMuon" in dataset_name:
            if "2017B" in dataset_name:
                HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
            else:
                HLT_mask = (events.HLT.IsoMu27 | events.HLT.Mu50
                            | events.HLT.OldMu100 | events.HLT.TkMu100)
        else:
            HLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                        | events.HLT.AK8PFJet550
                        | events.HLT.CaloJet500_NoJetID
                        | events.HLT.CaloJet550_NoJetID
                        | events.HLT.PFJet500)
    if year == "2018":
        if "SingleMuon" in dataset_name:
            HLT_mask = (events.HLT.IsoMu24 | events.HLT.Mu50
                        | events.HLT.OldMu100 | events.HLT.TkMu100)
        else:
            HLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                        | events.HLT.AK8PFJet550
                        | events.HLT.CaloJet500_NoJetID
                        | events.HLT.CaloJet550_NoJetID
                        | events.HLT.PFJet500)

    # Require 3 jets
    jet_mask = ((events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5)
                & (events.Jet.isTight))
    event_mask = (awk.sum(jet_mask, axis=1) >= 3)
    event_mask = event_mask & HLT_mask
    events_3j = events[event_mask]

    # Reduce jet mask to only events with 3 good jets
    jet_mask = jet_mask[event_mask]

    # Array of the jets to consider for trijet resonance
    selected_jets = events_3j.Jet[jet_mask][:, :3]

    # Pairs of jets: (0, 1), (1, 2), (2, 0)
    pairs = [(0, 1), (1, 2), (2, 0)]
    jet_i, jet_j = zip(*pairs)  # (0, 1, 2), (1, 2, 0)

    m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
    dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
    dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)

    # Jet i against the system built from jets j and k.
    jet_k = [2, 0, 1]
    dijet_jk = selected_jets[:, jet_j] + selected_jets[:, jet_k]
    dR_i_jk = selected_jets[:, jet_i].delta_r(dijet_jk)
    dEta_i_jk = abs(selected_jets[:, jet_i].eta - dijet_jk.eta)
    # NOTE(review): this is a plain |phi_i - phi_jk|, not wrapped into
    # [0, pi]; confirm that this is intended.
    dPhi_i_jk = abs(selected_jets[:, jet_i].phi - dijet_jk.phi)

    m3j = selected_jets.sum().mass

    pt_i_overM = selected_jets.pt / m3j
    m_overM = {
        "01": m_ij[:, 0] / m3j,
        "12": m_ij[:, 1] / m3j,
        "20": m_ij[:, 2] / m3j,
    }
    dPtoverM = {
        "0_12": abs(selected_jets[:, 0].pt
                    - (selected_jets[:, 1] + selected_jets[:, 2]).pt) / m3j,
        "1_20": abs(selected_jets[:, 1].pt
                    - (selected_jets[:, 2] + selected_jets[:, 0]).pt) / m3j,
        "2_01": abs(selected_jets[:, 2].pt
                    - (selected_jets[:, 0] + selected_jets[:, 1]).pt) / m3j,
    }

    # Pre-selection (a dummy cut that only requires a positive 3-jet mass)
    selection = PackedSelection()
    selection.add("Dummy", m3j > 0)
    sel_mask = selection.require(**{name: True for name in selection.names})

    output["selected_events"][dataset_name] += len(events_3j[sel_mask])

    def append_column(key, values):
        # Convert to a NumPy array and append it to the dataset's column.
        output[key][dataset_name] += processor.column_accumulator(np.array(values))

    for jet in (0, 1, 2):
        append_column(f"eta_{jet}_final", selected_jets[:, jet][sel_mask].eta)
        append_column(f"ptoverM_{jet}_final", pt_i_overM[:, jet][sel_mask])

    # Column `i` of the pair arrays holds the pair that starts with jet i.
    for i, j in ((0, 1), (1, 2), (2, 0)):
        append_column(f"dEta_{i}{j}_final", dEta_ij[:, i][sel_mask])
        append_column(f"dR_{i}{j}_final", dR_ij[:, i][sel_mask])
    for label, values in m_overM.items():
        append_column(f"moverM_{label}_final", values[sel_mask])

    for i, j, k in ((0, 1, 2), (1, 2, 0), (2, 0, 1)):
        append_column(f"dR_{i}_{j}{k}_final", dR_i_jk[:, i][sel_mask])
        append_column(f"dEta_{i}_{j}{k}_final", dEta_i_jk[:, i][sel_mask])
        append_column(f"Phi_{i}_{j}{k}_final", dPhi_i_jk[:, i][sel_mask])
    for label, values in dPtoverM.items():
        append_column(f"dPtoverM_{label}_final", values[sel_mask])

    # The three dPtoverM values are per-event scalars held in separate
    # arrays. Stack them into an (n_events, 3) table and reduce along
    # axis 1 instead of looping over events in Python. Values are widened
    # to float64, matching what the element-wise loop produced.
    # (Unlike Python's max/min, NumPy propagates NaN.)
    dPtoverM_sel = np.stack(
        [np.array(values[sel_mask], dtype=np.float64)
         for values in dPtoverM.values()],
        axis=1)

    pt_overM_sel = pt_i_overM[sel_mask]
    dR_ij_sel = dR_ij[sel_mask]
    dEta_ij_sel = dEta_ij[sel_mask]
    dR_i_jk_sel = dR_i_jk[sel_mask]
    dEta_i_jk_sel = dEta_i_jk[sel_mask]
    dPhi_i_jk_sel = dPhi_i_jk[sel_mask]

    # Missing values from the awkward reductions are replaced by -99.
    extremes = {
        "ptoverM_max_final": awk.fill_none(awk.max(pt_overM_sel, axis=1), -99),
        "ptoverM_min_final": awk.fill_none(awk.min(pt_overM_sel, axis=1), -99),
        "eta_max_final": awk.fill_none(
            awk.max(abs(selected_jets[sel_mask].eta), axis=1), -99),
        "dR_max_final": awk.fill_none(awk.max(dR_ij_sel, axis=1), -99),
        "dR_min_final": awk.fill_none(awk.min(dR_ij_sel, axis=1), -99),
        "dEta_max_final": awk.fill_none(awk.max(dEta_ij_sel, axis=1), -99),
        "dEta_min_final": awk.fill_none(awk.min(dEta_ij_sel, axis=1), -99),
        "dR_j_jj_max_final": awk.fill_none(awk.max(dR_i_jk_sel, axis=1), -99),
        "dR_j_jj_min_final": awk.fill_none(awk.min(dR_i_jk_sel, axis=1), -99),
        "dEta_j_jj_max_final": awk.fill_none(awk.max(dEta_i_jk_sel, axis=1), -99),
        "dEta_j_jj_min_final": awk.fill_none(awk.min(dEta_i_jk_sel, axis=1), -99),
        "dPhi_j_jj_max_final": awk.fill_none(awk.max(dPhi_i_jk_sel, axis=1), -99),
        "dPhi_j_jj_min_final": awk.fill_none(awk.min(dPhi_i_jk_sel, axis=1), -99),
        "dPtoverM_j_jj_max_final": dPtoverM_sel.max(axis=1),
        "dPtoverM_j_jj_min_final": dPtoverM_sel.min(axis=1),
    }
    for key, values in extremes.items():
        append_column(key, values)

    return output
def process(self, events):
    """Select three-jet events and fill histograms for two named selections.

    An event is kept when it passes the HLT mask chosen from ``year`` and
    the dataset name, and has at least three jets with pt > 30,
    |eta| < 2.5 and ``isTight``. Two selections are then defined on the
    first three such jets:

    * ``"pre-selection"``: max pair dEta < 1.3, min pair dR > 0.4,
      min jet pt > 50.
    * ``"JetHLT - presel"``: the same cuts plus, when ``year`` is 2016,
      2017 or 2018, the jet-trigger OR (``JetHLT_fired``).

    Every histogram is filled once per selection.

    Parameters
    ----------
    events
        Event collection exposing ``metadata['dataset']``, ``HLT`` and
        ``Jet`` (presumably a coffea NanoEvents array -- confirm).

    Returns
    -------
    The accumulator created by ``self._accumulator.identity()`` after
    all fills.
    """
    output = self._accumulator.identity()
    dataset_name = events.metadata['dataset']
    output["total_events"][dataset_name] += len(events)

    # HLT selection: OR of the trigger paths for this year / dataset.
    # NOTE(review): `year` is not defined in this method; it is read from an
    # enclosing scope. If it matches none of the branches below, HLT_mask
    # stays an empty list and the `&` further down will most likely fail.
    HLT_mask = []
    if year == "2016":
        if "SingleMuon" in dataset_name:
            # this does not work, as the name of file which is under
            # processing is unknown
            if "2016B2" in dataset_name:
                HLT_mask = (events.HLT.IsoMu24 | events.HLT.IsoTkMu24
                            | events.HLT.Mu50)
            else:
                HLT_mask = (events.HLT.IsoMu24 | events.HLT.IsoTkMu24
                            | events.HLT.Mu50 | events.HLT.TkMu50)
        else:
            # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
            if "2016B2" in dataset_name:
                HLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                            | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
            elif "2016H" in dataset_name:
                HLT_mask = (events.HLT.PFHT900 | events.HLT.AK8PFJet450
                            | events.HLT.AK8PFJet500 | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
            else:
                HLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                            | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500
                            | events.HLT.PFJet500
                            | events.HLT.CaloJet500_NoJetID)
    if year == "2017":
        if "SingleMuon" in dataset_name:
            if "2017B" in dataset_name:
                HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
            else:
                HLT_mask = (events.HLT.IsoMu27 | events.HLT.Mu50
                            | events.HLT.OldMu100 | events.HLT.TkMu100)
        else:
            HLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                        | events.HLT.AK8PFJet550
                        | events.HLT.CaloJet500_NoJetID
                        | events.HLT.CaloJet550_NoJetID
                        | events.HLT.PFJet500)
    if year == "2018":
        if "SingleMuon" in dataset_name:
            HLT_mask = (events.HLT.IsoMu24 | events.HLT.Mu50
                        | events.HLT.OldMu100 | events.HLT.TkMu100)
        else:
            HLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                        | events.HLT.AK8PFJet550
                        | events.HLT.CaloJet500_NoJetID
                        | events.HLT.CaloJet550_NoJetID
                        | events.HLT.PFJet500)

    # Require 3 jets
    jet_mask = ((events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5)
                & (events.Jet.isTight))
    event_mask = (awk.sum(jet_mask, axis=1) >= 3)
    event_mask = event_mask & HLT_mask
    events_3j = events[event_mask]

    # Reduce jet mask to only events with 3 good jets
    jet_mask = jet_mask[event_mask]

    # Array of the jets to consider for trijet resonance
    selected_jets = events_3j.Jet[jet_mask][:, :3]

    # Pairs of jets: (0, 1), (1, 2), (2, 0)
    pairs = [(0, 1), (1, 2), (2, 0)]
    jet_i, jet_j = zip(*pairs)  # (0, 1, 2), (1, 2, 0)

    m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
    dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
    dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)

    # Only the extremes that are actually cut on are computed.
    max_dEta = awk.max(dEta_ij, axis=1)
    min_dR = awk.min(dR_ij, axis=1)
    min_pT = awk.min(selected_jets.pt, axis=1)

    m3j = selected_jets.sum().mass

    pt_i_overM = selected_jets.pt / m3j
    m_01_overM = m_ij[:, 0] / m3j
    m_12_overM = m_ij[:, 1] / m3j
    m_20_overM = m_ij[:, 2] / m3j

    # selections[name] holds the PackedSelection, selection_items[name] the
    # names of the cuts that are required when filling.
    selections = {}
    selection_items = {}

    def add_cut(selection_name, cut_name, cut_mask):
        # Register a cut and remember its name for the later require().
        selections[selection_name].add(cut_name, cut_mask)
        selection_items[selection_name].append(cut_name)

    # Event selection - pre-selection
    selections["pre-selection"] = PackedSelection()
    selection_items["pre-selection"] = []
    add_cut("pre-selection", "MaxDEta", max_dEta < 1.3)
    add_cut("pre-selection", "MinDR", min_dR > 0.4)
    add_cut("pre-selection", "MinJetPt", min_pT > 50.)

    # Event selection - pre-selection & HLT_trigger
    selections["JetHLT - presel"] = PackedSelection()
    selection_items["JetHLT - presel"] = []
    if year == "2016":
        if "2016B2" in dataset_name:
            JetHLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                           | events.HLT.PFJet500
                           | events.HLT.CaloJet500_NoJetID)
        elif "2016H" in dataset_name:
            JetHLT_mask = (events.HLT.PFHT900 | events.HLT.AK8PFJet450
                           | events.HLT.AK8PFJet500 | events.HLT.PFJet500
                           | events.HLT.CaloJet500_NoJetID)
        else:
            JetHLT_mask = (events.HLT.PFHT800 | events.HLT.PFHT900
                           | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500
                           | events.HLT.PFJet500
                           | events.HLT.CaloJet500_NoJetID)
        # The trigger mask is per input event; reduce it to the kept events.
        add_cut("JetHLT - presel", "JetHLT_fired", JetHLT_mask[event_mask])
    elif year in ("2017", "2018"):
        # Both years use the same jet-trigger OR.
        JetHLT_mask = (events.HLT.PFHT1050 | events.HLT.AK8PFJet500
                       | events.HLT.AK8PFJet550
                       | events.HLT.CaloJet500_NoJetID
                       | events.HLT.CaloJet550_NoJetID
                       | events.HLT.PFJet500)
        add_cut("JetHLT - presel", "JetHLT_fired", JetHLT_mask[event_mask])
    add_cut("JetHLT - presel", "MaxDEta", max_dEta < 1.3)
    add_cut("JetHLT - presel", "MinDR", min_dR > 0.4)
    add_cut("JetHLT - presel", "MinJetPt", min_pT > 50.)

    # Fill histograms
    pair_labels = ("01", "12", "20")  # column order of m_ij / dR_ij / dEta_ij
    m_overM = (m_01_overM, m_12_overM, m_20_overM)
    for selection_name, selection in selections.items():
        # The pass mask is identical for every histogram of this selection,
        # so evaluate it once instead of once per fill.
        pass_mask = selection.require(
            **{name: True for name in selection_items[selection_name]})
        common = {"dataset": dataset_name, "selection": selection_name}

        output["mjjj"].fill(**common, mjjj=m3j[pass_mask])
        for column, label in enumerate(pair_labels):
            key = f"m{label}"
            output[key].fill(**common, **{key: m_ij[:, column][pass_mask]})
        for column, label in enumerate(pair_labels):
            key = f"dR{label}"
            output[key].fill(**common, **{key: dR_ij[:, column][pass_mask]})
        for column, label in enumerate(pair_labels):
            key = f"dEta{label}"
            output[key].fill(**common, **{key: dEta_ij[:, column][pass_mask]})
        for label, ratio in zip(pair_labels, m_overM):
            key = f"m{label}overM"
            output[key].fill(**common, **{key: ratio[pass_mask]})
        for jet in (0, 1, 2):
            key = f"pt{jet}"
            output[key].fill(**common,
                             **{key: selected_jets[:, jet][pass_mask].pt})
        for jet in (0, 1, 2):
            key = f"eta{jet}"
            output[key].fill(**common,
                             **{key: selected_jets[:, jet][pass_mask].eta})
        for jet in (0, 1, 2):
            key = f"ptoverM{jet}"
            output[key].fill(**common,
                             **{key: pt_i_overM[:, jet][pass_mask]})

    return output
def runOneFile(filename):
    """Process one skimmed ROOT file and write per-region histograms.

    Steps, in order:

    1. Open the input file with uproot4, read ``h_total_mcweight`` and the
       ``outTree`` tree.
    2. Build ``cms_events`` (all per-event / per-object columns plus derived
       selections) and ``out_events`` (the flat columns that are histogrammed).
    3. Evaluate the region masks imported from ``regions``.
    4. Attach b-tag, lepton, pile-up and trigger scale factors, and build one
       ``weight_<region>`` column per region.
    5. For every region in ``variables.regions`` fill one histogram per
       variable and write it, together with ``h_total_mcweight``, to
       ``output/<basename of input file>``.

    Parameters
    ----------
    filename : str
        NOTE(review): this argument is currently not used. The input path is
        taken from the name ``inputfile``, which is not defined inside this
        function (presumably a module-level global) -- confirm against the
        caller before changing it.

    Returns
    -------
    None
        All results are written to disk.
    """
    #print ("filename: ", filename)
    #inputfile=filename
    outputfile = "output/" + inputfile.split("/")[-1]
    #outputfile = "tmp.root"
    mycache = uproot4.LRUArrayCache("1 MB")

    file_ = uproot4.open(inputfile, num_workers=10)
    #print ("root file opened: ", filename)
    nevents = ak.to_list(file_["h_total_mcweight"].values())[2]
    #nevents = 1000000
    print("histogram opened: ", nevents)

    #tree_ = uproot4.open(inputfile, num_workers=10)["outTree"].arrays(array_cache=mycache)
    tree_ = file_["outTree"].arrays(array_cache=mycache)
    print("tree length", len(tree_))
    #tree_ = uproot4.open(inputfile)[trees[0]].arrays()
    #tree_ = uproot4.open(inputfile)["outTree"].arrays(array_cache=mycache)
    #tree_ = uproot4.open("Merged_WJetsInclusiveSkim.root")["outTree"].arrays(array_cache=mycache)
    #tree_ = uproot4.open("/eos/cms/store/group/phys_exotica/bbMET/2016_SkimmedFiles/skim_setup_2016_v16_07-00/crab_DYJetsToLL_M-50_HT-400to600_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_200918_215129_0000_0.root")["outTree"].arrays(array_cache=mycache)
    #tree_ = uproot4.open("/eos/cms/store/group/phys_exotica/bbMET/2016_SkimmedFiles/skim_setup_2016_v16_07-00/crab_ttHTobb_M125_13TeV_powheg_pythia8_200918_215950_0000_0.root")["outTree"].arrays(array_cache=mycache)
    #print ((tree_))

    # Working record: raw branches plus pt/eta/phi derived from px/py/pz.
    # depth_limit=1 zips at event level only, so per-object lists of
    # different lengths can live side by side in one record.
    cms_events = ak.zip(
        {
            "run": tree_["st_runId"],
            "lumi": tree_["st_lumiSection"],
            "event": tree_["st_eventId"],
            "jetpx": tree_["st_THINjetPx"],
            "jetpy": tree_["st_THINjetPy"],
            "jetpz": tree_["st_THINjetPz"],
            "jete": tree_["st_THINjetEnergy"],
            "jetpt": getpt(tree_["st_THINjetPx"], tree_["st_THINjetPy"]),
            "jeteta": geteta(tree_["st_THINjetPx"], tree_["st_THINjetPy"],
                             tree_["st_THINjetPz"]),
            "jetphi": getphi(tree_["st_THINjetPx"], tree_["st_THINjetPy"]),
            "jetcsv": tree_["st_THINjetDeepCSV"],
            "jetflav": tree_["st_THINjetHadronFlavor"],
            "metpt": tree_["st_pfMetCorrPt"],
            "metphi": tree_["st_pfMetCorrPhi"],
            "mettrig": tree_["st_mettrigdecision"],
            "elepx": tree_["st_elePx"],
            "elepy": tree_["st_elePy"],
            "elepz": tree_["st_elePz"],
            "elee": tree_["st_eleEnergy"],
            "eleidL": tree_["st_eleIsPassLoose"],
            "eleidT": tree_["st_eleIsPassTight"],
            "eleq": tree_["st_eleCharge"],
            "elept": getpt(tree_["st_elePx"], tree_["st_elePy"]),
            "eleeta": geteta(tree_["st_elePx"], tree_["st_elePy"],
                             tree_["st_elePz"]),
            "elephi": getphi(tree_["st_elePx"], tree_["st_elePy"]),
            "mupx": tree_["st_muPx"],
            "mupy": tree_["st_muPy"],
            "mupz": tree_["st_muPz"],
            "mue": tree_["st_muEnergy"],
            "muidT": tree_["st_isTightMuon"],
            "muq": tree_["st_muCharge"],
            "mupt": getpt(tree_["st_muPx"], tree_["st_muPy"]),
            "mueta": geteta(tree_["st_muPx"], tree_["st_muPy"],
                            tree_["st_muPz"]),
            "muphi": getphi(tree_["st_muPx"], tree_["st_muPy"]),
            "ntau": tree_["st_nTau_discBased_TightEleTightMuVeto"],
            "npho": tree_["st_nPho"],
            "phopx": tree_["st_phoPx"],
            "phopy": tree_["st_phoPy"],
            "phopz": tree_["st_phoPz"],
            "phoe": tree_["st_phoEnergy"],
            "phopt": getpt(tree_["st_phoPx"], tree_["st_phoPy"]),
            "phoeta": geteta(tree_["st_phoPx"], tree_["st_phoPy"],
                             tree_["st_phoPz"]),
            "nTrueInt": tree_["st_pu_nTrueInt"],
            "nPUVert": tree_["st_pu_nPUVert"],
            "genpt": tree_["st_genParPt"]
        },
        depth_limit=1)

    # Output record: starts with the event identifiers; flat columns are
    # appended below.
    out_events = ak.zip(
        {
            "run": tree_["st_runId"],
            "lumi": tree_["st_lumiSection"],
            "event": tree_["st_eventId"]
        },
        depth_limit=1)

    print("event loading done")
    print("# of events: ", len(cms_events))

    ## add more columns/properties to the event
    # NOTE(review): getN(x, i) is defined elsewhere; it is used throughout as
    # "the i-th object of each event" -- confirm how it pads short events.

    # Muons: tight = pt > 30, tight-ID flag, |eta| < 2.4; loose = pt > 10.
    cms_events["mu_sel_tight"] = (cms_events.mupt > 30) & (
        cms_events.muidT == True) & (numpy.abs(cms_events.mueta) < 2.4)
    cms_events["mu_sel_tight0"] = ak.Array(getN(cms_events.mu_sel_tight, 0))
    cms_events["nMuTight"] = ak.sum(cms_events.mu_sel_tight, axis=-1)
    cms_events["nMuLoose"] = ak.sum((cms_events.mupt > 10), axis=-1)
    cms_events["mu_q0"] = ak.Array(getN(cms_events.muq, 0))
    cms_events["mu_q1"] = ak.Array(getN(cms_events.muq, 1))

    # Electrons: tight = tight-ID flag, pt > 30, |eta| < 2.5; loose = pt > 10.
    cms_events["ele_sel_tight"] = (cms_events.eleidT == True) & (
        cms_events.elept > 30) & (numpy.abs(cms_events.eleeta) < 2.5)
    cms_events["ele_sel_tight0"] = ak.Array(getN(cms_events.ele_sel_tight, 0))
    cms_events["nEleTight"] = ak.sum(cms_events.ele_sel_tight, axis=-1)
    cms_events["nEleLoose"] = ak.sum((cms_events.elept > 10), axis=-1)
    cms_events["ele_q0"] = ak.Array(getN(cms_events.eleq, 0))
    cms_events["ele_q1"] = ak.Array(getN(cms_events.eleq, 1))

    # W -> l nu recoil (per lepton) and its leading entry.
    cms_events["recoil_Wmunu"] = getrecoil(cms_events.nMuTight,
                                           cms_events.mupt, cms_events.muphi,
                                           cms_events.mupx, cms_events.mupy,
                                           cms_events.metpt,
                                           cms_events.metphi)
    cms_events["recoil_Wmunu0"] = ak.firsts(cms_events.recoil_Wmunu)
    cms_events["recoil_Wenu"] = getrecoil(cms_events.nEleTight,
                                          cms_events.elept, cms_events.elephi,
                                          cms_events.elepx, cms_events.elepy,
                                          cms_events.metpt, cms_events.metphi)
    cms_events["recoil_Wenu0"] = ak.firsts(cms_events.recoil_Wenu)

    # Z -> ee candidate from the first two electrons: invariant mass, pt and
    # recoil against the MET.
    elepx0 = ak.Array(getN(cms_events.elepx, 0))
    elepx1 = ak.Array(getN(cms_events.elepx, 1))
    elepy0 = ak.Array(getN(cms_events.elepy, 0))
    elepy1 = ak.Array(getN(cms_events.elepy, 1))
    elepz0 = ak.Array(getN(cms_events.elepz, 0))
    elepz1 = ak.Array(getN(cms_events.elepz, 1))
    elee0 = ak.Array(getN(cms_events.elee, 0))
    elee1 = ak.Array(getN(cms_events.elee, 1))
    cms_events["Zee_mass"] = numpy.sqrt((elee0 + elee1)**2 -
                                        (elepx0 + elepx1)**2 -
                                        (elepy0 + elepy1)**2 -
                                        (elepz0 + elepz1)**2)
    cms_events["Zee_pt"] = numpy.sqrt((elepx0 + elepx1)**2 +
                                      (elepy0 + elepy1)**2)
    cms_events["Zee_recoil"] = getrecoil1((elepx0 + elepx1),
                                          (elepy0 + elepy1),
                                          cms_events.metpt, cms_events.metphi)

    # Z -> mumu candidate from the first two muons, same quantities.
    mupx0 = ak.Array(getN(cms_events.mupx, 0))
    mupx1 = ak.Array(getN(cms_events.mupx, 1))
    mupy0 = ak.Array(getN(cms_events.mupy, 0))
    mupy1 = ak.Array(getN(cms_events.mupy, 1))
    mupz0 = ak.Array(getN(cms_events.mupz, 0))
    mupz1 = ak.Array(getN(cms_events.mupz, 1))
    mue0 = ak.Array(getN(cms_events.mue, 0))
    mue1 = ak.Array(getN(cms_events.mue, 1))
    cms_events["Zmumu_mass"] = numpy.sqrt((mue0 + mue1)**2 -
                                          (mupx0 + mupx1)**2 -
                                          (mupy0 + mupy1)**2 -
                                          (mupz0 + mupz1)**2)
    cms_events["Zmumu_pt"] = numpy.sqrt((mupx0 + mupx1)**2 +
                                        (mupy0 + mupy1)**2)
    cms_events["Zmumu_recoil"] = getrecoil1(
        (mupx0 + mupx1), (mupy0 + mupy1), cms_events.metpt, cms_events.metphi)
    #cms_events["recoil_Zmumu"] = getrecoil

    # Recoil phi and transverse mass for the W control regions.
    cms_events["recoil_WmunuPhi"] = getRecoilPhi(
        cms_events.nMuTight, cms_events.mupt, cms_events.muphi,
        cms_events.mupx, cms_events.mupy, cms_events.metpt, cms_events.metphi)
    cms_events["recoil_WmunuPhi0"] = ak.firsts(cms_events.recoil_WmunuPhi)
    cms_events["recoil_WenuPhi"] = getRecoilPhi(
        cms_events.nEleTight, cms_events.elept, cms_events.elephi,
        cms_events.elepx, cms_events.elepy, cms_events.metpt,
        cms_events.metphi)
    cms_events["recoil_WenuPhi0"] = ak.firsts(cms_events.recoil_WenuPhi)

    cms_events["mt_Wmunu"] = getMT(cms_events.nMuTight, cms_events.mupt,
                                   cms_events.muphi, cms_events.mupx,
                                   cms_events.mupy, cms_events.metpt,
                                   cms_events.metphi)
    cms_events["mt_Wmunu0"] = ak.firsts(cms_events.mt_Wmunu)
    cms_events["mt_Wenu"] = getMT(cms_events.nEleTight, cms_events.elept,
                                  cms_events.elephi, cms_events.elepx,
                                  cms_events.elepy, cms_events.metpt,
                                  cms_events.metphi)
    cms_events["mt_Wenu0"] = ak.firsts(cms_events.mt_Wenu)

    # Jets: loose = pt > 30, tight = pt > 50, both |eta| < 2.5.
    cms_events["jet_sel_loose"] = (cms_events.jetpt > 30.0) & (numpy.abs(
        cms_events.jeteta) < 2.5)
    cms_events["jet_sel_tight"] = (cms_events.jetpt > 50.0) & (numpy.abs(
        cms_events.jeteta) < 2.5)
    #cms_events["jet_sel_b"] = (cms_events.jetcsv > 0.6321) & (numpy.abs(cms_events.jeteta)<2.4)
    # b-tag flag is evaluated on the loose-jet subset only, so its per-event
    # lists are indexed like the "...Loose" columns below.
    cms_events["jet_sel_b"] = (
        cms_events.jetcsv[cms_events.jet_sel_loose == True] > 0.6321
    ) & (numpy.abs(cms_events.jeteta[cms_events.jet_sel_loose == True]) < 2.4)

    cms_events["jetptTight"] = cms_events.jetpt[cms_events.jet_sel_tight ==
                                                True]
    cms_events["jetetaTight"] = cms_events.jeteta[cms_events.jet_sel_tight ==
                                                  True]
    cms_events["jetphiTight"] = cms_events.jetphi[cms_events.jet_sel_tight ==
                                                  True]
    cms_events["jetptLoose"] = cms_events.jetpt[cms_events.jet_sel_loose ==
                                                True]
    cms_events["jetetaLoose"] = cms_events.jeteta[cms_events.jet_sel_loose ==
                                                  True]
    cms_events["jetphiLoose"] = cms_events.jetphi[cms_events.jet_sel_loose ==
                                                  True]

    cms_events["jet_sel_tight0"] = ak.Array(
        getN(cms_events.jet_sel_tight[cms_events.jet_sel_loose == True], 0))
    cms_events["jet_sel_b_0"] = ak.Array(getN(cms_events.jet_sel_b, 0))
    cms_events["jet_sel_b_1"] = ak.Array(getN(cms_events.jet_sel_b, 1))
    cms_events["nJetLoose"] = ak.sum(cms_events.jet_sel_loose, axis=-1)
    cms_events["nJetTight"] = ak.sum(cms_events.jet_sel_tight, axis=-1)
    cms_events["nJetb"] = ak.sum(cms_events.jet_sel_b, axis=-1)

    # Minimum delta-phi between any loose jet and the MET.
    cms_events["dphi_jet_met"] = DeltaPhi(
        cms_events.jetphi[cms_events.jet_sel_loose == True],
        cms_events.metphi)
    cms_events["min_dphi_jet_met"] = ak.min(cms_events.dphi_jet_met, axis=-1)

    #--------------------------------------------------------------------------------------------------
    ## W --> lepton + nu
    #--------------------------------------------------------------------------------------------------
    from regions import get_mask_wmunu1b, get_mask_wmunu2b, get_mask_wenu1b, get_mask_wenu2b, get_mask_topmunu1b, get_mask_topmunu2b, get_mask_topenu1b, get_mask_topenu2b, get_mask_Zmumu1b, get_mask_Zmumu2b, get_mask_Zee1b, get_mask_Zee2b, get_mask_SR1b, get_mask_SR2b
    cms_events["mask_wmunu1b"] = get_mask_wmunu1b(cms_events)
    cms_events["mask_wmunu2b"] = get_mask_wmunu2b(cms_events)
    cms_events["mask_wenu1b"] = get_mask_wenu1b(cms_events)
    cms_events["mask_wenu2b"] = get_mask_wenu2b(cms_events)
    cms_events["mask_topmunu1b"] = get_mask_topmunu1b(cms_events)
    cms_events["mask_topmunu2b"] = get_mask_topmunu2b(cms_events)
    cms_events["mask_topenu1b"] = get_mask_topenu1b(cms_events)
    cms_events["mask_topenu2b"] = get_mask_topenu2b(cms_events)
    cms_events["mask_Zmumu1b"] = get_mask_Zmumu1b(cms_events)
    cms_events["mask_Zmumu2b"] = get_mask_Zmumu2b(cms_events)
    cms_events["mask_Zee1b"] = get_mask_Zee1b(cms_events)
    cms_events["mask_Zee2b"] = get_mask_Zee2b(cms_events)
    cms_events["mask_SR1b"] = get_mask_SR1b(cms_events)
    cms_events["mask_SR2b"] = get_mask_SR2b(cms_events)
    '''
    wm = cms_events.event[mask_SR2b]
    wm[~ak.is_none(wm)]
    '''

    ###############
    # Copy the flat per-event columns into the output record.
    out_events["metpt"] = cms_events["metpt"]
    out_events["metphi"] = cms_events["metphi"]
    out_events["nTrueInt"] = cms_events["nTrueInt"]
    out_events["nJetLoose"] = cms_events["nJetLoose"]

    out_events["mu_sel_tight0"] = cms_events["mu_sel_tight0"]
    out_events["nMuTight"] = cms_events["nMuTight"]
    out_events["nMuLoose"] = cms_events["nMuLoose"]
    out_events["mu_q0"] = cms_events["mu_q0"]
    out_events["mu_q1"] = cms_events["mu_q1"]
    out_events["mupt0"] = ak.Array(getN(cms_events.mupt, 0))
    out_events["mupt1"] = ak.Array(getN(cms_events.mupt, 1))
    out_events["mueta0"] = ak.Array(getN(cms_events.mueta, 0))
    out_events["mueta1"] = ak.Array(getN(cms_events.mueta, 1))
    out_events["muphi0"] = ak.Array(getN(cms_events.muphi, 0))
    out_events["muphi1"] = ak.Array(getN(cms_events.muphi, 1))

    out_events["ele_sel_tight0"] = cms_events["ele_sel_tight0"]
    out_events["nEleTight"] = cms_events["nEleTight"]
    out_events["nEleLoose"] = cms_events["nEleLoose"]
    out_events["ele_q0"] = cms_events["ele_q0"]
    out_events["ele_q1"] = cms_events["ele_q1"]
    out_events["elept0"] = ak.Array(getN(cms_events.elept, 0))
    out_events["elept1"] = ak.Array(getN(cms_events.elept, 1))
    out_events["eleeta0"] = ak.Array(getN(cms_events.eleeta, 0))
    out_events["eleeta1"] = ak.Array(getN(cms_events.eleeta, 1))
    out_events["elephi0"] = ak.Array(getN(cms_events.elephi, 0))
    out_events["elephi1"] = ak.Array(getN(cms_events.elephi, 1))

    out_events["recoil_Wmunu0"] = cms_events["recoil_Wmunu0"]
    out_events["recoil_Wenu0"] = cms_events["recoil_Wenu0"]
    out_events["recoil_WmunuPhi0"] = cms_events["recoil_WmunuPhi0"]
    out_events["recoil_WenuPhi0"] = cms_events["recoil_WenuPhi0"]
    out_events["mt_Wmunu0"] = cms_events["mt_Wmunu0"]
    out_events["mt_Wenu0"] = cms_events["mt_Wenu0"]

    out_events["Zee_mass"] = cms_events["Zee_mass"]
    out_events["Zee_pt"] = cms_events["Zee_pt"]
    out_events["Zee_recoil"] = cms_events["Zee_recoil"]
    out_events["Zmumu_mass"] = cms_events["Zmumu_mass"]
    out_events["Zmumu_pt"] = cms_events["Zmumu_pt"]
    out_events["Zmumu_recoil"] = cms_events["Zmumu_recoil"]

    # NOTE(review): nJetLoose was already copied above; this repeat is a no-op.
    out_events["nJetLoose"] = cms_events["nJetLoose"]
    out_events["nJetTight"] = cms_events["nJetTight"]
    out_events["nJetb"] = cms_events["nJetb"]
    out_events["min_dphi_jet_met"] = cms_events["min_dphi_jet_met"]
    # NOTE(review): the next three lines assign cms_events columns to
    # themselves; the left-hand side was possibly meant to be out_events.
    # Left untouched because changing it would add output columns -- confirm.
    cms_events["jet_sel_tight0"] = cms_events["jet_sel_tight0"]
    cms_events["jet_sel_b_0"] = cms_events["jet_sel_b_0"]
    cms_events["jet_sel_b_1"] = cms_events["jet_sel_b_1"]

    # Jet kinematics: index 0 is taken from the tight-jet list, indices >= 1
    # from the loose-jet list.
    out_events["jetpt0"] = ak.Array(getN(cms_events.jetptTight, 0))
    out_events["jetpt1"] = ak.Array(getN(cms_events.jetptLoose, 1))
    out_events["jetpt2"] = ak.Array(getN(cms_events.jetptLoose, 2))
    out_events["jetpt3"] = ak.Array(getN(cms_events.jetptLoose, 3))
    out_events["jetpt4"] = ak.Array(getN(cms_events.jetptLoose, 4))
    out_events["jetpt5"] = ak.Array(getN(cms_events.jetptLoose, 5))
    out_events["jetpt6"] = ak.Array(getN(cms_events.jetptLoose, 6))

    out_events["jeteta0"] = ak.Array(getN(cms_events.jetetaTight, 0))
    out_events["jeteta1"] = ak.Array(getN(cms_events.jetetaLoose, 1))
    out_events["jeteta2"] = ak.Array(getN(cms_events.jetetaLoose, 2))
    out_events["jeteta3"] = ak.Array(getN(cms_events.jetetaLoose, 3))
    out_events["jeteta4"] = ak.Array(getN(cms_events.jetetaLoose, 4))
    out_events["jeteta5"] = ak.Array(getN(cms_events.jetetaLoose, 5))
    out_events["jeteta6"] = ak.Array(getN(cms_events.jetetaLoose, 6))

    out_events["jetphi0"] = ak.Array(getN(cms_events.jetphiTight, 0))
    out_events["jetphi1"] = ak.Array(getN(cms_events.jetphiLoose, 1))
    out_events["jetphi2"] = ak.Array(getN(cms_events.jetphiLoose, 2))

    out_events["jetflav0"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_tight == True], 0))
    out_events["jetflav1"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 1))
    out_events["jetflav2"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 2))
    out_events["jetflav3"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 3))
    out_events["jetflav4"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 4))
    out_events["jetflav5"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 5))
    out_events["jetflav6"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 6))

    out_events["csv0"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_tight == True], 0))
    out_events["csv1"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_loose == True], 1))
    out_events["csv2"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_loose == True], 2))
    out_events["csv3"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_loose == True], 3))

    # Region masks, renamed to the region keys used for histogramming.
    out_events["SR_2b"] = cms_events["mask_SR2b"]
    out_events["SR_1b"] = cms_events["mask_SR1b"]
    out_events["ZeeCR_2b"] = cms_events["mask_Zee2b"]
    out_events["ZeeCR_1b"] = cms_events["mask_Zee1b"]
    out_events["ZmumuCR_2b"] = cms_events["mask_Zmumu2b"]
    out_events["ZmumuCR_1b"] = cms_events["mask_Zmumu1b"]
    out_events["TopenuCR_2b"] = cms_events["mask_topenu2b"]
    out_events["TopenuCR_1b"] = cms_events["mask_topenu1b"]
    out_events["TopmunuCR_2b"] = cms_events["mask_topmunu2b"]
    out_events["TopmunuCR_1b"] = cms_events["mask_topmunu1b"]
    out_events["WenuCR_1b"] = cms_events["mask_wenu1b"]
    out_events["WenuCR_2b"] = cms_events["mask_wenu2b"]
    out_events["WmunuCR_1b"] = cms_events["mask_wmunu1b"]
    out_events["WmunuCR_2b"] = cms_events["mask_wmunu2b"]

    ## btagging SFs
    from read_sfs import btag_sf
    from read_sfs import evaluator
    out_events["btagsf0"] = btag_sf.eval("central", out_events.jetflav0,
                                         abs(out_events.jeteta0),
                                         out_events.jetpt0)
    out_events["btagsf1"] = btag_sf.eval("central", out_events.jetflav1,
                                         abs(out_events.jeteta1),
                                         out_events.jetpt1)
    out_events["btagsf2"] = btag_sf.eval("central", out_events.jetflav2,
                                         abs(out_events.jeteta2),
                                         out_events.jetpt2)
    out_events["btagsf3"] = btag_sf.eval("central", out_events.jetflav3,
                                         abs(out_events.jeteta3),
                                         out_events.jetpt3)
    out_events["btagsf4"] = btag_sf.eval("central", out_events.jetflav4,
                                         abs(out_events.jeteta4),
                                         out_events.jetpt4)
    out_events["btagsf5"] = btag_sf.eval("central", out_events.jetflav5,
                                         abs(out_events.jeteta5),
                                         out_events.jetpt5)
    out_events["btagsf6"] = btag_sf.eval("central", out_events.jetflav6,
                                         abs(out_events.jeteta6),
                                         out_events.jetpt6)

    ## btag efficiency
    out_events["btag_eff_lwp_0"] = evaluator["btag_eff_lwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["btag_eff_lwp_1"] = evaluator["btag_eff_lwp"](
        out_events.jeteta1, out_events.jetpt1)
    out_events["ctag_eff_lwp_0"] = evaluator["ctag_eff_lwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ctag_eff_lwp_1"] = evaluator["ctag_eff_lwp"](
        out_events.jeteta1, out_events.jetpt1)
    out_events["ltag_eff_lwp_0"] = evaluator["ltag_eff_lwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ltag_eff_lwp_1"] = evaluator["ltag_eff_lwp"](
        out_events.jeteta1, out_events.jetpt1)
    out_events["btag_eff_mwp_0"] = evaluator["btag_eff_mwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["btag_eff_mwp_1"] = evaluator["btag_eff_mwp"](
        out_events.jeteta1, out_events.jetpt1)
    out_events["ctag_eff_mwp_0"] = evaluator["ctag_eff_mwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ctag_eff_mwp_1"] = evaluator["ctag_eff_mwp"](
        out_events.jeteta1, out_events.jetpt1)
    out_events["ltag_eff_mwp_0"] = evaluator["ltag_eff_mwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ltag_eff_mwp_1"] = evaluator["ltag_eff_mwp"](
        out_events.jeteta1, out_events.jetpt1)

    ## ele sfs
    out_events["eleTightSF0"] = evaluator["EGamma_SF2D_T"](out_events.eleeta0,
                                                           out_events.elept0)
    out_events["eleLooseSF1"] = evaluator["EGamma_SF2D_L"](out_events.eleeta1,
                                                           out_events.elept1)
    out_events["eleTrigSF0"] = evaluator["EGamma_SF2D_Trig"](
        out_events.eleeta0, out_events.elept0)
    out_events["eleRecoSF0"] = evaluator["EGamma_SF2D_Reco"](
        out_events.eleeta0, out_events.elept0)

    # The second electron can be below 20 GeV, where a separate low-pt reco
    # SF table is used. Each SF is kept only in its own pt range (the other
    # range is filled with 0), so the sum picks exactly one SF per event.
    eleRecoSF1_hi = evaluator["EGamma_SF2D_Reco"](out_events.eleeta1,
                                                  out_events.elept1)
    eleRecoSF1_lo = evaluator["EGamma_SF2D_Reco_lowpt"](out_events.eleeta1,
                                                        out_events.elept1)
    eleRecoSF1_hi_ = ak.fill_none(
        ak.mask(eleRecoSF1_hi, out_events.elept1 > 20.), 0)
    # FIX: the low-pt SF was previously masked with "> 20." as well, which
    # added both SFs above 20 GeV and gave 0 below. Use "< 20." like the
    # muon low/high-pt combination further down.
    eleRecoSF1_lo_ = ak.fill_none(
        ak.mask(eleRecoSF1_lo, out_events.elept1 < 20.), 0)
    out_events["eleRecoSF1"] = eleRecoSF1_hi_ + eleRecoSF1_lo_

    ## muon sfs
    # Muon SFs are provided separately for two run ranges (BCDEF and GH) and
    # are averaged here with these luminosity weights.
    bcdef_lumi = 19.554725529
    gh_lumi = 16.224846377
    total_lumi = bcdef_lumi + gh_lumi

    ##--------low pt Loose
    muonLooseIDSF_lowpt1 = (
        (bcdef_lumi * evaluator["muon_lowpt_BCDEF_LooseID"]
         (out_events.mupt1, abs(out_events.mueta1))) +
        (gh_lumi * evaluator["muon_lowpt_GH_LooseID"]
         (out_events.mupt1, abs(out_events.mueta1)))) / total_lumi

    ##----------- medium pt Loose
    muonLooseIDSF1 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_LooseID"]
                       (out_events.mueta1, out_events.mupt1)) +
                      (gh_lumi * evaluator["muon_highpt_GH_LooseID"]
                       (out_events.mueta1, out_events.mupt1))) / total_lumi
    muonLooseISOSF1 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_LooseISO"]
                        (out_events.mueta1, out_events.mupt1)) +
                       (gh_lumi * evaluator["muon_highpt_GH_LooseISO"]
                        (out_events.mueta1, out_events.mupt1))) / total_lumi

    # Select the low- or high-pt ID SF by the muon pt, then add.
    muon_loose_ID_low_SF_1 = ak.fill_none(
        ak.mask(muonLooseIDSF_lowpt1, out_events.mupt1 < 20.), 0)
    muon_loose_ID_high_SF_1 = ak.fill_none(
        ak.mask(muonLooseIDSF1, out_events.mupt1 > 20.), 0)
    muon_loose_ID_SF_1 = muon_loose_ID_low_SF_1 + muon_loose_ID_high_SF_1
    out_events["muLooseSF1"] = muon_loose_ID_SF_1 * muonLooseISOSF1

    ##------------medium pt tight
    muonTightIDSF0 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_TightID"]
                       (out_events.mueta0, out_events.mupt0)) +
                      (gh_lumi * evaluator["muon_highpt_GH_TightID"]
                       (out_events.mueta0, out_events.mupt0))) / total_lumi
    muonTightISOSF0 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_TightISO"]
                        (out_events.mueta0, out_events.mupt0)) +
                       (gh_lumi * evaluator["muon_highpt_GH_TightISO"]
                        (out_events.mueta0, out_events.mupt0))) / total_lumi
    out_events["muTightSF0"] = muonTightIDSF0 * muonTightISOSF0

    out_events["puweight"] = evaluator["pu_weight"](cms_events.nTrueInt)

    ## trigger sfs
    out_events["mettrigWeight"] = evaluator["met_trig"](cms_events.metpt)
    out_events["recoilWmunutrigWeight"] = evaluator["met_trig"](
        cms_events.recoil_Wmunu0)
    out_events["recoilWenutrigWeight"] = evaluator["met_trig"](
        cms_events.recoil_Wenu0)
    out_events["recoilZmumutrigWeight"] = evaluator["met_trig"](
        cms_events.Zmumu_recoil)
    out_events["recoilZeetrigWeight"] = evaluator["met_trig"](
        cms_events.Zee_recoil)

    ## Fill weights for each CR so that we don't need to worry later
    out_events["weight_SR_2b"] = out_events.puweight * out_events.mettrigWeight
    out_events["weight_SR_1b"] = out_events.puweight * out_events.mettrigWeight
    out_events["weight_ZeeCR_2b"] = out_events.puweight * out_events.eleTrigSF0
    out_events["weight_ZeeCR_1b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_ZmumuCR_2b"] = out_events.puweight * out_events.recoilZmumutrigWeight
    out_events[
        "weight_ZmumuCR_1b"] = out_events.puweight * out_events.recoilZmumutrigWeight
    out_events[
        "weight_TopenuCR_2b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_TopenuCR_1b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_TopmunuCR_2b"] = out_events.puweight * out_events.recoilWmunutrigWeight
    out_events[
        "weight_TopmunuCR_1b"] = out_events.puweight * out_events.recoilWmunutrigWeight
    out_events[
        "weight_WenuCR_1b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_WenuCR_2b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_WmunuCR_1b"] = out_events.puweight * out_events.recoilWmunutrigWeight
    out_events[
        "weight_WmunuCR_2b"] = out_events.puweight * out_events.recoilWmunutrigWeight

    ## Fill Histograms
    from variables import vardict, regions, variables_common
    from binning import binning
    f = TFile(outputfile, "RECREATE")
    for ireg in regions:
        # Keep only events passing this region's mask, dropping None rows.
        thisregion = out_events[out_events[ireg] == True]
        thisregion_ = thisregion[~(ak.is_none(thisregion))]
        weight_ = "weight_" + ireg
        for ivar in variables_common[ireg]:
            hist_name_ = "h_reg_" + ireg + "_" + vardict[ivar]
            h = VarToHist(thisregion_[ivar], thisregion_[weight_], hist_name_,
                          binning[ireg][ivar])
            f.cd()
            h.Write()

    # Carry the total event count read from the input file into the output.
    h_total = TH1F("h_total_mcweight", "h_total_mcweight", 2, 0, 2)
    h_total.SetBinContent(1, nevents)
    f.cd()
    h_total.Write()

    # Optional flat dump of all output columns; disabled by default.
    write_parquet = False
    if write_parquet:
        ak.to_parquet(out_events, "analysis_wjets_allevents.parquet")
def process(self, events): output = self.accumulator.identity() # use a very loose preselection to filter the events presel = ak.num(events.Jet)>2 ev = events[presel] dataset = ev.metadata['dataset'] # load the config - probably not needed anymore cfg = loadConfig() output['totalEvents']['all'] += len(events) output['skimmedEvents']['all'] += len(ev) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): ## Generated leptons gen_lep = ev.GenL leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))] trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))] # Get the leptons. This has changed a couple of times now, but we are using fakeable objects as baseline leptons. # The added p4 instance has the corrected pt (conePt for fakeable) and should be used for any following selection or calculation # Any additional correction (if we choose to do so) should be added here, e.g. Rochester corrections, ... ## Muons mu_v = Collections(ev, "Muon", "vetoTTH", year=year).get() # these include all muons, tight and fakeable mu_t = Collections(ev, "Muon", "tightSSTTH", year=year).get() mu_f = Collections(ev, "Muon", "fakeableSSTTH", year=year).get() muon = ak.concatenate([mu_t, mu_f], axis=1) muon['p4'] = get_four_vec_fromPtEtaPhiM(muon, get_pt(muon), muon.eta, muon.phi, muon.mass, copy=False) #FIXME new ## Electrons el_v = Collections(ev, "Electron", "vetoTTH", year=year).get() el_t = Collections(ev, "Electron", "tightSSTTH", year=year).get() el_f = Collections(ev, "Electron", "fakeableSSTTH", year=year).get() electron = ak.concatenate([el_t, el_f], axis=1) electron['p4'] = get_four_vec_fromPtEtaPhiM(electron, get_pt(electron), electron.eta, electron.phi, electron.mass, copy=False) #FIXME new if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): el_t_p = prompt(el_t) el_t_np = nonprompt(el_t) el_f_p = prompt(el_f) el_f_np = nonprompt(el_f) mu_t_p = prompt(mu_t) mu_t_np = nonprompt(mu_t) mu_f_p = prompt(mu_f) 
mu_f_np = nonprompt(mu_f) is_flipped = ( (el_t_p.matched_gen.pdgId*(-1) == el_t_p.pdgId) & (abs(el_t_p.pdgId) == 11) ) el_t_p_cc = el_t_p[~is_flipped] # this is tight, prompt, and charge consistent el_t_p_cf = el_t_p[is_flipped] # this is tight, prompt, and charge flipped ## Merge electrons and muons. These are fakeable leptons now lepton = ak.concatenate([muon, electron], axis=1) leading_lepton_idx = ak.singletons(ak.argmax(lepton.p4.pt, axis=1)) leading_lepton = lepton[leading_lepton_idx] trailing_lepton_idx = ak.singletons(ak.argmin(lepton.p4.pt, axis=1)) trailing_lepton = lepton[trailing_lepton_idx] dilepton_mass = (leading_lepton.p4 + trailing_lepton.p4).mass dilepton_pt = (leading_lepton.p4 + trailing_lepton.p4).pt #dilepton_dR = delta_r(leading_lepton, trailing_lepton) dilepton_dR = leading_lepton.p4.delta_r(trailing_lepton.p4) lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.p4.pt, ascending=False)].pdgId, 2, clip=True), 0) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon) n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart) gp = ev.GenPart gp_e = gp[((abs(gp.pdgId)==11)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))] gp_m = gp[((abs(gp.pdgId)==13)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))] n_gen_lep = ak.num(gp_e) + ak.num(gp_m) else: n_gen_lep = np.zeros(len(ev)) LL = (n_gen_lep > 2) # this is the classifier for LL events (should mainly be ttZ/tZ/WZ...) 
mt_lep_met = mt(lepton.p4.pt, lepton.p4.phi, ev.MET.pt, ev.MET.phi) min_mt_lep_met = ak.min(mt_lep_met, axis=1) ## Tau and other stuff tau = getTaus(ev) tau = tau[~match(tau, muon, deltaRCut=0.4)] tau = tau[~match(tau, electron, deltaRCut=0.4)] track = getIsoTracks(ev) ## Jets jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom') jet = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons central = jet[(abs(jet.eta)<2.4)] btag = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet light = getBTagsDeepFlavB(jet, year=self.year, invert=True) fwd = getFwdJet(light) fwd_noPU = getFwdJet(light, puId=False) high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2] bl = cross(lepton, high_score_btag) bl_dR = delta_r(bl['0'], bl['1']) min_bl_dR = ak.min(bl_dR, axis=1) ## forward jets j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator # try to get either the most forward light jet, or if there's more than one with eta>1.7, the highest pt one most_fwd = light[ak.argsort(abs(light.eta))][:,0:1] #most_fwd = light[ak.singletons(ak.argmax(abs(light.eta)))] best_fwd = ak.concatenate([j_fwd, most_fwd], axis=1)[:,0:1] jf = cross(j_fwd, jet) mjf = (jf['0']+jf['1']).mass j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd delta_eta = abs(j_fwd2.eta - j_fwd.eta) ## MET -> can switch to puppi MET met_pt = ev.MET.pt met_phi = ev.MET.phi ## other variables ht = ak.sum(jet.pt, axis=1) #st = met_pt + ht + ak.sum(get_pt(muon), axis=1) + ak.sum(get_pt(electron), axis=1) st = met_pt + ht + ak.sum(lepton.p4.pt, axis=1) # define the weight weight = Weights( len(ev) ) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): # 
lumi weight weight.add("weight", ev.weight*cfg['lumi'][self.year]) # PU weight weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False) # b-tag SFs weight.add("btag", self.btagSF.Method1a(btag, light)) # lepton SFs weight.add("lepton", self.leptonSF.get(electron, muon)) cutflow = Cutflow(output, ev, weight=weight) # slightly restructured # calculate everything from loose, require two tights on top # since n_tight == n_loose == 2, the tight and loose leptons are the same in the end # in this selection we'll get events with exactly two fakeable+tight and two loose leptons. sel = Selection( dataset = dataset, events = ev, year = self.year, ele = electron, ele_veto = el_v, mu = muon, mu_veto = mu_v, jet_all = jet, jet_central = central, jet_btag = btag, jet_fwd = fwd, jet_light = light, met = ev.MET, ) baseline = sel.dilep_baseline(cutflow=cutflow, SS=True, omit=['N_fwd>0']) baseline_OS = sel.dilep_baseline(cutflow=cutflow, SS=False, omit=['N_fwd>0']) # this is for charge flip estimation if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): BL = (baseline & ((ak.num(el_t_p_cc)+ak.num(mu_t_p))==2)) # this is the MC baseline for events with two tight prompt leptons BL_incl = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2)) # this is the MC baseline for events with two tight leptons np_est_sel_mc = (baseline & \ ((((ak.num(el_t_p_cc)+ak.num(mu_t_p))==1) & ((ak.num(el_f_np)+ak.num(mu_f_np))==1)) | (((ak.num(el_t_p_cc)+ak.num(mu_t_p))==0) & ((ak.num(el_f_np)+ak.num(mu_f_np))==2)) )) # no overlap between tight and nonprompt, and veto on additional leptons. 
this should be enough np_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_np)+ak.num(mu_t_np))>=1) ) # two tight leptons, at least one nonprompt np_est_sel_data = (baseline & ~baseline) # this has to be false cf_est_sel_mc = (baseline_OS & ((ak.num(el_t_p)+ak.num(mu_t_p))==2)) cf_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_p_cf))>=1) ) # two tight leptons, at least one electron charge flip cf_est_sel_data = (baseline & ~baseline) # this has to be false weight_np_mc = self.nonpromptWeight.get(el_f_np, mu_f_np, meas='TT') weight_cf_mc = self.chargeflipWeight.flip_weight(el_t_p) else: BL = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2)) BL_incl = BL np_est_sel_mc = (baseline & ~baseline) np_obs_sel_mc = (baseline & ~baseline) np_est_sel_data = (baseline & (ak.num(el_t)+ak.num(mu_t)==1) & (ak.num(el_f)+ak.num(mu_f)==1) ) cf_est_sel_mc = (baseline & ~baseline) cf_obs_sel_mc = (baseline & ~baseline) cf_est_sel_data = (baseline_OS & ((ak.num(el_t)+ak.num(mu_t))==2) ) weight_np_mc = np.zeros(len(ev)) weight_cf_mc = np.zeros(len(ev)) #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event])) run_ = ak.to_numpy(ev.run) lumi_ = ak.to_numpy(ev.luminosityBlock) event_ = ak.to_numpy(ev.event) if False: output['%s_run'%dataset] += processor.column_accumulator(run_[BL]) output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL]) output['%s_event'%dataset] += processor.column_accumulator(event_[BL]) weight_BL = weight.weight()[BL] # this is just a shortened weight list for the two prompt selection weight_np_data = self.nonpromptWeight.get(el_f, mu_f, meas='data') weight_cf_data = self.chargeflipWeight.flip_weight(el_t) out_sel = (BL | np_est_sel_mc | cf_est_sel_mc) dummy = (np.ones(len(ev))==1) def fill_multiple_np(hist, arrays, add_sel=dummy): #reg_sel = [BL, np_est_sel_mc, np_obs_sel_mc, np_est_sel_data, cf_est_sel_mc, cf_obs_sel_mc, cf_est_sel_data], #print ('len', len(reg_sel[0])) #print ('sel', reg_sel[0]) 
reg_sel = [BL&add_sel, BL_incl&add_sel, np_est_sel_mc&add_sel, np_obs_sel_mc&add_sel, np_est_sel_data&add_sel, cf_est_sel_mc&add_sel, cf_obs_sel_mc&add_sel, cf_est_sel_data&add_sel], fill_multiple( hist, datasets=[ dataset, # only prompt contribution from process dataset+"_incl", # everything from process (inclusive MC truth) "np_est_mc", # MC based NP estimate "np_obs_mc", # MC based NP observation "np_est_data", "cf_est_mc", "cf_obs_mc", "cf_est_data", ], arrays=arrays, selections=reg_sel[0], # no idea where the additional dimension is coming from... weights=[ weight.weight()[reg_sel[0][0]], weight.weight()[reg_sel[0][1]], weight.weight()[reg_sel[0][2]]*weight_np_mc[reg_sel[0][2]], weight.weight()[reg_sel[0][3]], weight.weight()[reg_sel[0][4]]*weight_np_data[reg_sel[0][4]], weight.weight()[reg_sel[0][5]]*weight_cf_mc[reg_sel[0][5]], weight.weight()[reg_sel[0][6]], weight.weight()[reg_sel[0][7]]*weight_cf_data[reg_sel[0][7]], ], ) if self.evaluate or self.dump: # define the inputs to the NN # this is super stupid. there must be a better way. # used a np.stack which is ok performance wise. 
pandas data frame seems to be slow and memory inefficient #FIXME no n_b, n_fwd back in v13/v14 of the DNN NN_inputs_d = { 'n_jet': ak.to_numpy(ak.num(jet)), 'n_fwd': ak.to_numpy(ak.num(fwd)), 'n_b': ak.to_numpy(ak.num(btag)), 'n_tau': ak.to_numpy(ak.num(tau)), #'n_track': ak.to_numpy(ak.num(track)), 'st': ak.to_numpy(st), 'met': ak.to_numpy(ev.MET.pt), 'mjj_max': ak.to_numpy(ak.fill_none(ak.max(mjf, axis=1),0)), 'delta_eta_jj': ak.to_numpy(pad_and_flatten(delta_eta)), 'lead_lep_pt': ak.to_numpy(pad_and_flatten(leading_lepton.p4.pt)), 'lead_lep_eta': ak.to_numpy(pad_and_flatten(leading_lepton.p4.eta)), 'sublead_lep_pt': ak.to_numpy(pad_and_flatten(trailing_lepton.p4.pt)), 'sublead_lep_eta': ak.to_numpy(pad_and_flatten(trailing_lepton.p4.eta)), 'dilepton_mass': ak.to_numpy(pad_and_flatten(dilepton_mass)), 'dilepton_pt': ak.to_numpy(pad_and_flatten(dilepton_pt)), 'fwd_jet_pt': ak.to_numpy(pad_and_flatten(best_fwd.pt)), 'fwd_jet_p': ak.to_numpy(pad_and_flatten(best_fwd.p)), 'fwd_jet_eta': ak.to_numpy(pad_and_flatten(best_fwd.eta)), 'lead_jet_pt': ak.to_numpy(pad_and_flatten(jet[:, 0:1].pt)), 'sublead_jet_pt': ak.to_numpy(pad_and_flatten(jet[:, 1:2].pt)), 'lead_jet_eta': ak.to_numpy(pad_and_flatten(jet[:, 0:1].eta)), 'sublead_jet_eta': ak.to_numpy(pad_and_flatten(jet[:, 1:2].eta)), 'lead_btag_pt': ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].pt)), 'sublead_btag_pt': ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].pt)), 'lead_btag_eta': ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].eta)), 'sublead_btag_eta': ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].eta)), 'min_bl_dR': ak.to_numpy(ak.fill_none(min_bl_dR, 0)), 'min_mt_lep_met': ak.to_numpy(ak.fill_none(min_mt_lep_met, 0)), } if self.dump: for k in NN_inputs_d.keys(): output[k] += processor.column_accumulator(NN_inputs_d[k][out_sel]) if self.evaluate: NN_inputs = np.stack( [NN_inputs_d[k] for k in NN_inputs_d.keys()] ) NN_inputs = np.nan_to_num(NN_inputs, 0, posinf=1e5, neginf=-1e5) # events with 
posinf/neginf/nan will not pass the BL selection anyway NN_inputs = np.moveaxis(NN_inputs, 0, 1) # this is needed for a np.stack (old version) model, scaler = load_onnx_model(self.training) try: NN_inputs_scaled = scaler.transform(NN_inputs) NN_pred = predict_onnx(model, NN_inputs_scaled) best_score = np.argmax(NN_pred, axis=1) except ValueError: print ("Problem with prediction. Showing the shapes here:") print (np.shape(NN_inputs)) print (np.shape(weight_BL)) NN_pred = np.array([]) best_score = np.array([]) NN_inputs_scaled = NN_inputs raise ##k.clear_session() #FIXME below needs to be fixed again with changed NN evaluation. Should work now fill_multiple_np(output['node'], {'multiplicity':best_score}) fill_multiple_np(output['node0_score_incl'], {'score':NN_pred[:,0]}) fill_multiple_np(output['node1_score_incl'], {'score':NN_pred[:,1]}) fill_multiple_np(output['node2_score_incl'], {'score':NN_pred[:,2]}) fill_multiple_np(output['node3_score_incl'], {'score':NN_pred[:,3]}) fill_multiple_np(output['node4_score_incl'], {'score':NN_pred[:,4]}) fill_multiple_np(output['node0_score'], {'score':NN_pred[:,0]}, add_sel=(best_score==0)) fill_multiple_np(output['node1_score'], {'score':NN_pred[:,1]}, add_sel=(best_score==1)) fill_multiple_np(output['node2_score'], {'score':NN_pred[:,2]}, add_sel=(best_score==2)) fill_multiple_np(output['node3_score'], {'score':NN_pred[:,3]}, add_sel=(best_score==3)) fill_multiple_np(output['node4_score'], {'score':NN_pred[:,4]}, add_sel=(best_score==4)) #SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0))) #SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0))) #leading_lepton_BL = leading_lepton[BL] #output['lead_lep_SR_pp'].fill( # dataset = dataset, # pt = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)), # weight = weight_BL[SR_sel_pp] #) #output['lead_lep_SR_mm'].fill( # dataset = dataset, # pt = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)), # weight = weight_BL[SR_sel_mm] #) del 
model del scaler del NN_inputs, NN_inputs_scaled, NN_pred labels = {'topW_v3': 0, 'TTW':1, 'TTZ': 2, 'TTH': 3, 'ttbar': 4, 'rare':5, 'diboson':6} # these should be all? if dataset in labels: label_mult = labels[dataset] else: label_mult = 7 # data or anything else if self.dump: output['label'] += processor.column_accumulator(np.ones(len(ev[out_sel])) * label_mult) output['SS'] += processor.column_accumulator(ak.to_numpy(BL[out_sel])) output['OS'] += processor.column_accumulator(ak.to_numpy(cf_est_sel_mc[out_sel])) output['AR'] += processor.column_accumulator(ak.to_numpy(np_est_sel_mc[out_sel])) output['LL'] += processor.column_accumulator(ak.to_numpy(LL[out_sel])) output['weight'] += processor.column_accumulator(ak.to_numpy(weight.weight()[out_sel])) output['weight_np'] += processor.column_accumulator(ak.to_numpy(weight_np_mc[out_sel])) output['weight_cf'] += processor.column_accumulator(ak.to_numpy(weight_cf_mc[out_sel])) # first, make a few super inclusive plots output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL) output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL) fill_multiple_np(output['N_jet'], {'multiplicity': ak.num(jet)}) fill_multiple_np(output['N_b'], {'multiplicity': ak.num(btag)}) fill_multiple_np(output['N_central'], {'multiplicity': ak.num(central)}) fill_multiple_np(output['N_ele'], {'multiplicity':ak.num(electron)}) fill_multiple_np(output['N_mu'], {'multiplicity':ak.num(muon)}) fill_multiple_np(output['N_fwd'], {'multiplicity':ak.num(fwd)}) fill_multiple_np(output['ST'], {'ht': st}) fill_multiple_np(output['HT'], {'ht': ht}) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL) output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL) output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], 
weight=weight_BL) output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL) output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL) output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL) output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL) fill_multiple_np(output['MET'], {'pt':ev.MET.pt, 'phi':ev.MET.phi}) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): output['lead_gen_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)), eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)), phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)), weight = weight_BL ) output['trail_gen_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)), eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)), phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)), weight = weight_BL ) fill_multiple_np( output['lead_lep'], { 'pt': pad_and_flatten(leading_lepton.p4.pt), 'eta': pad_and_flatten(leading_lepton.eta), 'phi': pad_and_flatten(leading_lepton.phi), }, ) fill_multiple_np( output['trail_lep'], { 'pt': pad_and_flatten(trailing_lepton.p4.pt), 'eta': pad_and_flatten(trailing_lepton.eta), 'phi': pad_and_flatten(trailing_lepton.phi), }, ) output['j1'].fill( dataset = dataset, pt = ak.flatten(jet.pt_nom[:, 0:1][BL]), eta = ak.flatten(jet.eta[:, 0:1][BL]), phi = ak.flatten(jet.phi[:, 0:1][BL]), weight = weight_BL ) output['j2'].fill( dataset = dataset, pt = ak.flatten(jet[:, 1:2][BL].pt_nom), eta = ak.flatten(jet[:, 1:2][BL].eta), phi = ak.flatten(jet[:, 1:2][BL].phi), weight = weight_BL ) output['j3'].fill( dataset = dataset, pt = ak.flatten(jet[:, 2:3][BL].pt_nom), eta = ak.flatten(jet[:, 2:3][BL].eta), phi = ak.flatten(jet[:, 2:3][BL].phi), weight = weight_BL ) fill_multiple_np( output['fwd_jet'], { 'pt': 
pad_and_flatten(best_fwd.pt), 'eta': pad_and_flatten(best_fwd.eta), 'phi': pad_and_flatten(best_fwd.phi), }, ) #output['fwd_jet'].fill( # dataset = dataset, # pt = ak.flatten(j_fwd[BL].pt), # eta = ak.flatten(j_fwd[BL].eta), # phi = ak.flatten(j_fwd[BL].phi), # weight = weight_BL #) output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(best_fwd[BL].p), weight = weight_BL) return output
def process(self, events):
    """Fill the dilepton control histograms for one chunk of events.

    Events with more than two jets are kept, the lepton / jet / b-tag /
    forward-jet collections are built, and the baseline selection
    ``dilep_baseline(SS=False)`` is applied before filling the histograms in
    the accumulator.

    Datasets whose name matches ``MuonEG|DoubleMuon|DoubleEG|EGamma`` get no
    event weights and have their run / lumi / event numbers stored (presumably
    these are the collision-data samples -- confirm against the sample list).
    All other datasets get the lumi, pile-up, b-tag and lepton weights and are
    additionally processed once per entry of ``self.variations``.

    Parameters
    ----------
    events
        Event chunk handed in by the coffea executor; must expose ``Jet`` and
        ``metadata['dataset']``.

    Returns
    -------
    The filled accumulator obtained from ``self.accumulator.identity()``.
    """
    output = self.accumulator.identity()

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2

    ev = events[presel]
    dataset = ev.metadata['dataset']

    # Evaluate the dataset-name test once instead of re-compiling and
    # re-searching the same pattern at every branch below.
    is_data = re.search(r'MuonEG|DoubleMuon|DoubleEG|EGamma', dataset) is not None

    # load the config - only the luminosity is read from it below
    cfg = loadConfig()

    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)

    ## Muons
    muon = Collections(ev, "Muon", "tightSSTTH").get()
    vetomuon = Collections(ev, "Muon", "vetoTTH").get()

    ## Electrons
    electron = Collections(ev, "Electron", "tightSSTTH").get()
    vetoelectron = Collections(ev, "Electron", "vetoTTH").get()

    ## Merge electrons and muons - this should work better now in ak1
    lepton = ak.concatenate([muon, electron], axis=1)
    leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
    leading_lepton = lepton[leading_lepton_idx]
    trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
    trailing_lepton = lepton[trailing_lepton_idx]

    dilepton_mass = (leading_lepton + trailing_lepton).mass
    dilepton_pt = (leading_lepton + trailing_lepton).pt

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    # need to sort wrt smeared and recorrected jet pt
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
    jet = jet[~match(jet, muon, deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~match(jet, electron, deltaRCut=0.4)]  # remove jets that overlap with electrons

    central = jet[(abs(jet.eta) < 2.4)]
    btag = getBTagsDeepFlavB(jet, year=self.year)  # should study working point for DeepJet
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)

    ## forward jets
    high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]  # highest momentum spectator

    ## Get the two leading b-jets in terms of btag score
    # NOTE(review): ak.argsort sorts ascending unless ascending=False is
    # passed, so this slice keeps the two *lowest*-score central jets.
    # Left unchanged because fixing it alters the physics output -- confirm.
    high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

    jf = cross(high_p_fwd, jet)
    mjf = (jf['0'] + jf['1']).mass
    deltaEta = abs(high_p_fwd.eta - jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
    mjf_max = ak.max(mjf, axis=1)

    ## other variables
    tau = getTaus(ev)
    track = getIsoTracks(ev)

    bl = cross(lepton, high_score_btag)
    bl_dR = delta_r(bl['0'], bl['1'])
    min_bl_dR = ak.min(bl_dR, axis=1)

    mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
    min_mt_lep_met = ak.min(mt_lep_met, axis=1)

    # define the weight
    weight = Weights(len(ev))

    if not is_data:
        # lumi weight
        weight.add("weight", ev.weight * cfg['lumi'][self.year])

        # PU weight - not in the babies...
        weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

        # b-tag SFs
        weight.add("btag", self.btagSF.Method1a(btag, light))

        # lepton SFs (trigger SFs are currently not applied)
        weight.add("lepton", self.leptonSF.get(electron, muon))

    cutflow = Cutflow(output, ev, weight=weight)

    sel = Selection(
        dataset=dataset,
        events=ev,
        year=self.year,
        ele=electron,
        ele_veto=vetoelectron,
        mu=muon,
        mu_veto=vetomuon,
        jet_all=jet,
        jet_central=central,
        jet_btag=btag,
        jet_fwd=fwd,
        met=ev.MET,
    )

    BL = sel.dilep_baseline(cutflow=cutflow, SS=False)

    # No weights are added past this point, so the per-event weight can be
    # evaluated once and sliced per selection.
    event_weight = weight.weight()
    weight_BL = event_weight[BL]

    # first, make a few super inclusive plots
    output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
    output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
    output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
    output['N_tau'].fill(dataset=dataset, multiplicity=ak.num(tau)[BL], weight=weight_BL)
    output['N_track'].fill(dataset=dataset, multiplicity=ak.num(track)[BL], weight=weight_BL)

    # "N-1" style plots: drop the requirement on the plotted quantity
    BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
    output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=event_weight[BL_minusNb])

    output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
    output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
    # Fixed: this histogram was previously filled with the electron multiplicity.
    output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight_BL)

    BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
    output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=event_weight[BL_minusFwd])

    output['dilep_pt'].fill(dataset=dataset, pt=ak.flatten(dilepton_pt[BL]), weight=weight_BL)
    output['dilep_mass'].fill(dataset=dataset, mass=ak.flatten(dilepton_mass[BL]), weight=weight_BL)
    output['mjf_max'].fill(dataset=dataset, mass=mjf_max[BL], weight=weight_BL)
    output['deltaEta'].fill(dataset=dataset, eta=ak.flatten(deltaEta[BL]), weight=weight_BL)
    output['min_bl_dR'].fill(dataset=dataset, eta=min_bl_dR[BL], weight=weight_BL)
    output['min_mt_lep_met'].fill(dataset=dataset, pt=min_mt_lep_met[BL], weight=weight_BL)

    output['leading_jet_pt'].fill(dataset=dataset, pt=ak.flatten(jet[:, 0:1][BL].pt), weight=weight_BL)
    output['subleading_jet_pt'].fill(dataset=dataset, pt=ak.flatten(jet[:, 1:2][BL].pt), weight=weight_BL)
    output['leading_jet_eta'].fill(dataset=dataset, eta=ak.flatten(jet[:, 0:1][BL].eta), weight=weight_BL)
    output['subleading_jet_eta'].fill(dataset=dataset, eta=ak.flatten(jet[:, 1:2][BL].eta), weight=weight_BL)

    output['leading_btag_pt'].fill(dataset=dataset, pt=ak.flatten(high_score_btag[:, 0:1][BL].pt), weight=weight_BL)
    output['subleading_btag_pt'].fill(dataset=dataset, pt=ak.flatten(high_score_btag[:, 1:2][BL].pt), weight=weight_BL)
    output['leading_btag_eta'].fill(dataset=dataset, eta=ak.flatten(high_score_btag[:, 0:1][BL].eta), weight=weight_BL)
    output['subleading_btag_eta'].fill(dataset=dataset, eta=ak.flatten(high_score_btag[:, 1:2][BL].eta), weight=weight_BL)

    BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
    output['MET'].fill(
        dataset=dataset,
        pt=ev.MET[BL_minusMET].pt,
        phi=ev.MET[BL_minusMET].phi,
        weight=event_weight[BL_minusMET],
    )

    output['lead_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
        weight=weight_BL,
    )

    output['trail_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
        weight=weight_BL,
    )

    output['fwd_jet'].fill(
        dataset=dataset,
        pt=ak.flatten(high_p_fwd[BL].pt_nom),
        eta=ak.flatten(high_p_fwd[BL].eta),
        phi=ak.flatten(high_p_fwd[BL].phi),
        weight=weight_BL,
    )

    output['b1'].fill(
        dataset=dataset,
        pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
        eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
        phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
        weight=weight_BL,
    )

    output['b2'].fill(
        dataset=dataset,
        pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
        eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
        phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
        weight=weight_BL,
    )

    output['j1'].fill(
        dataset=dataset,
        pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
        eta=ak.flatten(jet.eta[:, 0:1][BL]),
        phi=ak.flatten(jet.phi[:, 0:1][BL]),
        weight=weight_BL,
    )

    output['j2'].fill(
        dataset=dataset,
        pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
        eta=ak.flatten(jet[:, 1:2][BL].eta),
        phi=ak.flatten(jet[:, 1:2][BL].phi),
        weight=weight_BL,
    )

    output['j3'].fill(
        dataset=dataset,
        pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
        eta=ak.flatten(jet[:, 2:3][BL].eta),
        phi=ak.flatten(jet[:, 2:3][BL].phi),
        weight=weight_BL,
    )

    if is_data:
        # keep the run / lumi / event numbers of the selected events
        run_ = ak.to_numpy(ev.run)
        lumi_ = ak.to_numpy(ev.luminosityBlock)
        event_ = ak.to_numpy(ev.event)
        output['%s_run' % dataset] += processor.column_accumulator(run_[BL])
        output['%s_lumi' % dataset] += processor.column_accumulator(lumi_[BL])
        output['%s_event' % dataset] += processor.column_accumulator(event_[BL])

    # Now, take care of systematic uncertainties
    if not is_data:
        alljets = getJets(ev, minPt=0, maxEta=4.7)
        alljets = alljets[(alljets.jetId > 1)]

        for var in self.variations:
            # get the collections that change with the variations
            jet = getPtEtaPhi(alljets, pt_var=var)
            jet = jet[(jet.pt > 25)]
            jet = jet[~match(jet, muon, deltaRCut=0.4)]  # remove jets that overlap with muons
            jet = jet[~match(jet, electron, deltaRCut=0.4)]  # remove jets that overlap with electrons

            central = jet[(abs(jet.eta) < 2.4)]
            btag = getBTagsDeepFlavB(jet, year=self.year)  # should study working point for DeepJet
            light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
            fwd = getFwdJet(light)

            ## forward jets
            high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]  # highest momentum spectator

            ## Get the two leading b-jets in terms of btag score
            # NOTE(review): same ascending-sort caveat as for the nominal collection.
            high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

            # MET itself is not varied for now; only the jet collections change.
            met = ev.MET

            sel = Selection(
                dataset=dataset,
                events=ev,
                year=self.year,
                ele=electron,
                ele_veto=vetoelectron,
                mu=muon,
                mu_veto=vetomuon,
                jet_all=jet,
                jet_central=central,
                jet_btag=btag,
                jet_fwd=fwd,
                met=met,
            )

            BL = sel.dilep_baseline(SS=False)
            weight_BL = event_weight[BL]

            output['N_jet_' + var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)

            BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
            output['N_fwd_' + var].fill(
                dataset=dataset,
                multiplicity=ak.num(fwd)[BL_minusFwd],
                weight=event_weight[BL_minusFwd],
            )

            BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
            output['N_b_' + var].fill(
                dataset=dataset,
                multiplicity=ak.num(btag)[BL_minusNb],
                weight=event_weight[BL_minusNb],
            )

            output['N_central_' + var].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)

            # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
            output['j1_' + var].fill(
                dataset=dataset,
                pt=ak.flatten(jet.pt[:, 0:1][BL]),
                eta=ak.flatten(jet.eta[:, 0:1][BL]),
                phi=ak.flatten(jet.phi[:, 0:1][BL]),
                weight=weight_BL,
            )

            output['b1_' + var].fill(
                dataset=dataset,
                pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                weight=weight_BL,
            )

            output['fwd_jet_' + var].fill(
                dataset=dataset,
                pt=ak.flatten(high_p_fwd[BL].pt),
                eta=ak.flatten(high_p_fwd[BL].eta),
                phi=ak.flatten(high_p_fwd[BL].phi),
                weight=weight_BL,
            )

            BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
            output['MET_' + var].fill(
                dataset=dataset,
                pt=ev.MET[BL_minusMET].pt,
                phi=ev.MET[BL_minusMET].phi,
                weight=event_weight[BL_minusMET],
            )

    return output
cms_events["jetptLoose"] = cms_events.jetpt[cms_events.jet_sel_loose == True] cms_events["jetetaLoose"] = cms_events.jeteta[cms_events.jet_sel_loose == True] cms_events["jetphiLoose"] = cms_events.jetphi[cms_events.jet_sel_loose == True] cms_events["jet_sel_tight0"] = ak.Array( getN(cms_events.jet_sel_tight[cms_events.jet_sel_loose == True], 0)) cms_events["jet_sel_b_0"] = ak.Array(getN(cms_events.jet_sel_b, 0)) cms_events["jet_sel_b_1"] = ak.Array(getN(cms_events.jet_sel_b, 1)) cms_events["nJetLoose"] = ak.sum(cms_events.jet_sel_loose, axis=-1) cms_events["nJetTight"] = ak.sum(cms_events.jet_sel_tight, axis=-1) cms_events["nJetb"] = ak.sum(cms_events.jet_sel_b, axis=-1) cms_events["dphi_jet_met"] = DeltaPhi( cms_events.jetphi[cms_events.jet_sel_loose == True], cms_events.metphi) cms_events["min_dphi_jet_met"] = ak.min(cms_events.dphi_jet_met, axis=-1) #-------------------------------------------------------------------------------------------------- ## W --> lepton + nu #-------------------------------------------------------------------------------------------------- mask_wmunu1b = ((cms_events.nEleLoose == 0) & (cms_events.npho == 0) & (cms_events.ntau == 0) & (cms_events.nMuLoose == 1) & (cms_events.nMuTight == 1) & (cms_events.recoil_Wmunu0 > 200.) & (cms_events.min_dphi_jet_met > 0.5) & (cms_events.nJetLoose == 1) & (cms_events.nJetTight == 1) & (cms_events.nJetb == 1) & (cms_events.mt_Wmunu0 > 0) & (cms_events.mt_Wmunu0 < 160)) cms_events["mask_wmunu1b"] = mask_wmunu1b '''