def process(self, df):
    """
    Processing function. This is where the actual analysis happens.

    Builds the lepton and jet collections for one chunk of events, fills the
    cutflow, and appends one row per event passing the baseline selection to
    the HDF5 table ``data/data_X.h5``.

    Args:
        df: Columnar event data for one chunk. Accessed by key, e.g.
            ``"dataset"``, ``"weight"``, ``"MET_pt"``, ``"nJet"`` and the
            ``Jet_*`` branches.

    Returns:
        The accumulator obtained from ``self.accumulator.identity()`` with
        ``totalEvents`` and ``passedEvents`` incremented and the cutflow rows
        added through ``Cutflow``.
    """
    output = self.accumulator.identity()
    dataset = df["dataset"]
    cfg = loadConfig()

    ## Muons
    muon = Collections(df, "Muon", "tight").get()
    vetomuon = Collections(df, "Muon", "veto").get()
    dimuon = muon.choose(2)
    SSmuon = (dimuon[(dimuon.i0.charge * dimuon.i1.charge) > 0].counts > 0)

    ## Electrons
    electron = Collections(df, "Electron", "tight").get()
    vetoelectron = Collections(df, "Electron", "veto").get()
    dielectron = electron.choose(2)
    SSelectron = (
        dielectron[(dielectron.i0.charge * dielectron.i1.charge) > 0].counts > 0)

    ## E/Mu cross
    dilepton = electron.cross(muon)
    SSdilepton = (
        dilepton[(dilepton.i0.charge * dilepton.i1.charge) > 0].counts > 0)

    ## Leading / trailing lepton
    lepton = mergeArray(electron, muon)

    # ok so this is getting **really** awkward (pun slightly intended).
    # Because the mergeArray function builds a JaggedArray that has a
    # UnionArray as .content, which in turn does not work with .argmax(),
    # we need to build a jagged array just holding the pts.
    lepton_pt = awkward.concatenate([electron.pt, muon.pt], axis=1)
    leading_lep_index = lepton_pt.argmax()
    trailing_lep_index = lepton_pt.argmin()

    # Taking the max below has no impact on the value (one lepton is picked
    # per event), but the code fails without it.
    leading_lep_pt = lepton[leading_lep_index].p4.fPt.max()
    leading_lep_eta = lepton[leading_lep_index].p4.fEta.max()
    trailing_lep_pt = lepton[trailing_lep_index].p4.fPt.max()
    trailing_lep_eta = lepton[trailing_lep_index].p4.fEta.max()

    ## Jets
    jet = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df['Jet_pt'].content,
        eta=df['Jet_eta'].content,
        phi=df['Jet_phi'].content,
        mass=df['Jet_mass'].content,
        # https://twiki.cern.ch/twiki/bin/view/CMS/JetID
        jetId=df['Jet_jetId'].content,
        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJetID
        puId=df['Jet_puId'].content,
        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
        btagDeepB=df['Jet_btagDeepB'].content,
    )

    jet = jet[(jet.pt > 25) & (jet.jetId > 1)]
    jet = jet[~jet.match(muon, deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~jet.match(electron, deltaRCut=0.4)]  # remove jets that overlap with electrons

    btag = jet[(jet.btagDeepB > 0.4184) & (abs(jet.eta) < 2.4)]
    # "light" = everything that is not a central b-tagged jet
    light = jet[((jet.btagDeepB < 0.4184) & (abs(jet.eta) < 2.4))
                | (abs(jet.eta) >= 2.4)]

    leading_jet = jet[jet.pt.argmax()]

    ## Lepton + b-jet invariant masses
    mass_eb = electron.cross(btag).mass
    mass_mub = muon.cross(btag).mass
    mass_lb = awkward.concatenate([mass_eb, mass_mub], axis=1)
    mlb_min = mass_lb.min()
    mlb_max = mass_lb.max()

    mll = awkward.concatenate(
        [dimuon.mass, dielectron.mass, dilepton.mass],
        axis=1).max()  # max shouldn't matter, again

    ## Lepton-jet separation
    ej = electron.cross(jet)
    muj = muon.cross(jet)
    deltaR_ej = ej.i0.p4.delta_r(ej.i1.p4)
    deltaR_muj = muj.i0.p4.delta_r(muj.i1.p4)
    deltaR_lj = awkward.concatenate([deltaR_ej, deltaR_muj], axis=1)
    deltaR_lj_min = deltaR_lj.min()

    ## MET -> can switch to puppi MET
    met_pt = df["MET_pt"]

    ## other variables
    ht = jet.pt.sum()
    st = met_pt + ht + muon.pt.sum() + electron.pt.sum()

    light_light = light.choose(2)
    mjj_max = light_light[light_light.mass.argmax()].mass

    ## define selections (maybe move to a different file at some point)
    dilep = ((electron.counts + muon.counts) == 2)
    # A boolean mask has as many entries per event as the collection it was
    # built from, so counting its entries would accept every event with any
    # lepton at all. Reduce over the mask values instead.
    leppt = (electron.pt > 25).any() | (muon.pt > 25).any()
    lepveto = ((vetoelectron.counts + vetomuon.counts) == 2)
    SS = (SSelectron | SSmuon | SSdilepton)

    output['totalEvents']['all'] += len(df['weight'])

    # Cutflow
    processes = [
        'tW_scattering', 'TTW', 'TTZ', 'TTH', 'TTTT', 'diboson', 'ttbar', 'DY'
    ]
    cutflow = Cutflow(output, df, cfg, processes)

    cutflow.addRow('dilep', dilep)
    cutflow.addRow('leppt', leppt)
    cutflow.addRow('lepveto', lepveto)
    # The same-sign requirement is currently not applied:
    #cutflow.addRow( 'SS', SS )
    cutflow.addRow('njet4', (jet.counts >= 4))
    cutflow.addRow('light2', (light.counts >= 2))
    cutflow.addRow('nbtag', btag.counts > 0)

    baseline = cutflow.selection

    weight_baseline = df['weight'][baseline]
    nEventsBaseline = len(weight_baseline)
    output['passedEvents']['all'] += nEventsBaseline

    # Events from these two datasets are labelled 1, everything else 0.
    if dataset in ('tW_scattering', 'TTW'):
        signal_label = np.ones(nEventsBaseline)
    else:
        signal_label = np.zeros(nEventsBaseline)

    # Column order is kept fixed because rows are appended to an existing
    # HDF5 table.
    df_out = pd.DataFrame({
        'j0_pt': leading_jet[baseline].pt.flatten(),
        'j0_eta': leading_jet[baseline].eta.flatten(),
        'l0_pt': leading_lep_pt[baseline].flatten(),
        'l0_eta': leading_lep_eta[baseline].flatten(),
        'l1_pt': trailing_lep_pt[baseline].flatten(),
        'l1_eta': trailing_lep_eta[baseline].flatten(),
        'st': st[baseline].flatten(),
        'ht': ht[baseline].flatten(),
        'njet': jet.counts[baseline].flatten(),
        'nbtag': btag.counts[baseline].flatten(),
        'met': met_pt[baseline].flatten(),
        'mjj_max': mjj_max[baseline].flatten(),
        'mlb_min': mlb_min[baseline].flatten(),
        'mlb_max': mlb_max[baseline].flatten(),
        'deltaR_lj_min': deltaR_lj_min[baseline].flatten(),
        'mll': mll[baseline].flatten(),
        'signal': signal_label,
        'weight': weight_baseline,
    })
    df_out.to_hdf('data/data_X.h5',
                  key='df',
                  format='table',
                  mode='a',
                  append=True)

    return output
# Plotting set-up: load the analysis configuration, open the result cache for
# the chosen year and make sure the plot output directory is prepared.
import pandas as pd
import os
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import numpy as np
from klepto.archives import dir_archive

# import all the colors and tools for plotting
from Tools.helpers import loadConfig
# NOTE(review): the star import hides where names such as finalizePlotDir come
# from (presumably this module -- confirm) and may rebind names imported above.
from helpers import *

# load the configuration
cfg = loadConfig()

# data-taking year; selects both the cache ('WH_<year>') and the plot directory
year = 2018

# load the results
# The archive lives in <cfg['caches']['base']>/WH_<year>; environment variables
# in the configured base path are expanded.
cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), 'WH_%s' % year), serialized=True)
# Alternative cache location taken from cfg['caches']['WH_small']:
#cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), cfg['caches']['WH_small']), serialized=True)
cache.load()

# the two entries of the cache that are read here
histograms = cache.get('histograms')
output = cache.get('simple_output')

# plots go to <cfg['meta']['plots']>/plots_WH_<year>/
plotDir = os.path.expandvars(cfg['meta']['plots']) + '/plots_WH_%s/' % year
finalizePlotDir(plotDir)
def process(self, df):
    """
    Processing function. This is where the actual analysis happens.

    Builds muon, electron, fat-jet and jet collections for one chunk of
    events, applies the event selection and appends the selected events'
    quantities to the column accumulators in the output.

    The selection requires: ht > 0, MET pt > 250, at least two jets, at least
    one b-tagged jet, no selected electron or muon, at least one fat jet and
    at least one fat jet in the ``htag`` collection.

    Args:
        df: Columnar event data for one chunk. Accessed by key, e.g.
            ``"dataset"``, ``"MET_pt"``, ``"MET_phi"`` and the ``Muon_*``,
            ``Electron_*``, ``FatJet_*`` and ``Jet_*`` branches.

    Returns:
        The accumulator obtained from ``self.accumulator.identity()`` with
        the columns ``met``, ``ht``, ``lead_jet_pt``, ``sublead_jet_pt``,
        ``njets``, ``bjets``, ``nWs``, ``nHs``, ``nFatJets``,
        ``met_significance``, ``min_dphi_met_j4`` and ``signal`` extended.
    """
    output = self.accumulator.identity()

    ## MET -> can switch to puppi MET
    met_pt = df["MET_pt"]
    met_phi = df["MET_phi"]

    ## Muons
    # TODO: switch to Collections(df, "Muon", "tightTTH").get() once that
    # works with DASK.
    muon = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'].content,
        eta=df['Muon_eta'].content,
        phi=df['Muon_phi'].content,
        mass=df['Muon_mass'].content,
        miniPFRelIso_all=df['Muon_miniPFRelIso_all'].content,
        looseId=df['Muon_looseId'].content,
    )
    muon = muon[(muon.pt > 10)
                & (abs(muon.eta) < 2.4)
                & (muon.looseId)
                & (muon.miniPFRelIso_all < 0.2)]

    ## Electrons
    # TODO: switch to Collections(df, "Electron", "tightTTH").get() once that
    # works with DASK.
    electron = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'].content,
        eta=df['Electron_eta'].content,
        phi=df['Electron_phi'].content,
        mass=df['Electron_mass'].content,
        miniPFRelIso_all=df['Electron_miniPFRelIso_all'].content,
        cutBased=df['Electron_cutBased'].content,
    )
    electron = electron[(electron.pt > 10)
                        & (abs(electron.eta) < 2.4)
                        & (electron.miniPFRelIso_all < 0.1)
                        & (electron.cutBased >= 1)]

    ## FatJets
    fatjet = JaggedCandidateArray.candidatesfromcounts(
        df['nFatJet'],
        pt=df['FatJet_pt'].content,
        eta=df['FatJet_eta'].content,
        phi=df['FatJet_phi'].content,
        mass=df['FatJet_mass'].content,
        msoftdrop=df["FatJet_msoftdrop"].content,
        deepTagMD_HbbvsQCD=df['FatJet_deepTagMD_HbbvsQCD'].content,
        deepTagMD_WvsQCD=df['FatJet_deepTagMD_WvsQCD'].content,
        deepTag_WvsQCD=df['FatJet_deepTag_WvsQCD'].content,
    )
    nfatjets = fatjet.counts

    # The two tagged collections are mutually exclusive: a fat jet above the
    # HbbvsQCD threshold is never counted in wtag.
    htag = fatjet[(fatjet.pt > 200) & (fatjet.deepTagMD_HbbvsQCD > 0.8365)]
    wtag = fatjet[(fatjet.pt > 200)
                  & (fatjet.deepTagMD_HbbvsQCD < 0.8365)
                  & (fatjet.deepTag_WvsQCD > 0.918)]

    ## Jets
    jet = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df['Jet_pt'].content,
        eta=df['Jet_eta'].content,
        phi=df['Jet_phi'].content,
        mass=df['Jet_mass'].content,
        # https://twiki.cern.ch/twiki/bin/view/CMS/JetID
        jetId=df['Jet_jetId'].content,
        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
        btagDeepB=df['Jet_btagDeepB'].content,
    )

    # One filter is enough: jetId > 1 already implies jetId > 0.
    jet = jet[(jet.pt > 30) & (abs(jet.eta) < 2.4) & (jet.jetId > 1)]
    jet = jet[~jet.match(muon, deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~jet.match(electron, deltaRCut=0.4)]  # remove jets that overlap with electrons
    jet = jet[jet.pt.argsort(ascending=False)]  # sort the jets

    btag = jet[(jet.btagDeepB > 0.4184)]

    njets = jet.counts
    nbjets = btag.counts

    ## Leading and subleading jet, taken from the two hardest jets
    leadingJets = jet[:, :2]
    leading_jet = leadingJets[leadingJets.pt.argmax()]
    subleading_jet = leadingJets[leadingJets.pt.argmin()]

    ## other variables
    ht = jet.pt.sum()
    # arccos(cos(x)) folds the azimuthal difference into [0, pi]
    min_dphiJetMet4 = np.arccos(np.cos(jet[:, :4].phi - met_phi)).min()

    ## event selection
    ht_ps = (ht > 0)
    met_ps = (met_pt > 250)
    njet_ps = (njets >= 2)
    bjet_ps = (nbjets >= 1)
    fatjet_sel = (nfatjets >= 1)

    e_sel = (electron.counts == 0)
    m_sel = (muon.counts == 0)
    l_sel = e_sel & m_sel

    h_sel = (htag.counts > 0)

    # A requirement on wtag.counts is not part of the selection.
    sel = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel

    # ht > 0 is part of sel, so the division is safe
    met_sig = met_pt[sel] / np.sqrt(ht[sel])

    nEventsBaseline = len(df['weight'][sel])
    if df['dataset'] == 'WH':
        signal_label = np.ones(nEventsBaseline)
    else:
        signal_label = np.zeros(nEventsBaseline)

    # NOTE(review): df['weight'][sel] * 137 used to be computed here but was
    # never written to any accumulator, so the columns below carry no event
    # weight.

    output['met'] += processor.column_accumulator(met_pt[sel].flatten())
    output['ht'] += processor.column_accumulator(ht[sel].flatten())
    output['lead_jet_pt'] += processor.column_accumulator(
        leading_jet[sel].pt.flatten())
    output['sublead_jet_pt'] += processor.column_accumulator(
        subleading_jet[sel].pt.flatten())
    output['njets'] += processor.column_accumulator(njets[sel].flatten())
    output['bjets'] += processor.column_accumulator(nbjets[sel].flatten())
    output['nWs'] += processor.column_accumulator(
        wtag[sel].counts.flatten())
    output['nHs'] += processor.column_accumulator(
        htag[sel].counts.flatten())
    output['nFatJets'] += processor.column_accumulator(
        fatjet[sel].counts.flatten())
    output['met_significance'] += processor.column_accumulator(
        met_sig.flatten())
    output['min_dphi_met_j4'] += processor.column_accumulator(
        min_dphiJetMet4[sel].flatten())
    output['signal'] += processor.column_accumulator(signal_label)

    return output