コード例 #1
0
    def process(self, df):
        """
        Processing function. This is where the actual analysis happens.

        Builds the lepton and jet collections from ``df``, fills the cutflow
        and appends one row per event passing the baseline selection to the
        HDF5 table ``data/data_X.h5`` (key ``df``).

        Args:
            df: event chunk, indexed by branch name (``df["MET_pt"]``,
                ``df["Jet_pt"]``, ...). Must also provide ``"dataset"``
                and ``"weight"``.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``
            with ``totalEvents`` and ``passedEvents`` incremented here; it
            is also handed to ``Cutflow``.
        """
        output = self.accumulator.identity()
        dataset = df["dataset"]
        cfg = loadConfig()

        ## Muons
        muon = Collections(df, "Muon", "tight").get()
        vetomuon = Collections(df, "Muon", "veto").get()
        dimuon = muon.choose(2)
        SSmuon = (dimuon[(dimuon.i0.charge * dimuon.i1.charge) > 0].counts > 0)

        ## Electrons
        electron = Collections(df, "Electron", "tight").get()
        vetoelectron = Collections(df, "Electron", "veto").get()
        dielectron = electron.choose(2)
        SSelectron = (dielectron[(dielectron.i0.charge *
                                  dielectron.i1.charge) > 0].counts > 0)

        ## E/Mu cross
        dilepton = electron.cross(muon)
        SSdilepton = (
            dilepton[(dilepton.i0.charge * dilepton.i1.charge) > 0].counts > 0)

        ## Leading / trailing lepton
        lepton = mergeArray(electron, muon)
        # mergeArray builds a JaggedArray that has a UnionArray as .content,
        # which in turn does not work with .argmax(). We therefore build a
        # jagged array just holding the pts and use it to find the indices of
        # the leading and trailing lepton.
        lepton_pt = awkward.concatenate([electron.pt, muon.pt], axis=1)
        leading_lep_index = lepton_pt.argmax()
        trailing_lep_index = lepton_pt.argmin()

        # taking the max here has no impact, but otherwise code fails
        leading_lep_pt = lepton[leading_lep_index].p4.fPt.max()
        leading_lep_eta = lepton[leading_lep_index].p4.fEta.max()
        trailing_lep_pt = lepton[trailing_lep_index].p4.fPt.max()
        trailing_lep_eta = lepton[trailing_lep_index].p4.fEta.max()

        ## Jets
        jet = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'].content,
            eta=df['Jet_eta'].content,
            phi=df['Jet_phi'].content,
            mass=df['Jet_mass'].content,
            # https://twiki.cern.ch/twiki/bin/view/CMS/JetID
            jetId=df['Jet_jetId'].content,
            # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJetID
            puId=df['Jet_puId'].content,
            # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
            btagDeepB=df['Jet_btagDeepB'].content,
        )

        jet = jet[(jet.pt > 25) & (jet.jetId > 1)]
        # remove jets that overlap with muons
        jet = jet[~jet.match(muon, deltaRCut=0.4)]
        # remove jets that overlap with electrons
        jet = jet[~jet.match(electron, deltaRCut=0.4)]
        btag = jet[(jet.btagDeepB > 0.4184) & (abs(jet.eta) < 2.4)]
        # light = central jets failing the b-tag cut, plus all forward jets
        light = jet[((jet.btagDeepB < 0.4184) & (abs(jet.eta) < 2.4)) |
                    (abs(jet.eta) >= 2.4)]

        leading_jet = jet[jet.pt.argmax()]

        ## lepton + b-jet invariant masses
        mass_eb = electron.cross(btag).mass
        mass_mub = muon.cross(btag).mass
        mass_lb = awkward.concatenate([mass_eb, mass_mub], axis=1)
        mlb_min = mass_lb.min()
        mlb_max = mass_lb.max()

        # max shouldn't matter, again
        mll = awkward.concatenate(
            [dimuon.mass, dielectron.mass, dilepton.mass], axis=1).max()

        ## minimum deltaR between any lepton and any jet
        ej = electron.cross(jet)
        muj = muon.cross(jet)
        deltaR_ej = ej.i0.p4.delta_r(ej.i1.p4)
        deltaR_muj = muj.i0.p4.delta_r(muj.i1.p4)
        deltaR_lj = awkward.concatenate([deltaR_ej, deltaR_muj], axis=1)
        deltaR_lj_min = deltaR_lj.min()

        ## MET -> can switch to puppi MET
        met_pt = df["MET_pt"]

        ## other variables
        ht = jet.pt.sum()
        st = met_pt + ht + muon.pt.sum() + electron.pt.sum()

        light_light = light.choose(2)
        mjj_max = light_light[light_light.mass.argmax()].mass

        ## define selections (maybe move to a different file at some point)
        dilep = ((electron.counts + muon.counts) == 2)
        # At least one tight lepton with pt > 25. The leptons are filtered
        # before counting: `.counts` of the boolean mask itself would give
        # the number of leptons per event irrespective of the pt cut.
        leppt = ((electron[electron.pt > 25].counts +
                  muon[muon.pt > 25].counts) > 0)
        lepveto = ((vetoelectron.counts + vetomuon.counts) == 2)
        # Same-sign requirement; currently not part of the cutflow (see the
        # disabled row below).
        SS = (SSelectron | SSmuon | SSdilepton)

        output['totalEvents']['all'] += len(df['weight'])

        # Cutflow
        processes = [
            'tW_scattering', 'TTW', 'TTZ', 'TTH', 'TTTT', 'diboson', 'ttbar',
            'DY'
        ]
        cutflow = Cutflow(output, df, cfg, processes)

        cutflow.addRow('dilep', dilep)
        cutflow.addRow('leppt', leppt)
        cutflow.addRow('lepveto', lepveto)
        #cutflow.addRow( 'SS',          SS )
        cutflow.addRow('njet4', (jet.counts >= 4))
        cutflow.addRow('light2', (light.counts >= 2))
        cutflow.addRow('nbtag', btag.counts > 0)

        baseline = cutflow.selection

        nEventsBaseline = len(df['weight'][baseline])
        output['passedEvents']['all'] += nEventsBaseline

        # Events from these datasets are labelled as signal (1), all others
        # as background (0).
        is_signal = dataset in ('tW_scattering', 'TTW')
        signal_label = (np.ones(nEventsBaseline)
                        if is_signal else np.zeros(nEventsBaseline))

        df_out = pd.DataFrame({
            'j0_pt': leading_jet[baseline].pt.flatten(),
            'j0_eta': leading_jet[baseline].eta.flatten(),
            'l0_pt': leading_lep_pt[baseline].flatten(),
            'l0_eta': leading_lep_eta[baseline].flatten(),
            'l1_pt': trailing_lep_pt[baseline].flatten(),
            'l1_eta': trailing_lep_eta[baseline].flatten(),
            'st': st[baseline].flatten(),
            'ht': ht[baseline].flatten(),
            'njet': jet.counts[baseline].flatten(),
            'nbtag': btag.counts[baseline].flatten(),
            'met': met_pt[baseline].flatten(),
            'mjj_max': mjj_max[baseline].flatten(),
            'mlb_min': mlb_min[baseline].flatten(),
            'mlb_max': mlb_max[baseline].flatten(),
            'deltaR_lj_min': deltaR_lj_min[baseline].flatten(),
            'mll': mll[baseline].flatten(),
            'signal': signal_label,
            'weight': df['weight'][baseline],
        })
        # Append to the table so that successive chunks accumulate in one file.
        df_out.to_hdf('data/data_X.h5',
                      key='df',
                      format='table',
                      mode='a',
                      append=True)

        return output
コード例 #2
0
import pandas as pd
import os

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import numpy as np

from klepto.archives import dir_archive

# import all the colors and tools for plotting
from Tools.helpers import loadConfig
from helpers import *

# Read the analysis configuration; it provides the cache and plot locations.
cfg = loadConfig()

# Year whose results are loaded; used in the cache and plot directory names.
year = 2018

# Open the on-disk archive with the results for this year and read it in.
# To use the small cache instead, join the base path with
# cfg['caches']['WH_small'] rather than 'WH_%s' % year.
_cache_root = os.path.expandvars(cfg['caches']['base'])
cache = dir_archive(os.path.join(_cache_root, 'WH_%s' % year),
                    serialized=True)
cache.load()

# Retrieve the stored objects from the archive.
histograms = cache.get('histograms')
output = cache.get('simple_output')

# Build the plot directory path and pass it to the finalizePlotDir helper.
plotDir = os.path.expandvars(cfg['meta']['plots']) + '/plots_WH_%s/' % year
finalizePlotDir(plotDir)
コード例 #3
0
    def process(self, df):
        """
        Processing function. This is where the actual analysis happens.

        Builds muon, electron, fat-jet and jet collections from ``df``,
        applies the event selection and appends the selected per-event
        quantities to the column accumulators in the output.

        Args:
            df: event chunk, indexed by branch name (``df["MET_pt"]``,
                ``df["Jet_pt"]``, ...). Must also provide ``"dataset"``
                and ``"weight"``.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``
            with the columns ``met``, ``ht``, ``lead_jet_pt``,
            ``sublead_jet_pt``, ``njets``, ``bjets``, ``nWs``, ``nHs``,
            ``nFatJets``, ``met_significance``, ``min_dphi_met_j4`` and
            ``signal`` extended by the selected events.
        """
        output = self.accumulator.identity()
        dataset = df["dataset"]
        # NOTE(review): cfg is not used below; the call is kept in case
        # loadConfig has side effects -- confirm and drop.
        cfg = loadConfig()

        ## MET -> can switch to puppi MET
        met_pt = df["MET_pt"]
        met_phi = df["MET_phi"]

        ## Muons
        muon = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt=df['Muon_pt'].content,
            eta=df['Muon_eta'].content,
            phi=df['Muon_phi'].content,
            mass=df['Muon_mass'].content,
            miniPFRelIso_all=df['Muon_miniPFRelIso_all'].content,
            looseId=df['Muon_looseId'].content)
        muon = muon[(muon.pt > 10) & (abs(muon.eta) < 2.4) & (muon.looseId) &
                    (muon.miniPFRelIso_all < 0.2)]
        #muon = Collections(df, "Muon", "tightTTH").get() # this needs a fix for DASK

        ## Electrons
        electron = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt=df['Electron_pt'].content,
            eta=df['Electron_eta'].content,
            phi=df['Electron_phi'].content,
            mass=df['Electron_mass'].content,
            miniPFRelIso_all=df['Electron_miniPFRelIso_all'].content,
            cutBased=df['Electron_cutBased'].content)
        electron = electron[(electron.pt > 10) & (abs(electron.eta) < 2.4) &
                            (electron.miniPFRelIso_all < 0.1) &
                            (electron.cutBased >= 1)]
        #electron = Collections(df, "Electron", "tightTTH").get() # this needs a fix for DASK

        ## FatJets
        fatjet = JaggedCandidateArray.candidatesfromcounts(
            df['nFatJet'],
            pt=df['FatJet_pt'].content,
            eta=df['FatJet_eta'].content,
            phi=df['FatJet_phi'].content,
            mass=df['FatJet_mass'].content,
            msoftdrop=df["FatJet_msoftdrop"].content,
            deepTagMD_HbbvsQCD=df['FatJet_deepTagMD_HbbvsQCD'].content,
            deepTagMD_WvsQCD=df['FatJet_deepTagMD_WvsQCD'].content,
            deepTag_WvsQCD=df['FatJet_deepTag_WvsQCD'].content)

        nfatjets = fatjet.counts

        # H-tagged fat jets: pass the Hbb-vs-QCD discriminator cut
        htag = fatjet[((fatjet.pt > 200) &
                       (fatjet.deepTagMD_HbbvsQCD > 0.8365))]
        # W-tagged fat jets: fail the Hbb cut, pass the W-vs-QCD cut
        wtag = fatjet[((fatjet.pt > 200) & (fatjet.deepTagMD_HbbvsQCD < 0.8365)
                       & (fatjet.deepTag_WvsQCD > 0.918))]

        ## Jets
        jet = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'].content,
            eta=df['Jet_eta'].content,
            phi=df['Jet_phi'].content,
            mass=df['Jet_mass'].content,
            # https://twiki.cern.ch/twiki/bin/view/CMS/JetID
            jetId=df['Jet_jetId'].content,
            # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
            btagDeepB=df['Jet_btagDeepB'].content,
        )

        jet = jet[(jet.pt > 30) & (jet.jetId > 1) & (abs(jet.eta) < 2.4)]
        # remove jets that overlap with muons
        jet = jet[~jet.match(muon, deltaRCut=0.4)]
        # remove jets that overlap with electrons
        jet = jet[~jet.match(electron, deltaRCut=0.4)]
        jet = jet[jet.pt.argsort(ascending=False)]  # sort the jets
        btag = jet[(jet.btagDeepB > 0.4184)]

        njets = jet.counts
        nbjets = btag.counts

        ## Leading and subleading jet, taken from the two hardest jets
        leadingJets = jet[:, :2]
        leading_jet = leadingJets[leadingJets.pt.argmax()]
        subleading_jet = leadingJets[leadingJets.pt.argmin()]

        ## other variables
        ht = jet.pt.sum()

        # smallest |delta phi| between MET and any of the four leading jets;
        # arccos(cos(x)) folds the difference into [0, pi]
        min_dphiJetMet4 = np.arccos(np.cos(jet[:, :4].phi - met_phi)).min()

        ## preselection
        ht_ps = (ht > 0)
        met_ps = (met_pt > 250)
        njet_ps = (njets >= 2)
        bjet_ps = (nbjets >= 1)
        fatjet_sel = (nfatjets >= 1)

        ## lepton veto
        e_sel = (electron.counts == 0)
        m_sel = (muon.counts == 0)
        l_sel = e_sel & m_sel

        h_sel = (htag.counts > 0)
        # W-tag requirement; currently not applied (see the selection below)
        wmc_sel = (wtag.counts > 0)

        sel = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel  #& wmc_sel

        # ht > 0 is part of sel, so the division is safe
        met_sig = met_pt[sel] / np.sqrt(ht[sel])

        nEventsBaseline = len(df['weight'][sel])
        # Events of the 'WH' dataset are labelled signal (1), all others 0.
        signal_label = (np.ones(nEventsBaseline)
                        if dataset == 'WH' else np.zeros(nEventsBaseline))

        # NOTE(review): per-event weights (df['weight'][sel] * 137) are not
        # written to the output yet.

        output['met'] += processor.column_accumulator(met_pt[sel].flatten())
        output['ht'] += processor.column_accumulator(ht[sel].flatten())
        output['lead_jet_pt'] += processor.column_accumulator(
            leading_jet[sel].pt.flatten())
        output['sublead_jet_pt'] += processor.column_accumulator(
            subleading_jet[sel].pt.flatten())
        output['njets'] += processor.column_accumulator(njets[sel].flatten())
        output['bjets'] += processor.column_accumulator(nbjets[sel].flatten())
        output['nWs'] += processor.column_accumulator(
            wtag[sel].counts.flatten())
        output['nHs'] += processor.column_accumulator(
            htag[sel].counts.flatten())
        output['nFatJets'] += processor.column_accumulator(
            fatjet[sel].counts.flatten())
        output['met_significance'] += processor.column_accumulator(
            met_sig.flatten())
        output['min_dphi_met_j4'] += processor.column_accumulator(
            min_dphiJetMet4[sel].flatten())
        output['signal'] += processor.column_accumulator(signal_label)

        return output