Ejemplo n.º 1
0
def SS_selection(lep1, lep2):
    """Build a per-event same-sign (SS) lepton-pair selection.

    An event is flagged when at least one lepton pair has a positive charge
    product, i.e. both leptons carry the same charge sign. Pairs are taken
    within ``lep2``, within ``lep1``, and between ``lep2`` and ``lep1``.

    Parameters
    ----------
    lep1, lep2
        Lepton collections exposing a ``charge`` field. They are handed to
        the ``choose`` / ``cross`` helpers, which are defined outside this
        function (presumably jagged per-event arrays -- confirm with caller).

    Returns
    -------
    The result of ``PackedSelection.require(SS=True)`` for the 'SS' cut.
    """

    def has_same_sign_pair(pairs):
        # The pair records are addressed by the string fields '0' and '1'.
        # A positive charge product means both members share the same sign.
        return ak.any((pairs['0'].charge * pairs['1'].charge) > 0, axis=1)

    pairs_in_lep2 = choose(lep2, 2)
    pairs_in_lep1 = choose(lep1, 2)
    pairs_across = cross(lep2, lep1)

    is_SS = (has_same_sign_pair(pairs_in_lep2)
             | has_same_sign_pair(pairs_in_lep1)
             | has_same_sign_pair(pairs_across))

    # Register the mask under the name 'SS' and read it back through
    # require(), so the return value has the same form callers already get.
    selection = PackedSelection()
    selection.add('SS', is_SS)
    return selection.require(SS=True)
Ejemplo n.º 2
0
def test_packed_selection():
    """Check PackedSelection's basic results and its error handling.

    Covers ``require`` / ``all`` on two trivial masks, then verifies that
    invalid usage raises: non-boolean requirements, a mask with the wrong
    shape, a mask with a non-boolean dtype, and adding 65 selections to one
    PackedSelection.
    """
    from coffea.analysis_tools import PackedSelection

    def expect_raises(exc_type, action):
        """Call ``action`` and fail the test unless it raises ``exc_type``."""
        try:
            action()
        except exc_type:
            return
        raise AssertionError("expected %s to be raised" % exc_type.__name__)

    sel = PackedSelection()

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
    # exists in current NumPy releases.
    all_true = np.full(shape=counts.shape, fill_value=True, dtype=bool)
    all_false = np.full(shape=counts.shape, fill_value=False, dtype=bool)
    # Correct shape but non-boolean dtype.
    ones = np.ones(shape=counts.shape, dtype=np.uint64)
    # Boolean dtype but deliberately five entries short.
    wrong_shape = np.ones(shape=(counts.shape[0] - 5, ), dtype=bool)

    sel.add("all_true", all_true)
    sel.add("all_false", all_false)

    assert np.all(sel.require(all_true=True, all_false=False) == all_true)
    assert np.all(sel.all("all_true", "all_false") == all_false)

    # Requirements given as integers instead of booleans.
    expect_raises(ValueError, lambda: sel.require(all_true=1, all_false=0))

    # Mask whose shape differs from the masks already stored.
    expect_raises(ValueError, lambda: sel.add("wrong_shape", wrong_shape))

    # Mask with the right shape but a non-boolean dtype.
    expect_raises(ValueError, lambda: sel.add("ones", ones))

    def overfill():
        # Each selection gets a distinct name, so the only thing that can go
        # wrong is running out of slots (presumably 64 for the default
        # dtype -- confirm against the PackedSelection implementation).
        overpack = PackedSelection()
        for i in range(65):
            overpack.add("sel_%d" % i, all_true)

    expect_raises(RuntimeError, overfill)
Ejemplo n.º 3
0
    def process(self, events):
        # Dataset parameters
        dataset = events.metadata['dataset']
        histAxisName = self._samples[dataset]['histAxisName']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']
        datasets = [
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        ]
        for d in datasets:
            if d in dataset: dataset = dataset.split('_')[0]

        # Initialize objects
        met = events.MET
        e = events.Electron
        mu = events.Muon
        tau = events.Tau
        j = events.Jet

        # Muon selection

        mu['isPres'] = isPresMuon(mu.dxy, mu.dz, mu.sip3d, mu.looseId)
        mu['isTight'] = isTightMuon(mu.pt,
                                    mu.eta,
                                    mu.dxy,
                                    mu.dz,
                                    mu.pfRelIso03_all,
                                    mu.sip3d,
                                    mu.mvaTTH,
                                    mu.mediumPromptId,
                                    mu.tightCharge,
                                    mu.looseId,
                                    minpt=10)
        mu['isGood'] = mu['isPres'] & mu['isTight']

        leading_mu = mu[ak.argmax(mu.pt, axis=-1, keepdims=True)]
        leading_mu = leading_mu[leading_mu.isGood]

        mu = mu[mu.isGood]
        mu_pres = mu[mu.isPres]

        # Electron selection
        e['isPres'] = isPresElec(e.pt,
                                 e.eta,
                                 e.dxy,
                                 e.dz,
                                 e.miniPFRelIso_all,
                                 e.sip3d,
                                 e.lostHits,
                                 minpt=15)
        e['isTight'] = isTightElec(e.pt,
                                   e.eta,
                                   e.dxy,
                                   e.dz,
                                   e.miniPFRelIso_all,
                                   e.sip3d,
                                   e.mvaTTH,
                                   e.mvaFall17V2Iso,
                                   e.lostHits,
                                   e.convVeto,
                                   e.tightCharge,
                                   e.sieie,
                                   e.hoe,
                                   e.eInvMinusPInv,
                                   minpt=15)
        e['isClean'] = isClean(e, mu, drmin=0.05)
        e['isGood'] = e['isPres'] & e['isTight'] & e['isClean']

        leading_e = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
        leading_e = leading_e[leading_e.isGood]

        e = e[e.isGood]
        e_pres = e[e.isPres & e.isClean]

        # Tau selection
        tau['isPres'] = isPresTau(tau.pt,
                                  tau.eta,
                                  tau.dxy,
                                  tau.dz,
                                  tau.leadTkPtOverTauPt,
                                  tau.idAntiMu,
                                  tau.idAntiEle,
                                  tau.rawIso,
                                  tau.idDecayModeNewDMs,
                                  minpt=20)
        tau['isClean'] = isClean(tau, e_pres, drmin=0.4) & isClean(
            tau, mu_pres, drmin=0.4)
        tau['isGood'] = tau['isPres']  # & tau['isClean'], for the moment
        tau = tau[tau.isGood]

        nElec = ak.num(e)
        nMuon = ak.num(mu)
        nTau = ak.num(tau)

        twoLeps = (nElec + nMuon) == 2
        threeLeps = (nElec + nMuon) == 3
        twoElec = (nElec == 2)
        twoMuon = (nMuon == 2)
        e0 = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
        m0 = mu[ak.argmax(mu.pt, axis=-1, keepdims=True)]

        # Attach the lepton SFs to the electron and muons collections
        AttachElectronSF(e, year=year)
        AttachMuonSF(mu, year=year)

        # Create a lepton (muon+electron) collection and calculate a per event lepton SF
        leps = ak.concatenate([e, mu], axis=-1)
        events['lepSF_nom'] = ak.prod(leps.sf_nom, axis=-1)
        events['lepSF_hi'] = ak.prod(leps.sf_hi, axis=-1)
        events['lepSF_lo'] = ak.prod(leps.sf_lo, axis=-1)

        # Jet selection
        jetptname = 'pt_nom' if hasattr(j, 'pt_nom') else 'pt'

        ### Jet energy corrections
        if not isData:
            j["pt_raw"] = (1 - j.rawFactor) * j.pt
            j["mass_raw"] = (1 - j.rawFactor) * j.mass
            j["pt_gen"] = ak.values_astype(ak.fill_none(j.matched_gen.pt, 0),
                                           np.float32)
            j["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                           j.pt)[0]
            events_cache = events.caches[0]
            corrected_jets = jet_factory.build(j, lazy_cache=events_cache)
            #print('jet pt: ',j.pt)
            #print('cor pt: ',corrected_jets.pt)
            #print('jes up: ',corrected_jets.JES_jes.up.pt)
            #print('jes down: ',corrected_jets.JES_jes.down.pt)
            #print(ak.fields(corrected_jets))
            '''
          # SYSTEMATICS
          jets = corrected_jets
          if(self.jetSyst == 'JERUp'):
            jets = corrected_jets.JER.up
          elif(self.jetSyst == 'JERDown'):
            jets = corrected_jets.JER.down
          elif(self.jetSyst == 'JESUp'):
            jets = corrected_jets.JES_jes.up
          elif(self.jetSyst == 'JESDown'):
            jets = corrected_jets.JES_jes.down
          '''
        j['isGood'] = isTightJet(getattr(j,
                                         jetptname), j.eta, j.jetId, j.neHEF,
                                 j.neEmEF, j.chHEF, j.chEmEF, j.nConstituents)
        #j['isgood']  = isGoodJet(j.pt, j.eta, j.jetId)
        #j['isclean'] = isClean(j, e, mu)
        j['isClean'] = isClean(j, e, drmin=0.4) & isClean(
            j, mu, drmin=0.4)  # & isClean(j, tau, drmin=0.4)
        goodJets = j[(j.isClean) & (j.isGood)]
        njets = ak.num(goodJets)
        ht = ak.sum(goodJets.pt, axis=-1)
        j0 = goodJets[ak.argmax(goodJets.pt, axis=-1, keepdims=True)]
        #nbtags = ak.num(goodJets[goodJets.btagDeepFlavB > 0.2770])
        # Loose DeepJet WP
        if year == 2017: btagwpl = 0.0532  #WP loose
        else: btagwpl = 0.0490  #WP loose
        isBtagJetsLoose = (goodJets.btagDeepB > btagwpl)
        isNotBtagJetsLoose = np.invert(isBtagJetsLoose)
        nbtagsl = ak.num(goodJets[isBtagJetsLoose])
        # Medium DeepJet WP
        if year == 2017: btagwpm = 0.3040  #WP medium
        else: btagwpm = 0.2783  #WP medium
        isBtagJetsMedium = (goodJets.btagDeepB > btagwpm)
        isNotBtagJetsMedium = np.invert(isBtagJetsMedium)
        nbtagsm = ak.num(goodJets[isBtagJetsMedium])

        # Btag SF following 1a) in https://twiki.cern.ch/twiki/bin/viewauth/CMS/BTagSFMethods
        btagSF = np.ones_like(ht)
        btagSFUp = np.ones_like(ht)
        btagSFDo = np.ones_like(ht)
        if not isData:
            pt = goodJets.pt
            abseta = np.abs(goodJets.eta)
            flav = goodJets.hadronFlavour
            bJetSF = GetBTagSF(abseta, pt, flav)
            bJetSFUp = GetBTagSF(abseta, pt, flav, sys=1)
            bJetSFDo = GetBTagSF(abseta, pt, flav, sys=-1)
            bJetEff = GetBtagEff(abseta, pt, flav, year)
            bJetEff_data = bJetEff * bJetSF
            bJetEff_dataUp = bJetEff * bJetSFUp
            bJetEff_dataDo = bJetEff * bJetSFDo

            pMC = ak.prod(bJetEff[isBtagJetsMedium], axis=-1) * ak.prod(
                (1 - bJetEff[isNotBtagJetsMedium]), axis=-1)
            pData = ak.prod(bJetEff_data[isBtagJetsMedium], axis=-1) * ak.prod(
                (1 - bJetEff_data[isNotBtagJetsMedium]), axis=-1)
            pDataUp = ak.prod(
                bJetEff_dataUp[isBtagJetsMedium], axis=-1) * ak.prod(
                    (1 - bJetEff_dataUp[isNotBtagJetsMedium]), axis=-1)
            pDataDo = ak.prod(
                bJetEff_dataDo[isBtagJetsMedium], axis=-1) * ak.prod(
                    (1 - bJetEff_dataDo[isNotBtagJetsMedium]), axis=-1)

            pMC = ak.where(pMC == 0, 1,
                           pMC)  # removeing zeroes from denominator...
            btagSF = pData / pMC
            btagSFUp = pDataUp / pMC
            btagSFDo = pDataUp / pMC

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
        singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
        em = ak.cartesian({"e": singe, "m": singm})
        emSSmask = (em.e.charge * em.m.charge > 0)
        emSS = em[emSSmask]
        nemSS = len(ak.flatten(emSS))

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
        mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

        sumcharge = ak.sum(e.charge, axis=-1) + ak.sum(mu.charge, axis=-1)

        eepairs = ak.combinations(ee, 2, fields=["e0", "e1"])
        eeSSmask = (eepairs.e0.charge * eepairs.e1.charge > 0)
        eeonZmask = (np.abs((eepairs.e0 + eepairs.e1).mass - 91.2) < 10)
        eeoffZmask = (eeonZmask == 0)

        mmpairs = ak.combinations(mm, 2, fields=["m0", "m1"])
        mmSSmask = (mmpairs.m0.charge * mmpairs.m1.charge > 0)
        mmonZmask = (np.abs((mmpairs.m0 + mmpairs.m1).mass - 91.2) < 10)
        mmoffZmask = (mmonZmask == 0)

        eeSSonZ = eepairs[eeSSmask & eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ = mmpairs[mmSSmask & mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
        neeSS = len(ak.flatten(eeSSonZ)) + len(ak.flatten(eeSSoffZ))
        nmmSS = len(ak.flatten(mmSSonZ)) + len(ak.flatten(mmSSoffZ))

        print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]' %
              (neeSS, nemSS, nmmSS))

        # Cuts
        eeSSmask = (ak.num(eeSSmask[eeSSmask]) > 0)
        mmSSmask = (ak.num(mmSSmask[mmSSmask]) > 0)
        eeonZmask = (ak.num(eeonZmask[eeonZmask]) > 0)
        eeoffZmask = (ak.num(eeoffZmask[eeoffZmask]) > 0)
        mmonZmask = (ak.num(mmonZmask[mmonZmask]) > 0)
        mmoffZmask = (ak.num(mmoffZmask[mmoffZmask]) > 0)
        emSSmask = (ak.num(emSSmask[emSSmask]) > 0)

        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
        elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]

        ee_eem = ak.combinations(elec_eem, 2, fields=["e0", "e1"])
        ee_eemZmask = (ee_eem.e0.charge * ee_eem.e1.charge < 1) & (np.abs(
            (ee_eem.e0 + ee_eem.e1).mass - 91.2) < 10)
        ee_eemOffZmask = (ee_eem.e0.charge * ee_eem.e1.charge < 1) & (np.abs(
            (ee_eem.e0 + ee_eem.e1).mass - 91.2) > 10)
        ee_eemZmask = (ak.num(ee_eemZmask[ee_eemZmask]) > 0)
        ee_eemOffZmask = (ak.num(ee_eemOffZmask[ee_eemOffZmask]) > 0)

        eepair_eem = (ee_eem.e0 + ee_eem.e1)
        trilep_eem = eepair_eem + muon_eem  #ak.cartesian({"e0":ee_eem.e0,"e1":ee_eem.e1, "m":muon_eem})

        # mme
        muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
        elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]

        mm_mme = ak.combinations(muon_mme, 2, fields=["m0", "m1"])
        mm_mmeZmask = (mm_mme.m0.charge * mm_mme.m1.charge < 1) & (np.abs(
            (mm_mme.m0 + mm_mme.m1).mass - 91.2) < 10)
        mm_mmeOffZmask = (mm_mme.m0.charge * mm_mme.m1.charge < 1) & (np.abs(
            (mm_mme.m0 + mm_mme.m1).mass - 91.2) > 10)
        mm_mmeZmask = (ak.num(mm_mmeZmask[mm_mmeZmask]) > 0)
        mm_mmeOffZmask = (ak.num(mm_mmeOffZmask[mm_mmeOffZmask]) > 0)

        mmpair_mme = (mm_mme.m0 + mm_mme.m1)
        trilep_mme = mmpair_mme + elec_mme

        mZ_mme = mmpair_mme.mass
        mZ_eem = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        # eee and mmm
        eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
        mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]

        eee_leps = ak.combinations(eee, 3, fields=["e0", "e1", "e2"])
        mmm_leps = ak.combinations(mmm, 3, fields=["m0", "m1", "m2"])

        ee_pairs = ak.combinations(eee, 2, fields=["e0", "e1"])
        mm_pairs = ak.combinations(mmm, 2, fields=["m0", "m1"])
        ee_pairs_index = ak.argcombinations(eee, 2, fields=["e0", "e1"])
        mm_pairs_index = ak.argcombinations(mmm, 2, fields=["m0", "m1"])

        mmSFOS_pairs = mm_pairs[
            (np.abs(mm_pairs.m0.pdgId) == np.abs(mm_pairs.m1.pdgId))
            & (mm_pairs.m0.charge != mm_pairs.m1.charge)]
        offZmask_mm = ak.all(
            np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2) > 10.,
            axis=1,
            keepdims=True) & (ak.num(mmSFOS_pairs) > 0)
        onZmask_mm = ak.any(
            np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2) < 10.,
            axis=1,
            keepdims=True)

        eeSFOS_pairs = ee_pairs[
            (np.abs(ee_pairs.e0.pdgId) == np.abs(ee_pairs.e1.pdgId))
            & (ee_pairs.e0.charge != ee_pairs.e1.charge)]
        offZmask_ee = ak.all(
            np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2) > 10,
            axis=1,
            keepdims=True) & (ak.num(eeSFOS_pairs) > 0)
        onZmask_ee = ak.any(
            np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2) < 10,
            axis=1,
            keepdims=True)

        # Create masks **for event selection**
        eeeOnZmask = (ak.num(onZmask_ee[onZmask_ee]) > 0)
        eeeOffZmask = (ak.num(offZmask_ee[offZmask_ee]) > 0)
        mmmOnZmask = (ak.num(onZmask_mm[onZmask_mm]) > 0)
        mmmOffZmask = (ak.num(offZmask_mm[offZmask_mm]) > 0)

        # Now we need to create masks for the leptons in order to select leptons from the Z boson candidate (in onZ categories)
        ZeeMask = ak.argmin(np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass -
                                   91.2),
                            axis=1,
                            keepdims=True)
        ZmmMask = ak.argmin(np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass -
                                   91.2),
                            axis=1,
                            keepdims=True)

        Zee = eeSFOS_pairs[ZeeMask]
        Zmm = mmSFOS_pairs[ZmmMask]
        eZ0 = Zee.e0[ak.num(eeSFOS_pairs) > 0]
        eZ1 = Zee.e1[ak.num(eeSFOS_pairs) > 0]
        eZ = eZ0 + eZ1
        mZ0 = Zmm.m0[ak.num(mmSFOS_pairs) > 0]
        mZ1 = Zmm.m1[ak.num(mmSFOS_pairs) > 0]
        mZ = mZ0 + mZ1
        mZ_eee = eZ.mass
        mZ_mmm = mZ.mass

        # And for the W boson
        ZmmIndices = mm_pairs_index[ZmmMask]
        ZeeIndices = ee_pairs_index[ZeeMask]
        eW = eee[~ZeeIndices.e0 | ~ZeeIndices.e1]
        mW = mmm[~ZmmIndices.m0 | ~ZmmIndices.m1]

        triElec = eee_leps.e0 + eee_leps.e1 + eee_leps.e2
        triMuon = mmm_leps.m0 + mmm_leps.m1 + mmm_leps.m2
        m3l_eee = triElec.mass
        m3l_mmm = triMuon.mass

        ##################################################################
        ### >=4 leptons
        ##################################################################

        # 4lep cat
        is4lmask = ((nElec + nMuon) >= 4)
        muon_4l = mu[(is4lmask) & (mu.pt > -1)]
        elec_4l = e[(is4lmask) & (e.pt > -1)]

        # selecting 4 leading leptons
        leptons = ak.concatenate([e, mu], axis=-1)
        leptons_sorted = leptons[ak.argsort(leptons.pt,
                                            axis=-1,
                                            ascending=False)]
        lep4l = leptons_sorted[:, 0:4]
        e4l = lep4l[abs(lep4l.pdgId) == 11]
        mu4l = lep4l[abs(lep4l.pdgId) == 13]
        nElec4l = ak.num(e4l)
        nMuon4l = ak.num(mu4l)

        # Triggers
        trig_eeSS = passTrigger(events, 'ee', isData, dataset)
        trig_mmSS = passTrigger(events, 'mm', isData, dataset)
        trig_emSS = passTrigger(events, 'em', isData, dataset)
        trig_eee = passTrigger(events, 'eee', isData, dataset)
        trig_mmm = passTrigger(events, 'mmm', isData, dataset)
        trig_eem = passTrigger(events, 'eem', isData, dataset)
        trig_mme = passTrigger(events, 'mme', isData, dataset)
        trig_4l = triggerFor4l(events, nMuon, nElec, isData, dataset)

        # MET filters

        # Weights
        genw = np.ones_like(events['event']) if (
            isData or len(self._wc_names_lst) > 0) else events['genWeight']

        ### We need weights for: normalization, lepSF, triggerSF, pileup, btagSF...
        weights = {}
        for r in [
                'all', 'ee', 'mm', 'em', 'eee', 'mmm', 'eem', 'mme', 'eeee',
                'eeem', 'eemm', 'mmme', 'mmmm'
        ]:
            # weights[r] = coffea.analysis_tools.Weights(len(events))
            weights[r] = coffea.analysis_tools.Weights(len(events),
                                                       storeIndividual=True)
            if len(self._wc_names_lst) > 0:
                sow = np.ones_like(
                    sow
                )  # Not valid in nanoAOD for EFT samples, MUST use SumOfEFTweights at analysis level
            weights[r].add('norm', genw if isData else (xsec / sow) * genw)
            weights[r].add('btagSF', btagSF, btagSFUp, btagSFDo)
            weights[r].add('lepSF', events.lepSF_nom, events.lepSF_hi,
                           events.lepSF_lo)

        # Extract the EFT quadratic coefficients and optionally use them to calculate the coefficients on the w**2 quartic function
        # eft_coeffs is never Jagged so convert immediately to numpy for ease of use.
        eft_coeffs = ak.to_numpy(events['EFTfitCoefficients']) if hasattr(
            events, "EFTfitCoefficients") else None
        if eft_coeffs is not None:
            # Check to see if the ordering of WCs for this sample matches what want
            if self._samples[dataset]['WCnames'] != self._wc_names_lst:
                eft_coeffs = efth.remap_coeffs(
                    self._samples[dataset]['WCnames'], self._wc_names_lst,
                    eft_coeffs)
        eft_w2_coeffs = efth.calc_w2_coeffs(eft_coeffs, self._dtype) if (
            self._do_errors and eft_coeffs is not None) else None

        # Selections and cuts
        selections = PackedSelection()  #(dtype='uint64')
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('emSS', (emSSmask) & (trig_emSS))

        channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
        selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
        selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
        selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
        selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

        channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
        selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
        selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
        selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
        selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

        channels4L = ['eeee', 'eeem', 'eemm', 'mmme', 'mmmm']
        selections.add('eeee', ((nElec4l == 4) & (nMuon4l == 0)) & (trig_4l))
        selections.add('eeem', ((nElec4l == 3) & (nMuon4l == 1)) & (trig_4l))
        selections.add('eemm', ((nElec4l == 2) & (nMuon4l == 2)) & (trig_4l))
        selections.add('mmme', ((nElec4l == 1) & (nMuon4l == 3)) & (trig_4l))
        selections.add('mmmm', ((nElec4l == 0) & (nMuon4l == 4)) & (trig_4l))

        selections.add('ch+', (sumcharge > 0))
        selections.add('ch-', (sumcharge < 0))
        selections.add('ch0', (sumcharge == 0))

        levels = ['base', '1+bm2+bl', '1bm', '2+bm']
        selections.add('base', (nElec + nMuon >= 2))
        selections.add('1+bm2+bl', (nElec + nMuon >= 2) & ((nbtagsm >= 1) &
                                                           (nbtagsl >= 2)))
        selections.add('1bm', (nElec + nMuon >= 2) & (nbtagsm == 1))
        selections.add('2+bm', (nElec + nMuon >= 2) & (nbtagsm >= 2))

        # Variables
        invMass_eeSSonZ = (eeSSonZ.e0 + eeSSonZ.e1).mass
        invMass_eeSSoffZ = (eeSSoffZ.e0 + eeSSoffZ.e1).mass
        invMass_mmSSonZ = (mmSSonZ.m0 + mmSSonZ.m1).mass
        invMass_mmSSoffZ = (mmSSoffZ.m0 + mmSSoffZ.m1).mass
        invMass_emSS = (emSS.e + emSS.m).mass

        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['invmass'] = {
            'eeSSonZ': invMass_eeSSonZ,
            'eeSSoffZ': invMass_eeSSoffZ,
            'mmSSonZ': invMass_mmSSonZ,
            'mmSSoffZ': invMass_mmSSoffZ,
            'emSS': invMass_emSS,
            'eemSSonZ': mZ_eem,
            'eemSSoffZ': mZ_eem,
            'mmeSSonZ': mZ_mme,
            'mmeSSoffZ': mZ_mme,
            'eeeSSonZ': mZ_eee,
            'eeeSSoffZ': mZ_eee,
            'mmmSSonZ': mZ_mmm,
            'mmmSSoffZ': mZ_mmm,
        }
        varnames['m3l'] = {
            'eemSSonZ': m3l_eem,
            'eemSSoffZ': m3l_eem,
            'mmeSSonZ': m3l_mme,
            'mmeSSoffZ': m3l_mme,
            'eeeSSonZ': m3l_eee,
            'eeeSSoffZ': m3l_eee,
            'mmmSSonZ': m3l_mmm,
            'mmmSSoffZ': m3l_mmm,
        }
        varnames['e0pt'] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt'] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['j0pt'] = j0.pt
        varnames['j0eta'] = j0.eta
        varnames['counts'] = np.ones_like(events['event'])

        # systematics
        systList = []
        if isData == False:
            systList = ['nominal']
            if self._do_systematics:
                systList = systList + [
                    'lepSFUp', 'lepSFDown', 'btagSFUp', 'btagSFDown'
                ]
        else:
            systList = ['noweight']
        # fill Histos
        hout = self.accumulator.identity()
        normweights = weights['all'].weight().flatten(
        )  # Why does it not complain about .flatten() here?
        sowweights = np.ones_like(normweights) if len(
            self._wc_names_lst) > 0 else normweights
        hout['SumOfEFTweights'].fill(sample=histAxisName,
                                     SumOfEFTweights=varnames['counts'],
                                     weight=sowweights,
                                     eft_coeff=eft_coeffs,
                                     eft_err_coeff=eft_w2_coeffs)

        for syst in systList:
            for var, v in varnames.items():
                for ch in channels2LSS + channels3L + channels4L:
                    for sumcharge in ['ch+', 'ch-', 'ch0']:
                        for lev in levels:
                            #find the event weight to be used when filling the histograms
                            weightSyst = syst
                            #in the case of 'nominal', or the jet energy systematics, no weight systematic variation is used (weightSyst=None)
                            if syst in [
                                    'nominal', 'JERUp', 'JERDown', 'JESUp',
                                    'JESDown'
                            ]:
                                weightSyst = None  # no weight systematic for these variations
                            if syst == 'noweight':
                                weight = np.ones(len(events))  # for data
                            else:
                                # call weights.weight() with the name of the systematic to be varied
                                if ch in channels3L: ch_w = ch[:3]
                                elif ch in channels2LSS: ch_w = ch[:2]
                                else: ch_w = ch
                                weight = weights['all'].weight(
                                    weightSyst
                                ) if isData else weights[ch_w].weight(
                                    weightSyst)
                            cuts = [ch] + [lev] + [sumcharge]
                            cut = selections.all(*cuts)
                            weights_flat = weight[cut].flatten(
                            )  # Why does it not complain about .flatten() here?
                            weights_ones = np.ones_like(weights_flat,
                                                        dtype=np.int)
                            eft_coeffs_cut = eft_coeffs[
                                cut] if eft_coeffs is not None else None
                            eft_w2_coeffs_cut = eft_w2_coeffs[
                                cut] if eft_w2_coeffs is not None else None

                            # filling histos
                            if var == 'invmass':
                                if ((ch in [
                                        'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                        'mmmSSonZ'
                                ]) or (ch in channels4L)):
                                    continue
                                else:
                                    values = ak.flatten(v[ch][cut])
                                hout['invmass'].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    invmass=values,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'm3l':
                                if ((ch in channels2LSS) or (ch in [
                                        'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                        'mmmSSonZ'
                                ]) or (ch in channels4L)):
                                    continue
                                values = ak.flatten(v[ch][cut])
                                hout['m3l'].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    m3l=values,
                                    weight=weights_flat,
                                    systematic=syst)
                            else:
                                values = v[cut]
                                # These all look identical, do we need if/else here?
                                if var == 'ht':
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        ht=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'met':
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        met=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'njets':
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        njets=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'nbtags':
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        nbtags=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'counts':
                                    hout[var].fill(counts=values,
                                                   sample=histAxisName,
                                                   channel=ch,
                                                   cut=lev,
                                                   sumcharge=sumcharge,
                                                   weight=weights_ones,
                                                   systematic=syst)
                                elif var == 'j0eta':
                                    if lev == 'base': continue
                                    values = ak.flatten(values)
                                    #values=np.asarray(values)
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        j0eta=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'e0pt':
                                    if ch in [
                                            'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                            'mmmSSonZ', 'mmmm'
                                    ]:
                                        continue
                                    values = ak.flatten(values)
                                    #values=np.asarray(values)
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        e0pt=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst
                                    )  # Crashing here, not sure why. Related to values?
                                elif var == 'm0pt':
                                    if ch in [
                                            'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                            'eeeSSonZ', 'eeee'
                                    ]:
                                        continue
                                    values = ak.flatten(values)
                                    #values=np.asarray(values)
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        m0pt=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'e0eta':
                                    if ch in [
                                            'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                            'mmmSSonZ', 'mmmm'
                                    ]:
                                        continue
                                    values = ak.flatten(values)
                                    #values=np.asarray(values)
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        e0eta=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'm0eta':
                                    if ch in [
                                            'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                            'eeeSSonZ', 'eeee'
                                    ]:
                                        continue
                                    values = ak.flatten(values)
                                    #values=np.asarray(values)
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        m0eta=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
                                elif var == 'j0pt':
                                    if lev == 'base': continue
                                    values = ak.flatten(values)
                                    #values=np.asarray(values)
                                    hout[var].fill(
                                        eft_coeff=eft_coeffs_cut,
                                        eft_err_coeff=eft_w2_coeffs_cut,
                                        j0pt=values,
                                        sample=histAxisName,
                                        channel=ch,
                                        cut=lev,
                                        sumcharge=sumcharge,
                                        weight=weights_flat,
                                        systematic=syst)
        return hout
Ejemplo n.º 4
0
    def process(self, events):
        """Fill dielectron control histograms for one chunk of events.

        Events are split into six regions: opposite-sign (OS) and same-sign
        (SS) tight-electron pairs, each inclusive, with >= 1 jet and with
        >= 2 jets. Every region additionally requires the filters, the
        dielectron trigger, the lumi mask, a pair mass within 15 of 91.2,
        exactly two loose electrons with no loose muon, and a leading
        electron with pt > 30. OS regions are filled with the charge-flip
        weight (``weight2``); SS regions use ``weight``, to which no
        weights are added.

        Args:
            events: event record providing ``Jet``, ``HLT``, ``MET``, ``PV``,
                ``run``, ``luminosityBlock`` and ``metadata['dataset']``
                (presumably a coffea NanoEvents array -- confirm with caller).

        Returns:
            The accumulator created from ``self.accumulator.identity()`` with
            the event counters and histograms filled.

        Raises:
            ValueError: if ``self.year`` is not 2016, 2017 or 2018.
        """
        # Validate the year before doing any work: previously an unsupported
        # year left `lumimask`/`triggers` unbound and only failed much later
        # with an UnboundLocalError.
        lumi_files = {
            2016:
            '../data/lumi/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt',
            2017:
            '../data/lumi/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt',
            2018:
            '../data/lumi/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt',
        }
        lumi_file = lumi_files.get(self.year)
        if lumi_file is None:
            raise ValueError("Unsupported year %r; expected one of %s" %
                             (self.year, sorted(lumi_files)))

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        lumimask = LumiMask(lumi_file)

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # NOTE(review): the result is unused; the call is kept because any
        # side effects of loadConfig() are not visible from this method.
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # 2016 uses the _DZ variant of the dielectron path; 2017 and 2018
        # share the same path name.
        if self.year == 2016:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ
        else:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]

        loose_electron = Collections(ev, "Electron", "looseFCNC", 0,
                                     self.year).get()
        loose_electron = loose_electron[(loose_electron.pt > 25)
                                        & (np.abs(loose_electron.eta) < 2.4)]

        # Exactly two tight electrons, classified by their summed charge.
        two_electrons = (ak.num(electron) == 2)
        charge_sum = ak.sum(electron.charge, axis=1)
        SSelectron = (charge_sum != 0) & two_electrons
        OSelectron = (charge_sum == 0) & two_electrons

        dielectron = choose(electron, 2)
        dielectron_p4 = dielectron['0'] + dielectron['1']
        dielectron_mass = dielectron_p4.mass
        dielectron_pt = dielectron_p4.pt

        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[(leading_electron_idx)]
        leading_electron = leading_electron[(leading_electron.pt > 30)]

        trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
        trailing_electron = electron[trailing_electron_idx]

        ##Muons

        loose_muon = Collections(ev, "Muon", "looseFCNC", 0, self.year).get()
        loose_muon = loose_muon[(loose_muon.pt > 20)
                                & (np.abs(loose_muon.eta) < 2.4)]

        #jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[~match(jet, loose_muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons
        njet = ak.num(jet)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt

        #weights
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))
        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset, UL=False)
        mask = lumimask(ev.run, ev.luminosityBlock)
        mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
        lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
        num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('mask', (mask))
        selection.add('ss', SSelectron)
        selection.add('os', OSelectron)
        selection.add('mass', mass)
        selection.add('leading', lead_electron)
        selection.add('triggers', triggers)
        selection.add('one jet', (njet >= 1))
        selection.add('two jets', (njet >= 2))
        selection.add('num_loose', num_loose)

        def require_all(names):
            # Event mask that is True where every named cut passes.
            return selection.require(**{name: True for name in names})

        bl_reqs = ['filter', 'triggers', 'mask']
        s_reqs = bl_reqs + ['ss', 'mass', 'num_loose', 'leading']
        o_reqs = bl_reqs + ['os', 'mass', 'num_loose', 'leading']

        ss_sel = require_all(s_reqs)
        os_sel = require_all(o_reqs)
        j1ss_sel = require_all(s_reqs + ['one jet'])
        j1os_sel = require_all(o_reqs + ['one jet'])
        j2ss_sel = require_all(s_reqs + ['two jets'])
        j2os_sel = require_all(o_reqs + ['two jets'])

        #outputs

        # Evaluate each weight vector once instead of once per fill.
        flip_w = weight2.weight()
        plain_w = weight.weight()

        # (event mask, per-event weights), in the order the numbered
        # histograms expect: OS, OS+1j, OS+2j, SS, SS+1j, SS+2j.
        regions = [
            (os_sel, flip_w[os_sel]),
            (j1os_sel, flip_w[j1os_sel]),
            (j2os_sel, flip_w[j2os_sel]),
            (ss_sel, plain_w[ss_sel]),
            (j1ss_sel, plain_w[j1ss_sel]),
            (j2ss_sel, plain_w[j2ss_sel]),
        ]

        # electron_data1..12: (leading, trailing) for each region in turn.
        leptons = (leading_electron, trailing_electron)
        for region_idx, (sel, w) in enumerate(regions):
            for lep_idx, lepton in enumerate(leptons, start=1):
                chosen = lepton[sel]
                hist_name = "electron_data%d" % (2 * region_idx + lep_idx)
                output[hist_name].fill(
                    dataset=dataset,
                    pt=ak.to_numpy(ak.flatten(chosen.pt)),
                    eta=ak.to_numpy(ak.flatten(chosen.eta)),
                    phi=ak.to_numpy(ak.flatten(chosen.phi)),
                    weight=w)

        # dilep_mass1..6
        for number, (sel, w) in enumerate(regions, start=1):
            output["dilep_mass%d" % number].fill(
                dataset=dataset,
                mass=ak.to_numpy(ak.flatten(dielectron_mass[sel])),
                pt=ak.to_numpy(ak.flatten(dielectron_pt[sel])),
                weight=w)

        # The per-event histograms name the first region without a number
        # ("MET") and the remaining ones "MET2".."MET6", etc.
        suffixes = ["", "2", "3", "4", "5", "6"]

        for suffix, (sel, w) in zip(suffixes, regions):
            output["MET" + suffix].fill(dataset=dataset,
                                        pt=met_pt[sel],
                                        weight=w)

        for suffix, (sel, w) in zip(suffixes, regions):
            output["N_jet" + suffix].fill(dataset=dataset,
                                          multiplicity=njet[sel],
                                          weight=w)

        for suffix, (sel, w) in zip(suffixes, regions):
            output["PV_npvsGood" + suffix].fill(
                dataset=dataset,
                multiplicity=ev.PV[sel].npvsGood,
                weight=w)

        return output
Ejemplo n.º 5
0
    def process(self, events):
        # Dataset parameters
        dataset = events.metadata['dataset']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']
        datasets = [
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        ]
        for d in datasets:
            if d in dataset: dataset = dataset.split('_')[0]

        # Initialize objects
        met = events.MET
        e = events.Electron
        mu = events.Muon
        tau = events.Tau
        j = events.Jet

        # Muon selection

        #mu['isGood'] = isMuonMVA(mu.pt, mu.eta, mu.dxy, mu.dz, mu.miniPFRelIso_all, mu.sip3d, mu.mvaTTH, mu.mediumPromptId, mu.tightCharge, minpt=10)
        mu['isPres'] = isPresMuon(mu.dxy, mu.dz, mu.sip3d, mu.looseId)
        mu['isTight'] = isTightMuon(mu.pt,
                                    mu.eta,
                                    mu.dxy,
                                    mu.dz,
                                    mu.pfRelIso03_all,
                                    mu.sip3d,
                                    mu.mvaTTH,
                                    mu.mediumPromptId,
                                    mu.tightCharge,
                                    mu.looseId,
                                    minpt=10)
        mu['isGood'] = mu['isPres'] & mu['isTight']

        leading_mu = mu[ak.argmax(mu.pt, axis=-1, keepdims=True)]
        leading_mu = leading_mu[leading_mu.isGood]

        mu = mu[mu.isGood]
        mu_pres = mu[mu.isPres]

        # Electron selection
        #e['isGood'] = isElecMVA(e.pt, e.eta, e.dxy, e.dz, e.miniPFRelIso_all, e.sip3d, e.mvaTTH, e.mvaFall17V2Iso, e.lostHits, e.convVeto, e.tightCharge, minpt=10)
        e['isPres'] = isPresElec(e.pt,
                                 e.eta,
                                 e.dxy,
                                 e.dz,
                                 e.miniPFRelIso_all,
                                 e.sip3d,
                                 e.lostHits,
                                 minpt=15)
        e['isTight'] = isTightElec(e.pt,
                                   e.eta,
                                   e.dxy,
                                   e.dz,
                                   e.miniPFRelIso_all,
                                   e.sip3d,
                                   e.mvaTTH,
                                   e.mvaFall17V2Iso,
                                   e.lostHits,
                                   e.convVeto,
                                   e.tightCharge,
                                   e.sieie,
                                   e.hoe,
                                   e.eInvMinusPInv,
                                   minpt=15)
        e['isClean'] = isClean(e, mu, drmin=0.05)
        e['isGood'] = e['isPres'] & e['isTight'] & e['isClean']

        leading_e = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
        leading_e = leading_e[leading_e.isGood]

        e = e[e.isGood]
        e_pres = e[e.isPres & e.isClean]

        # Tau selection
        tau['isPres'] = isPresTau(tau.pt,
                                  tau.eta,
                                  tau.dxy,
                                  tau.dz,
                                  tau.leadTkPtOverTauPt,
                                  tau.idAntiMu,
                                  tau.idAntiEle,
                                  tau.rawIso,
                                  tau.idDecayModeNewDMs,
                                  minpt=20)
        tau['isClean'] = isClean(tau, e_pres, drmin=0.4) & isClean(
            tau, mu_pres, drmin=0.4)
        tau['isGood'] = tau['isPres']  # & tau['isClean'], for the moment
        tau = tau[tau.isGood]

        nElec = ak.num(e)
        nMuon = ak.num(mu)
        nTau = ak.num(tau)

        twoLeps = (nElec + nMuon) == 2
        threeLeps = (nElec + nMuon) == 3
        twoElec = (nElec == 2)
        twoMuon = (nMuon == 2)
        e0 = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
        m0 = mu[ak.argmax(mu.pt, axis=-1, keepdims=True)]

        # Jet selection

        jetptname = 'pt_nom' if hasattr(j, 'pt_nom') else 'pt'
        j['isGood'] = isTightJet(getattr(j,
                                         jetptname), j.eta, j.jetId, j.neHEF,
                                 j.neEmEF, j.chHEF, j.chEmEF, j.nConstituents)
        #j['isgood']  = isGoodJet(j.pt, j.eta, j.jetId)
        #j['isclean'] = isClean(j, e, mu)
        j['isClean'] = isClean(j, e, drmin=0.4) & isClean(
            j, mu, drmin=0.4)  # & isClean(j, tau, drmin=0.4)
        goodJets = j[(j.isClean) & (j.isGood)]
        njets = ak.num(goodJets)
        ht = ak.sum(goodJets.pt, axis=-1)
        j0 = goodJets[ak.argmax(goodJets.pt, axis=-1, keepdims=True)]
        #nbtags = ak.num(goodJets[goodJets.btagDeepFlavB > 0.2770])
        nbtags = ak.num(goodJets[goodJets.btagDeepB > 0.4941])

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
        singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
        em = ak.cartesian({"e": singe, "m": singm})
        emSSmask = (em.e.charge * em.m.charge > 0)
        emSS = em[emSSmask]
        nemSS = len(ak.flatten(emSS))

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
        mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

        eepairs = ak.combinations(ee, 2, fields=["e0", "e1"])
        eeSSmask = (eepairs.e0.charge * eepairs.e1.charge > 0)
        eeonZmask = (np.abs((eepairs.e0 + eepairs.e1).mass - 91.2) < 10)
        eeoffZmask = (eeonZmask == 0)

        mmpairs = ak.combinations(mm, 2, fields=["m0", "m1"])
        mmSSmask = (mmpairs.m0.charge * mmpairs.m1.charge > 0)
        mmonZmask = (np.abs((mmpairs.m0 + mmpairs.m1).mass - 91.2) < 10)
        mmoffZmask = (mmonZmask == 0)

        eeSSonZ = eepairs[eeSSmask & eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ = mmpairs[mmSSmask & mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
        neeSS = len(ak.flatten(eeSSonZ)) + len(ak.flatten(eeSSoffZ))
        nmmSS = len(ak.flatten(mmSSonZ)) + len(ak.flatten(mmSSoffZ))

        print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]' %
              (neeSS, nemSS, nmmSS))

        # Cuts
        eeSSmask = (ak.num(eeSSmask[eeSSmask]) > 0)
        mmSSmask = (ak.num(mmSSmask[mmSSmask]) > 0)
        eeonZmask = (ak.num(eeonZmask[eeonZmask]) > 0)
        eeoffZmask = (ak.num(eeoffZmask[eeoffZmask]) > 0)
        mmonZmask = (ak.num(mmonZmask[mmonZmask]) > 0)
        mmoffZmask = (ak.num(mmoffZmask[mmoffZmask]) > 0)
        emSSmask = (ak.num(emSSmask[emSSmask]) > 0)

        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
        elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
        ee_eem = ak.combinations(elec_eem, 2, fields=["e0", "e1"])

        ee_eemZmask = (ee_eem.e0.charge * ee_eem.e1.charge < 1) & (np.abs(
            (ee_eem.e0 + ee_eem.e1).mass - 91.2) < 10)
        ee_eemOffZmask = (ee_eem.e0.charge * ee_eem.e1.charge < 1) & (np.abs(
            (ee_eem.e0 + ee_eem.e1).mass - 91.2) > 10)
        ee_eemZmask = (ak.num(ee_eemZmask[ee_eemZmask]) > 0)
        ee_eemOffZmask = (ak.num(ee_eemOffZmask[ee_eemOffZmask]) > 0)

        eepair_eem = (ee_eem.e0 + ee_eem.e1)
        trilep_eem = eepair_eem + muon_eem  #ak.cartesian({"e0":ee_eem.e0,"e1":ee_eem.e1, "m":muon_eem})

        # mme
        muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
        elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]

        mm_mme = ak.combinations(muon_mme, 2, fields=["m0", "m1"])
        mm_mmeZmask = (mm_mme.m0.charge * mm_mme.m1.charge < 1) & (np.abs(
            (mm_mme.m0 + mm_mme.m1).mass - 91.2) < 10)
        mm_mmeOffZmask = (mm_mme.m0.charge * mm_mme.m1.charge < 1) & (np.abs(
            (mm_mme.m0 + mm_mme.m1).mass - 91.2) > 10)
        mm_mmeZmask = (ak.num(mm_mmeZmask[mm_mmeZmask]) > 0)
        mm_mmeOffZmask = (ak.num(mm_mmeOffZmask[mm_mmeOffZmask]) > 0)

        mmpair_mme = (mm_mme.m0 + mm_mme.m1)
        trilep_mme = mmpair_mme + elec_mme

        mZ_mme = mmpair_mme.mass
        mZ_eem = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        # eee and mmm
        eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
        mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]

        eee_leps = ak.combinations(eee, 3, fields=["e0", "e1", "e2"])
        mmm_leps = ak.combinations(mmm, 3, fields=["m0", "m1", "m2"])
        ee_pairs = ak.combinations(eee, 2, fields=["e0", "e1"])
        mm_pairs = ak.combinations(mmm, 2, fields=["m0", "m1"])
        ee_pairs_index = ak.argcombinations(eee, 2, fields=["e0", "e1"])
        mm_pairs_index = ak.argcombinations(mmm, 2, fields=["m0", "m1"])

        mmSFOS_pairs = mm_pairs[
            (np.abs(mm_pairs.m0.pdgId) == np.abs(mm_pairs.m1.pdgId))
            & (mm_pairs.m0.charge != mm_pairs.m1.charge)]
        offZmask_mm = ak.all(
            np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2) > 10.,
            axis=1,
            keepdims=True) & (ak.num(mmSFOS_pairs) > 0)
        onZmask_mm = ak.any(
            np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2) < 10.,
            axis=1,
            keepdims=True)

        eeSFOS_pairs = ee_pairs[
            (np.abs(ee_pairs.e0.pdgId) == np.abs(ee_pairs.e1.pdgId))
            & (ee_pairs.e0.charge != ee_pairs.e1.charge)]
        offZmask_ee = ak.all(
            np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2) > 10,
            axis=1,
            keepdims=True) & (ak.num(eeSFOS_pairs) > 0)
        onZmask_ee = ak.any(
            np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2) < 10,
            axis=1,
            keepdims=True)

        # Create masks **for event selection**
        eeeOnZmask = (ak.num(onZmask_ee[onZmask_ee]) > 0)
        eeeOffZmask = (ak.num(offZmask_ee[offZmask_ee]) > 0)
        mmmOnZmask = (ak.num(onZmask_mm[onZmask_mm]) > 0)
        mmmOffZmask = (ak.num(offZmask_mm[offZmask_mm]) > 0)

        # Now we need to create masks for the leptons in order to select leptons from the Z boson candidate (in onZ categories)
        ZeeMask = ak.argmin(np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass -
                                   91.2),
                            axis=1,
                            keepdims=True)
        ZmmMask = ak.argmin(np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass -
                                   91.2),
                            axis=1,
                            keepdims=True)

        Zee = eeSFOS_pairs[ZeeMask]
        Zmm = mmSFOS_pairs[ZmmMask]
        eZ0 = Zee.e0[ak.num(eeSFOS_pairs) > 0]
        eZ1 = Zee.e1[ak.num(eeSFOS_pairs) > 0]
        eZ = eZ0 + eZ1
        mZ0 = Zmm.m0[ak.num(mmSFOS_pairs) > 0]
        mZ1 = Zmm.m1[ak.num(mmSFOS_pairs) > 0]
        mZ = mZ0 + mZ1
        mZ_eee = eZ.mass
        mZ_mmm = mZ.mass

        # And for the W boson
        ZmmIndices = mm_pairs_index[ZmmMask]
        ZeeIndices = ee_pairs_index[ZeeMask]
        eW = eee[~ZeeIndices.e0 | ~ZeeIndices.e1]
        mW = mmm[~ZmmIndices.m0 | ~ZmmIndices.m1]

        triElec = eee_leps.e0 + eee_leps.e1 + eee_leps.e2
        triMuon = mmm_leps.m0 + mmm_leps.m1 + mmm_leps.m2
        m3l_eee = triElec.mass
        m3l_mmm = triMuon.mass

        # Triggers
        trig_eeSS = passTrigger(events, 'ee', isData, dataset)
        trig_mmSS = passTrigger(events, 'mm', isData, dataset)
        trig_emSS = passTrigger(events, 'em', isData, dataset)
        trig_eee = passTrigger(events, 'eee', isData, dataset)
        trig_mmm = passTrigger(events, 'mmm', isData, dataset)
        trig_eem = passTrigger(events, 'eem', isData, dataset)
        trig_mme = passTrigger(events, 'mme', isData, dataset)

        # MET filters

        # Weights
        genw = np.ones_like(
            events['MET_pt']) if isData else events['genWeight']
        weights = coffea.analysis_tools.Weights(len(events))
        weights.add('norm', genw if isData else (xsec / sow) * genw)
        eftweights = events['EFTfitCoefficients'] if hasattr(
            events, "EFTfitCoefficients") else []

        # Selections and cuts
        selections = PackedSelection()
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('emSS', (emSSmask) & (trig_emSS))

        channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
        selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
        selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
        selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
        selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

        channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
        selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
        selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
        selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
        selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

        levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
        selections.add('base', (nElec + nMuon >= 2))
        selections.add('2jets', (njets >= 2))
        selections.add('4jets', (njets >= 4))
        selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
        selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

        # Variables
        invMass_eeSSonZ = (eeSSonZ.e0 + eeSSonZ.e1).mass
        invMass_eeSSoffZ = (eeSSoffZ.e0 + eeSSoffZ.e1).mass
        invMass_mmSSonZ = (mmSSonZ.m0 + mmSSonZ.m1).mass
        invMass_mmSSoffZ = (mmSSoffZ.m0 + mmSSoffZ.m1).mass
        invMass_emSS = (emSS.e + emSS.m).mass

        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['nbtags'] = nbtags
        varnames['invmass'] = {
            'eeSSonZ': invMass_eeSSonZ,
            'eeSSoffZ': invMass_eeSSoffZ,
            'mmSSonZ': invMass_mmSSonZ,
            'mmSSoffZ': invMass_mmSSoffZ,
            'emSS': invMass_emSS,
            'eemSSonZ': mZ_eem,
            'eemSSoffZ': mZ_eem,
            'mmeSSonZ': mZ_mme,
            'mmeSSoffZ': mZ_mme,
            'eeeSSonZ': mZ_eee,
            'eeeSSoffZ': mZ_eee,
            'mmmSSonZ': mZ_mmm,
            'mmmSSoffZ': mZ_mmm,
        }
        varnames['m3l'] = {
            'eemSSonZ': m3l_eem,
            'eemSSoffZ': m3l_eem,
            'mmeSSonZ': m3l_mme,
            'mmeSSoffZ': m3l_mme,
            'eeeSSonZ': m3l_eee,
            'eeeSSoffZ': m3l_eee,
            'mmmSSonZ': m3l_mmm,
            'mmmSSoffZ': m3l_mmm,
        }
        varnames['e0pt'] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt'] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['j0pt'] = j0.pt
        varnames['j0eta'] = j0.eta
        varnames['counts'] = np.ones_like(events.MET.pt)

        # fill Histos
        hout = self.accumulator.identity()
        allweights = weights.weight().flatten(
        )  # Why does it not complain about .flatten() here?
        hout['SumOfEFTweights'].fill(eftweights,
                                     sample=dataset,
                                     SumOfEFTweights=varnames['counts'],
                                     weight=allweights)

        for var, v in varnames.items():
            for ch in channels2LSS + channels3L:
                for lev in levels:
                    weight = weights.weight()
                    cuts = [ch] + [lev]
                    cut = selections.all(*cuts)
                    weights_flat = weight[cut].flatten(
                    )  # Why does it not complain about .flatten() here?
                    weights_ones = np.ones_like(weights_flat, dtype=np.int)
                    eftweightsvalues = eftweights[cut] if len(
                        eftweights) > 0 else []
                    if var == 'invmass':
                        if ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue
                        elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                            continue  #values = v[ch]
                        else:
                            values = ak.flatten(v[ch][cut])
                        hout['invmass'].fill(sample=dataset,
                                             channel=ch,
                                             cut=lev,
                                             invmass=values,
                                             weight=weights_flat)
                    elif var == 'm3l':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                                'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        values = ak.flatten(v[ch][cut])
                        hout['m3l'].fill(eftweightsvalues,
                                         sample=dataset,
                                         channel=ch,
                                         cut=lev,
                                         m3l=values,
                                         weight=weights_flat)
                    else:
                        values = v[cut]
                        if var == 'ht':
                            hout[var].fill(eftweightsvalues,
                                           ht=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'met':
                            hout[var].fill(eftweightsvalues,
                                           met=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'njets':
                            hout[var].fill(eftweightsvalues,
                                           njets=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'nbtags':
                            hout[var].fill(eftweightsvalues,
                                           nbtags=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'counts':
                            hout[var].fill(counts=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_ones)
                        elif var == 'j0eta':
                            if lev == 'base': continue
                            values = ak.flatten(values)
                            #values=np.asarray(values)
                            hout[var].fill(eftweightsvalues,
                                           j0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'e0pt':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            values = ak.flatten(values)
                            #values=np.asarray(values)
                            hout[var].fill(
                                eftweightsvalues,
                                e0pt=values,
                                sample=dataset,
                                channel=ch,
                                cut=lev,
                                weight=weights_flat
                            )  # Crashing here, not sure why. Related to values?
                        elif var == 'm0pt':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            values = ak.flatten(values)
                            #values=np.asarray(values)
                            hout[var].fill(eftweightsvalues,
                                           m0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'e0eta':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            values = ak.flatten(values)
                            #values=np.asarray(values)
                            hout[var].fill(eftweightsvalues,
                                           e0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0eta':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            values = ak.flatten(values)
                            #values=np.asarray(values)
                            hout[var].fill(eftweightsvalues,
                                           m0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0pt':
                            if lev == 'base': continue
                            values = ak.flatten(values)
                            #values=np.asarray(values)
                            hout[var].fill(eftweightsvalues,
                                           j0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
        return hout
Ejemplo n.º 6
0
    def process(self, events):
        """Fill gen-level histograms for events with >= 2 leptons and >= 2 clean jets.

        Generator-level electrons and muons (pt > 15, |eta| < 2.5) and gen jets
        (pt > 30, |eta| < 2.5, cleaned against the selected leptons with
        drmin=0.4) are selected, and the ``counts``, ``njets``, ``j0pt``,
        ``j0eta`` and ``l0pt`` histograms are filled for events passing the
        ``2l2j`` selection.  All histograms except ``counts`` also receive the
        EFT coefficients when the events carry ``EFTfitCoefficients``.

        Args:
            events: Event chunk exposing ``metadata['dataset']``, ``GenPart``,
                ``GenJet``, ``MET``, ``run``, ``luminosityBlock`` and ``event``.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` after
            filling.

        NOTE(review): the ``print`` calls below look like debugging output and
        are kept verbatim; consider moving them to ``logging``.
        """
        # Dataset parameters
        dataset = events.metadata['dataset']
        data_streams = (
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        )
        # If the name contains one of the stream names above, keep only the
        # part before the first underscore.
        if any(stream in dataset for stream in data_streams):
            dataset = dataset.split('_')[0]

        # Extract the EFT quadratic coefficients and optionally use them to calculate the coefficients on the w**2 quartic function
        # eft_coeffs is never Jagged so convert immediately to numpy for ease of use.
        eft_coeffs = ak.to_numpy(events['EFTfitCoefficients']) if hasattr(
            events, "EFTfitCoefficients") else None
        # Check to see if the ordering of WCs for this sample matches what we want
        if (eft_coeffs is not None
                and self._samples[dataset]['WCnames'] != self._wc_names_lst):
            eft_coeffs = efth.remap_coeffs(
                self._samples[dataset]['WCnames'], self._wc_names_lst,
                eft_coeffs)
        eft_w2_coeffs = efth.calc_w2_coeffs(eft_coeffs, self._dtype) if (
            self._do_errors and eft_coeffs is not None) else None

        # Initialize objects (GEN objects)
        e = events.GenPart[abs(events.GenPart.pdgId) == 11]
        m = events.GenPart[abs(events.GenPart.pdgId) == 13]
        tau = events.GenPart[abs(events.GenPart.pdgId) == 15]
        j = events.GenJet

        run = events.run
        luminosityBlock = events.luminosityBlock
        event = events.event

        print("\n\nInfo about events:")
        print("\trun:", run)
        print("\tluminosityBlock:", luminosityBlock)
        print("\tevent:", event)

        print("\nLeptons before selection:")
        print("\te pt", e.pt)
        print("\te eta", e.eta)
        print("\tm pt", m.pt)
        print("\tm eta", m.eta)

        ######## Lep selection  ########

        e = e[(e.pt > 15) & (abs(e.eta) < 2.5)]
        m = m[(m.pt > 15) & (abs(m.eta) < 2.5)]

        # Put the e and mu together
        leps = ak.concatenate([e, m], axis=1)

        n_e = ak.num(e)
        n_m = ak.num(m)
        n_l = ak.num(leps)

        at_least_two_leps = (n_l >= 2)

        # keepdims=True keeps one (possibly missing) index per event so the
        # result can be used directly to index the collection.
        e0 = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
        m0 = m[ak.argmax(m.pt, axis=-1, keepdims=True)]
        l0 = leps[ak.argmax(leps.pt, axis=-1, keepdims=True)]

        print("\nLeptons after selection:")
        print("\te pt", e.pt)
        print("\tm pt", m.pt)
        print("\tl pt:", leps.pt)
        print("\tn e", n_e)
        print("\tn m", n_m)
        print("\tn l", n_l)

        print("\nMask for at least two lep:", at_least_two_leps)

        print("\nLeading lepton info:")
        print("\te0", e0.pt)
        print("\tm0", m0.pt)
        print("\tl0", l0.pt)

        ######## Jet selection  ########

        print("\nJet info:")
        print("\tjpt before selection", j.pt)

        j_selec = ((j.pt > 30) & (abs(j.eta) < 2.5))
        print("\tjselect", j_selec)

        j = j[j_selec]
        print("\tjpt", j.pt)

        # Compute the lepton-cleaning mask once and reuse it both as a jet
        # field and as the filter.
        j_isclean = isClean(j, e, drmin=0.4) & isClean(j, m, drmin=0.4)
        j['isClean'] = j_isclean
        print("\tj is clean", j_isclean)

        j = j[j_isclean]
        print("\tclean jets pt", j.pt)

        n_j = ak.num(j)
        print("\tn_j", n_j)
        j0 = j[ak.argmax(j.pt, axis=-1, keepdims=True)]

        print("\tj0pt", j0.pt)

        at_least_two_jets = (n_j >= 2)
        print("\tat_least_two_jets", at_least_two_jets)

        ######## Selections and cuts ########

        event_selec = (at_least_two_leps & at_least_two_jets)
        print("\nEvent selection:", event_selec, "\n")

        selections = PackedSelection()
        selections.add('2l2j', event_selec)

        varnames = {
            'counts': np.ones_like(events.MET.pt),
            'njets': n_j,
            'j0pt': j0.pt,
            'j0eta': j0.eta,
            'l0pt': l0.pt,
        }

        ######## Fill histos ########

        print("\nFilling hists now...\n")
        hout = self.accumulator.identity()

        # The event mask and the sliced EFT coefficients are identical for
        # every variable, so compute them once outside the loop.
        cut = selections.all("2l2j")
        eft_coeffs_cut = eft_coeffs[cut] if eft_coeffs is not None else None
        eft_w2_coeffs_cut = (eft_w2_coeffs[cut]
                             if eft_w2_coeffs is not None else None)

        for var, v in varnames.items():
            # Each histogram's value axis is named after the variable itself.
            fill_args = {
                var: v[cut],
                'sample': dataset,
                'channel': "2l",
                'cut': "2l",
            }
            # "counts" is filled without EFT coefficients.
            if var != "counts":
                fill_args['eft_coeff'] = eft_coeffs_cut
                fill_args['eft_err_coeff'] = eft_w2_coeffs_cut
            hout[var].fill(**fill_args)

        return hout
    def process(self, events):
        """Dump per-event columns for events passing the baseline selection.

        Events with more than two jets are kept, leptons / jets / b-tags /
        forward jets are built, and a same-sign dilepton baseline selection
        (``BL``) is evaluated.  For the events passing ``BL`` a set of
        kinematic quantities, a numeric dataset label and the event weight are
        appended to the column accumulators in the output.

        Args:
            events: Event chunk exposing ``Jet``, ``MET``, ``metadata`` and the
                branches read by the helper functions used below.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` with
            the counters and columns updated.
        """
        output = self.accumulator.identity()

        output['total']['all'] += len(events)
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # NOTE(review): the result is unused here; the call is kept only in
        # case loadConfig() has side effects - confirm and remove.
        cfg = loadConfig()

        ## Muons
        muon = Collections(ev, "Muon", "vetoTTH").get()
        tightmuon = Collections(ev, "Muon", "tightTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)

        ## Electrons
        electron = Collections(ev, "Electron", "vetoTTH").get()
        tightelectron = Collections(ev, "Electron", "tightTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)

        lepton = ak.concatenate([muon, electron], axis=1)
        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        # The lowest-pt lepton; the 'lepveto' requirement below demands exactly
        # two leptons, in which case this is the subleading one.
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        dilepton_system = leading_lepton + trailing_lepton
        dilepton_mass = dilepton_system.mass
        dilepton_pt = dilepton_system.pt

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        # need to sort wrt smeared and recorrected jet pt
        jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
        # remove jets that overlap with muons
        jet = jet[~match(jet, muon, deltaRCut=0.4)]
        # remove jets that overlap with electrons
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        central = jet[(abs(jet.eta) < 2.4)]
        # should study working point for DeepJet
        btag = getBTagsDeepFlavB(jet, year=self.year)
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)

        tau = getTaus(ev)
        track = getIsoTracks(ev)

        ## forward jets
        # highest momentum spectator
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]

        # ak.argsort sorts ascending by default; sort descending so the first
        # two entries really are the two highest b-tag scores.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB,
                                             ascending=False)][:, :2]

        bl = cross(lepton, high_score_btag)
        bl_dR = delta_r(bl['0'], bl['1'])
        min_bl_dR = ak.min(bl_dR, axis=1)

        jf = cross(j_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        # this is the jet that forms the largest invariant mass with j_fwd
        j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1']
        delta_eta = ak.fill_none(
            ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = (met_pt + ht + ak.sum(muon.pt, axis=1)
              + ak.sum(electron.pt, axis=1))

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2)
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(electron) + ak.num(muon)) == 2)

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('SS', (SSlepton | SSelectron | SSmuon))
        selection.add('N_jet>3', (ak.num(jet) >= 4))
        selection.add('N_central>2', (ak.num(central) >= 3))
        selection.add('N_btag>0', (ak.num(btag) >= 1))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))

        ss_reqs = [
            'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'
        ]
        # 'N_fwd>0' is registered above but is not part of the baseline list.
        bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0']

        ss_selection = selection.require(**{sel: True for sel in ss_reqs})
        BL = selection.require(**{sel: True for sel in bl_reqs})

        weight = Weights(len(ev))

        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight)

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        # Numeric label per dataset; datasets not listed here get label 5.
        labels = {
            'topW_v3': 0,
            'TTW': 1,
            'TTZ': 2,
            'TTH': 3,
            'ttbar': 4,
            'ttbar1l_MG': 4
        }
        label_mult = labels.get(dataset, 5)
        n_selected = len(ev[BL])
        label = np.full(n_selected, label_mult, dtype=float)

        def column(array):
            """Wrap a per-event (non-jagged) array as a column accumulator."""
            return processor.column_accumulator(ak.to_numpy(array))

        def flat_column(array):
            """Flatten the per-event axis of a jagged array, then wrap it."""
            return column(ak.flatten(array, axis=1))

        def padded_column(array):
            """Pad/clip each event to one entry (missing -> 0), flatten, wrap."""
            return flat_column(
                ak.fill_none(ak.pad_none(array, 1, clip=True), 0))

        output["n_lep"] += column((ak.num(electron) + ak.num(muon))[BL])
        output["n_lep_tight"] += column(
            (ak.num(tightelectron) + ak.num(tightmuon))[BL])

        lepton_props = ("pt", "eta", "phi", "charge")
        kinematics = ("pt", "eta", "phi")

        lead_lep_sel = leading_lepton[BL]
        sublead_lep_sel = trailing_lepton[BL]
        for prop in lepton_props:
            output["lead_lep_" + prop] += flat_column(
                getattr(lead_lep_sel, prop))
            output["sublead_lep_" + prop] += flat_column(
                getattr(sublead_lep_sel, prop))

        # Length-1 slices (0:1, 1:2) rather than plain indexing: each event
        # stays a list, which is then flattened.
        lead_jet_sel = jet[:, 0:1][BL]
        sublead_jet_sel = jet[:, 1:2][BL]
        lead_btag_sel = high_score_btag[:, 0:1][BL]
        sublead_btag_sel = high_score_btag[:, 1:2][BL]
        for prop in kinematics:
            output["lead_jet_" + prop] += flat_column(
                getattr(lead_jet_sel, prop))
            output["sublead_jet_" + prop] += flat_column(
                getattr(sublead_jet_sel, prop))
            output["lead_btag_" + prop] += flat_column(
                getattr(lead_btag_sel, prop))
            output["sublead_btag_" + prop] += flat_column(
                getattr(sublead_btag_sel, prop))

        # Events without a forward jet get 0 for every forward-jet quantity.
        j_fwd_sel = j_fwd[BL]
        for prop in ("p", "pt", "eta", "phi"):
            output["fwd_jet_" + prop] += padded_column(
                getattr(j_fwd_sel, prop))

        output["mjj_max"] += column(ak.fill_none(ak.max(mjf[BL], axis=1), 0))
        output["delta_eta_jj"] += flat_column(delta_eta[BL])

        output["met"] += column(met_pt[BL])
        output["ht"] += column(ht[BL])
        output["st"] += column(st[BL])
        output["n_jet"] += column(ak.num(jet[BL]))
        output["n_btag"] += column(ak.num(btag[BL]))
        output["n_fwd"] += column(ak.num(fwd[BL]))
        output["n_central"] += column(ak.num(central[BL]))
        output["n_tau"] += column(ak.num(tau[BL]))
        output["n_track"] += column(ak.num(track[BL]))

        output["dilepton_pt"] += flat_column(dilepton_pt[BL])
        output["dilepton_mass"] += flat_column(dilepton_mass[BL])
        output["min_bl_dR"] += column(min_bl_dR[BL])
        output["min_mt_lep_met"] += column(min_mt_lep_met[BL])

        output["label"] += processor.column_accumulator(label)
        output["weight"] += processor.column_accumulator(weight.weight()[BL])

        output["presel"]["all"] += len(ev[ss_selection])
        output["sel"]["all"] += n_selected

        return output
    def process(self, events):
        """Fill the opposite-sign dilepton histograms for one chunk of events.

        Events with more than two jets are kept.  Tight and veto leptons and
        lepton-cleaned jets are built, and an opposite-sign selection with
        exactly one tight electron and one tight muon is applied.  The
        inclusive histograms use the opposite-sign requirements only; the
        leading-jet histogram additionally requires 'N_jet>2' and 'MET>30'.
        Unless the dataset is 'MuonEG', the jet histograms are filled again
        for every entry of ``self.variations``.

        Args:
            events: event array providing the ``Jet``, ``MET`` and ``PV``
                collections and a ``metadata['dataset']`` entry.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` with
            the counters and histograms filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']
        # NOTE(review): 'MuonEG' is the only dataset that gets neither event
        # weights nor systematic variations - presumably it is collision data.
        is_muoneg = dataset == 'MuonEG'

        # the config provides the luminosity used for the normalisation below
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tight").get()
        vetomuon = Collections(ev, "Muon", "veto").get()

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        vetoelectron = Collections(ev, "Electron", "veto").get()

        ## Electron-muon pairs: an event is opposite-sign if any pair has a
        ## negative charge product
        dilepton = cross(muon, electron)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7)
        jet = jet[(jet.pt > 25) & (jet.jetId > 1)]
        # remove jets that overlap with muons
        jet = jet[~match(jet, muon, deltaRCut=0.4)]
        # remove jets that overlap with electrons
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        # at least one tight lepton above 25 and at least two above 20
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        # no veto leptons beyond the two selected ones
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        if not is_muoneg:
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weights (not in the babies) and b-tag SFs are not applied.

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('MET>30', (ev.MET.pt > 30))

        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + ['N_jet>2', 'MET>30']

        os_selection = selection.require(**{sel: True for sel in os_reqs})
        BL = selection.require(**{sel: True for sel in bl_reqs})

        # one cutflow row per requirement, each row including all earlier cuts
        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d[req] = True
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # no weights are added past this point, so the masked weights can be
        # computed once and shared by all fills
        event_weight = weight.weight()
        weight_os = event_weight[os_selection]

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight_os)
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight_os)
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight_os)

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight_os)

        # the 0:1 slice keeps the leading jet without failing on empty events
        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=event_weight[BL])

        # Now, take care of systematic uncertainties
        if not is_muoneg:
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet_var = getPtEtaPhi(alljets, pt_var=var)
                jet_var = jet_var[(jet_var.pt > 25)]
                # remove jets that overlap with muons
                jet_var = jet_var[~match(jet_var, muon, deltaRCut=0.4)]
                # remove jets that overlap with electrons
                jet_var = jet_var[~match(jet_var, electron, deltaRCut=0.4)]

                # get the modified selection -> more difficult
                # Same threshold as the nominal 'N_jet>2' cut: at least three
                # jets.  Counting via .pt because something needs to be
                # improved with the getPtEtaPhi function.
                selection.add('N_jet>2_' + var, (ak.num(jet_var.pt) >= 3))
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                # keep the nominal bl_reqs / BL untouched
                bl_reqs_var = os_reqs + ['N_jet>2_' + var, 'MET>30_' + var]
                BL_var = selection.require(
                    **{sel: True for sel in bl_reqs_var})

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet_var)[os_selection],
                    weight=weight_os)

                # We don't need to redo all plots with variations. E.g., just
                # add uncertainties to the jet plots.
                output['j1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(jet_var.pt[:, 0:1][BL_var]),
                    eta=ak.flatten(jet_var.eta[:, 0:1][BL_var]),
                    phi=ak.flatten(jet_var.phi[:, 0:1][BL_var]),
                    weight=event_weight[BL_var])

        return output
Ejemplo n.º 9
0
    def process(self, events):
        """Fill the electron charge-flip histograms for one chunk of events.

        Tight electrons passing isolation, pt and eta requirements are
        selected, the gen-matched ones whose reconstructed pdgId is the
        negative of the matched generator pdgId are tagged as flipped, and
        two sets of histograms are filled: one for events with a flipped
        electron using the nominal weight, and one for all baseline events
        using a weight that also carries the charge-flip ratio.

        Args:
            events: event array providing the ``Jet``, ``MET`` and electron
                collections and a ``metadata['dataset']`` entry.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` after
            filling.
        """
        output = self.accumulator.identity()

        # Very loose preselection (at least one jet); nothing else relies on it.
        has_jet = ak.num(events.Jet) > 0
        ev = events[has_jet]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons: tight collection with extra isolation / pt / eta cuts
        tight_electron = Collections(ev, "Electron", "tight").get()
        passes_iso = tight_electron.miniPFRelIso_all < 0.12
        passes_pt = tight_electron.pt > 20
        passes_eta = abs(tight_electron.eta) < 2.4
        electron = tight_electron[passes_iso & passes_pt & passes_eta]

        # electrons with a generator match that is itself an electron
        has_gen_match = electron.genPartIdx >= 0
        gen_is_electron = abs(electron.matched_gen.pdgId) == 11
        gen_matched_electron = electron[has_gen_match & gen_is_electron]
        n_gen = ak.num(gen_matched_electron)

        # flipped: reconstructed pdgId is minus the matched generator pdgId
        opposite_to_gen = (gen_matched_electron.matched_gen.pdgId *
                           (-1)) == gen_matched_electron.pdgId
        reco_is_electron = abs(gen_matched_electron.pdgId) == 11
        flipped_electron = gen_matched_electron[opposite_to_gen
                                                & reco_is_electron]
        n_flips = ak.num(flipped_electron)

        sielectron = choose(electron, 1)

        # same-sign if any electron pair has a positive charge product
        dielectron = choose(electron, 2)
        pair_charge = dielectron['0'].charge * dielectron['1'].charge
        SSelectron = ak.any(pair_charge > 0, axis=1)

        highest_pt_idx = ak.argmax(electron.pt, axis=1)
        leading_electron = electron[ak.singletons(highest_pt_idx)]

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # setting up the various weights: nominal, and nominal x flip ratio
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if dataset != 'MuonEG':
            # generator weight
            for event_weight in (weight, weight2):
                event_weight.add("weight", ev.genWeight)

        flip_ratio = self.charge_flip_ratio.flip_ratio(sielectron['0'])
        weight2.add("charge flip", flip_ratio)

        # selections
        named_masks = (
            ('filter', getFilters(ev, year=self.year, dataset=dataset)),
            ('electr', ak.num(electron) == 2),
            ('ss', SSelectron),
            ('flip', n_flips >= 1),
            ('gen', n_gen >= 1),
        )
        selection = PackedSelection()
        for mask_name, mask in named_masks:
            selection.add(mask_name, mask)

        bl_reqs = ['filter', 'electr']
        baseline = selection.require(**dict.fromkeys(bl_reqs, True))
        ss_sel = selection.require(**dict.fromkeys(bl_reqs + ['ss'], True))
        flip_sel = selection.require(
            **dict.fromkeys(bl_reqs + ['gen', 'flip'], True))

        # outputs
        n_electron = ak.num(electron)
        weight_flip = weight.weight()[flip_sel]
        weight_baseline = weight2.weight()[baseline]

        output['N_ele'].fill(dataset=dataset,
                             multiplicity=n_electron[flip_sel],
                             weight=weight_flip)
        output['electron_flips'].fill(dataset=dataset,
                                      multiplicity=n_flips[flip_sel],
                                      weight=weight_flip)

        output['N_ele2'].fill(dataset=dataset,
                              multiplicity=n_electron[baseline],
                              weight=weight_baseline)
        output['electron_flips2'].fill(dataset=dataset,
                                       multiplicity=n_flips[baseline],
                                       weight=weight_baseline)

        # flipped electrons in events passing the flip selection
        selected_flips = flipped_electron[flip_sel]
        output["electron"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(selected_flips.pt)),
            eta=ak.to_numpy(ak.flatten(abs(selected_flips.eta))),
            weight=weight_flip)

        # leading electron in all baseline events
        selected_leading = leading_electron[baseline]
        output["electron2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(selected_leading.pt)),
            eta=ak.to_numpy(ak.flatten(abs(selected_leading.eta))),
            weight=weight_baseline)

        return output
Ejemplo n.º 10
0
    def process(self, events):
        """Fill lepton multiplicity and leading-lepton histograms.

        Events passing the filters with exactly two leptons (electrons plus
        muons, taken directly from the event collections) enter the
        histograms.

        Args:
            events: event array providing the ``Jet``, ``Muon``, ``Electron``
                and ``MET`` collections and a ``metadata['dataset']`` entry.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` after
            filling.
        """
        output = self.accumulator.identity()

        # Very loose preselection; nothing else is done with it.
        jet_presel = ak.num(events.Jet) >= 0
        ev = events[jet_presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Leptons, taken unfiltered from the event
        muon = ev.Muon
        electron = ev.Electron

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        pair_charge = dilepton['0'].charge * dilepton['1'].charge
        SSlepton = ak.any(pair_charge > 0, axis=1)

        # highest- and lowest-pt lepton of the merged collection
        lepton = ak.concatenate([muon, electron], axis=1)
        highest_pt_idx = ak.argmax(lepton.pt, axis=1)
        leading_lepton = lepton[ak.singletons(highest_pt_idx)]
        lowest_pt_idx = ak.argmin(lepton.pt, axis=1)
        trailing_lepton = lepton[ak.singletons(lowest_pt_idx)]

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # define the weight
        weight = Weights(len(ev))
        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)

        filters = getFilters(ev, year=self.year, dataset=dataset)
        n_lepton = ak.num(electron) + ak.num(muon)

        selection = PackedSelection()
        selection.add('dilep', n_lepton == 2)
        selection.add('filter', (filters))

        bl_reqs = ['dilep', 'filter']
        baseline = selection.require(**dict.fromkeys(bl_reqs, True))

        weight_baseline = weight.weight()[baseline]

        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[baseline],
                             weight=weight_baseline)
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(muon)[baseline],
                            weight=weight_baseline)

        selected_leading = leading_lepton[baseline]
        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(selected_leading.pt)),
            eta=ak.to_numpy(ak.flatten(selected_leading.eta)),
            phi=ak.to_numpy(ak.flatten(selected_leading.phi)),
            weight=weight_baseline)

        return output
Ejemplo n.º 11
0
    def process(self, events):
        # Dataset parameters
        dataset = events.metadata['dataset']
        year   = self._samples[dataset]['year']
        xsec   = self._samples[dataset]['xsec']
        sow    = self._samples[dataset]['nSumOfWeights' ]
        isData = self._samples[dataset]['isData']
        datasets = ['SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon', 'DoubleElectron']
        for d in datasets: 
          if d in dataset: dataset = dataset.split('_')[0] 

        # Inittialize objects
        met = events.GenMET
        e = events.GenPart[abs(events.GenPart.pdgId)==11]
        mu = events.GenPart[abs(events.GenPart.pdgId)==13]
        tau = events.GenPart[abs(events.GenPart.pdgId)==15]
        j = events.GenJet

        leading_mu = mu[ak.argmax(mu.pt,axis=-1,keepdims=True)]

        leading_e = e[ak.argmax(e.pt,axis=-1,keepdims=True)]

        nElec = ak.num(e)
        nMuon = ak.num(mu)
        nTau  = ak.num(tau)

        twoLeps   = (nElec+nMuon) == 2
        threeLeps = (nElec+nMuon) == 3
        twoElec   = (nElec == 2)
        twoMuon   = (nMuon == 2)
        e0 = e[ak.argmax(e.pt,axis=-1,keepdims=True)]
        m0 = mu[ak.argmax(mu.pt,axis=-1,keepdims=True)]
        elecs = e[ak.argsort(e.pt, ascending=False)]
        muons = mu[ak.argsort(mu.pt, ascending=False)]
        e1 = elecs
        e2 = elecs
        m1 = muons
        m2 = muons

        # Jet selection

        jetptname = 'pt_nom' if hasattr(j, 'pt_nom') else 'pt'
        njets = ak.num(j)
        ht = ak.sum(j.pt,axis=-1)
        jets = j[ak.argsort(j.pt, ascending=False)]
        j0 = j[ak.argmax(j.pt,axis=-1,keepdims=True)]
        j1 = jets
        j2 = jets
        j3 = jets
        nbtags = ak.num(j[abs(j.hadronFlavour)==5])

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        singe = e [(nElec==1)&(nMuon==1)&(e .pt>-1)]
        singm = mu[(nElec==1)&(nMuon==1)&(mu.pt>-1)]
        em = ak.cartesian({"e":singe,"m":singm})
        emSSmask = (em.e.pdgId*em.m.pdgId>0)
        emSS = em[emSSmask]
        nemSS = len(ak.flatten(emSS))

        year = 2018
        lepSF_emSS = GetLeptonSF(mu.pt, mu.eta, 'm', e.pt, e.eta, 'e', year=year)

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e [(nElec==2)&(nMuon==0)&(e.pt>-1)]
        mm = mu[(nElec==0)&(nMuon==2)&(mu.pt>-1)]

        eepairs = ak.combinations(ee, 2, fields=["e0","e1"])
        eeSSmask = (eepairs.e0.pdgId*eepairs.e1.pdgId>0)
        eeonZmask  = (np.abs((eepairs.e0+eepairs.e1).mass-91.2)<10)
        eeoffZmask = (eeonZmask==0)

        mmpairs = ak.combinations(mm, 2, fields=["m0","m1"])
        mmSSmask = (mmpairs.m0.pdgId*mmpairs.m1.pdgId>0)
        mmonZmask = (np.abs((mmpairs.m0+mmpairs.m1).mass-91.2)<10)
        mmoffZmask = (mmonZmask==0)

        eeSSonZ  = eepairs[eeSSmask &  eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ  = mmpairs[mmSSmask &  mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
        neeSS = len(ak.flatten(eeSSonZ)) + len(ak.flatten(eeSSoffZ))
        nmmSS = len(ak.flatten(mmSSonZ)) + len(ak.flatten(mmSSoffZ))

        lepSF_eeSS = GetLeptonSF(eepairs.e0.pt, eepairs.e0.eta, 'e', eepairs.e1.pt, eepairs.e1.eta, 'e', year=year)
        lepSF_mumuSS = GetLeptonSF(mmpairs.m0.pt, mmpairs.m0.eta, 'm', mmpairs.m1.pt, mmpairs.m1.eta, 'm', year=year)

        print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

        # Cuts
        eeSSmask   = (ak.num(eeSSmask[eeSSmask])>0)
        mmSSmask   = (ak.num(mmSSmask[mmSSmask])>0)
        eeonZmask  = (ak.num(eeonZmask[eeonZmask])>0)
        eeoffZmask = (ak.num(eeoffZmask[eeoffZmask])>0)
        mmonZmask  = (ak.num(mmonZmask[mmonZmask])>0)
        mmoffZmask = (ak.num(mmoffZmask[mmoffZmask])>0)
        emSSmask   = (ak.num(emSSmask[emSSmask])>0)


        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec==2)&(nMuon==1)&(mu.pt>-1)]
        elec_eem =  e[(nElec==2)&(nMuon==1)&( e.pt>-1)]
        ee_eem = ak.combinations(elec_eem, 2, fields=["e0", "e1"])

        ee_eemZmask     = (ee_eem.e0.pdgId*ee_eem.e1.pdgId<1)&(np.abs((ee_eem.e0+ee_eem.e1).mass-91.2)<10)
        ee_eemOffZmask  = (ee_eem.e0.pdgId*ee_eem.e1.pdgId<1)&(np.abs((ee_eem.e0+ee_eem.e1).mass-91.2)>10)
        ee_eemZmask     = (ak.num(ee_eemZmask[ee_eemZmask])>0)
        ee_eemOffZmask  = (ak.num(ee_eemOffZmask[ee_eemOffZmask])>0)

        eepair_eem  = (ee_eem.e0+ee_eem.e1)
        trilep_eem = eepair_eem+muon_eem #ak.cartesian({"e0":ee_eem.e0,"e1":ee_eem.e1, "m":muon_eem})

        lepSF_eem = GetLeptonSF(ee_eem.e0.pt, ee_eem.e0.eta, 'e', ee_eem.e1.pt, ee_eem.e1.eta, 'e', mu.pt, mu.eta, 'm', year)

        # mme
        muon_mme = mu[(nElec==1)&(nMuon==2)&(mu.pt>-1)]
        elec_mme =  e[(nElec==1)&(nMuon==2)&( e.pt>-1)]

        mm_mme = ak.combinations(muon_mme, 2, fields=["m0", "m1"])
        mm_mmeZmask     = (mm_mme.m0.pdgId*mm_mme.m1.pdgId<1)&(np.abs((mm_mme.m0+mm_mme.m1).mass-91.2)<10)
        mm_mmeOffZmask  = (mm_mme.m0.pdgId*mm_mme.m1.pdgId<1)&(np.abs((mm_mme.m0+mm_mme.m1).mass-91.2)>10)
        mm_mmeZmask     = (ak.num(mm_mmeZmask[mm_mmeZmask])>0)
        mm_mmeOffZmask  = (ak.num(mm_mmeOffZmask[mm_mmeOffZmask])>0)

        mmpair_mme     = (mm_mme.m0+mm_mme.m1)
        trilep_mme     = mmpair_mme+elec_mme

        mZ_mme  = mmpair_mme.mass
        mZ_eem  = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        lepSF_mme = GetLeptonSF(mm_mme.m0.pt, mm_mme.m0.eta, 'm', mm_mme.m1.pt, mm_mme.m1.eta, 'm', e.pt, e.eta, 'e', year)

        # eee and mmm
        eee =   e[(nElec==3)&(nMuon==0)&( e.pt>-1)] 
        mmm =  mu[(nElec==0)&(nMuon==3)&(mu.pt>-1)] 

        eee_leps = ak.combinations(eee, 3, fields=["e0", "e1", "e2"])
        mmm_leps = ak.combinations(mmm, 3, fields=["m0", "m1", "m2"])
        ee_pairs = ak.combinations(eee, 2, fields=["e0", "e1"])
        mm_pairs = ak.combinations(mmm, 2, fields=["m0", "m1"])
        ee_pairs_index = ak.argcombinations(eee, 2, fields=["e0", "e1"])
        mm_pairs_index = ak.argcombinations(mmm, 2, fields=["m0", "m1"])

        lepSF_eee = GetLeptonSF(eee_leps.e0.pt, eee_leps.e0.eta, 'e', eee_leps.e1.pt, eee_leps.e1.eta, 'e', eee_leps.e2.pt, eee_leps.e2.eta, 'e', year)
        lepSF_mmm = GetLeptonSF(mmm_leps.m0.pt, mmm_leps.m0.eta, 'm', mmm_leps.m1.pt, mmm_leps.m1.eta, 'm', mmm_leps.m2.pt, mmm_leps.m2.eta, 'm', year)

        mmSFOS_pairs = mm_pairs[(np.abs(mm_pairs.m0.pdgId) == np.abs(mm_pairs.m1.pdgId)) & (mm_pairs.m0.pdgId != mm_pairs.m1.pdgId)]
        offZmask_mm = ak.all(np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2)>10., axis=1, keepdims=True) & (ak.num(mmSFOS_pairs)>0)
        onZmask_mm  = ak.any(np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2)<10., axis=1, keepdims=True)
      
        eeSFOS_pairs = ee_pairs[(np.abs(ee_pairs.e0.pdgId) == np.abs(ee_pairs.e1.pdgId)) & (ee_pairs.e0.pdgId != ee_pairs.e1.pdgId)]
        offZmask_ee = ak.all(np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2)>10, axis=1, keepdims=True) & (ak.num(eeSFOS_pairs)>0)
        onZmask_ee  = ak.any(np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2)<10, axis=1, keepdims=True)

        # Create masks **for event selection**
        eeeOnZmask  = (ak.num(onZmask_ee[onZmask_ee])>0)
        eeeOffZmask = (ak.num(offZmask_ee[offZmask_ee])>0)
        mmmOnZmask  = (ak.num(onZmask_mm[onZmask_mm])>0)
        mmmOffZmask = (ak.num(offZmask_mm[offZmask_mm])>0)

        # Now we need to create masks for the leptons in order to select leptons from the Z boson candidate (in onZ categories)
        ZeeMask = ak.argmin(np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2),axis=1,keepdims=True)
        ZmmMask = ak.argmin(np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2),axis=1,keepdims=True)
  
        Zee = eeSFOS_pairs[ZeeMask]
        Zmm = mmSFOS_pairs[ZmmMask]
        eZ0= Zee.e0[ak.num(eeSFOS_pairs)>0]
        eZ1= Zee.e1[ak.num(eeSFOS_pairs)>0]
        eZ = eZ0+eZ1
        mZ0= Zmm.m0[ak.num(mmSFOS_pairs)>0]
        mZ1= Zmm.m1[ak.num(mmSFOS_pairs)>0]
        mZ = mZ0+mZ1
        mZ_eee  = eZ.mass
        mZ_mmm  = mZ.mass

        # And for the W boson
        ZmmIndices = mm_pairs_index[ZmmMask]
        ZeeIndices = ee_pairs_index[ZeeMask]
        eW = eee[~ZeeIndices.e0 | ~ZeeIndices.e1]
        mW = mmm[~ZmmIndices.m0 | ~ZmmIndices.m1]

        triElec = eee_leps.e0+eee_leps.e1+eee_leps.e2
        triMuon = mmm_leps.m0+mmm_leps.m1+mmm_leps.m2
        m3l_eee = triElec.mass
        m3l_mmm = triMuon.mass
    
        # Triggers
        trig_eeSS = passTrigger(events,'ee',isData,dataset)
        trig_mmSS = passTrigger(events,'mm',isData,dataset)
        trig_emSS = passTrigger(events,'em',isData,dataset)
        trig_eee  = passTrigger(events,'eee',isData,dataset)
        trig_mmm  = passTrigger(events,'mmm',isData,dataset)
        trig_eem  = passTrigger(events,'eem',isData,dataset)
        trig_mme  = passTrigger(events,'mme',isData,dataset)

        # MET filters

        # Weights
        genw = np.ones_like(events['MET_pt']) if isData else events['genWeight']

        ### We need weights for: normalization, lepSF, triggerSF, pileup, btagSF...
        weights = {}
        for r in ['all', 'ee', 'mm', 'em', 'eee', 'mmm', 'eem', 'mme']:
          weights[r] = coffea.analysis_tools.Weights(len(events))
          weights[r].add('norm',genw if isData else (xsec/sow)*genw)

        weights['ee'].add('lepSF_eeSS', lepSF_eeSS)
        weights['em'].add('lepSF_emSS', lepSF_emSS)
        weights['mm'].add('lepSF_mmSS', lepSF_mumuSS)
        weights['eee'].add('lepSF_eee', lepSF_eee)
        weights['mmm'].add('lepSF_mmm', lepSF_mmm)
        weights['mme'].add('lepSF_mme', lepSF_mme)
        weights['eem'].add('lepSF_eem', lepSF_eem)

        # Extract the EFT quadratic coefficients and optionally use them to calculate the coefficients on the w**2 quartic function
        # eft_coeffs is never Jagged so convert immediately to numpy for ease of use.
        eft_coeffs = ak.to_numpy(events['EFTfitCoefficients']) if hasattr(events, "EFTfitCoefficients") else None
        eft_w2_coeffs = efth.calc_w2_coeffs(eft_coeffs,self._dtype) if (self._do_errors and eft_coeffs is not None) else None

        # Selections and cuts
        selections = PackedSelection()
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ',  (eeonZmask)&(eeSSmask)&(trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask)&(eeSSmask)&(trig_eeSS))
        selections.add('mmSSonZ',  (mmonZmask)&(mmSSmask)&(trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask)&(mmSSmask)&(trig_mmSS))
        selections.add('emSS',     (emSSmask)&(trig_emSS))

        channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
        selections.add('eemSSonZ',   (ee_eemZmask)&(trig_eem))
        selections.add('eemSSoffZ',  (ee_eemOffZmask)&(trig_eem))
        selections.add('mmeSSonZ',   (mm_mmeZmask)&(trig_mme))
        selections.add('mmeSSoffZ',  (mm_mmeOffZmask)&(trig_mme))

        channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
        selections.add('eeeSSonZ',   (eeeOnZmask)&(trig_eee))
        selections.add('eeeSSoffZ',  (eeeOffZmask)&(trig_eee))
        selections.add('mmmSSonZ',   (mmmOnZmask)&(trig_mmm))
        selections.add('mmmSSoffZ',  (mmmOffZmask)&(trig_mmm))

        levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
        selections.add('base', (nElec+nMuon>=2))
        selections.add('2jets',(njets>=2))
        selections.add('4jets',(njets>=4))
        selections.add('4j1b',(njets>=4)&(nbtags>=1))
        selections.add('4j2b',(njets>=4)&(nbtags>=2))

        # Variables
        invMass_eeSSonZ  = ( eeSSonZ.e0+ eeSSonZ.e1).mass
        invMass_eeSSoffZ = (eeSSoffZ.e0+eeSSoffZ.e1).mass
        invMass_mmSSonZ  = ( mmSSonZ.m0+ mmSSonZ.m1).mass
        invMass_mmSSoffZ = (mmSSoffZ.m0+mmSSoffZ.m1).mass
        invMass_emSS     = (emSS.e+emSS.m).mass

        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['nbtags'] = nbtags
        varnames['invmass'] = {
          'eeSSonZ'   : invMass_eeSSonZ,
          'eeSSoffZ'  : invMass_eeSSoffZ,
          'mmSSonZ'   : invMass_mmSSonZ,
          'mmSSoffZ'  : invMass_mmSSoffZ,
          'emSS'      : invMass_emSS,
          'eemSSonZ'  : mZ_eem,
          'eemSSoffZ' : mZ_eem,
          'mmeSSonZ'  : mZ_mme,
          'mmeSSoffZ' : mZ_mme,
          'eeeSSonZ'  : mZ_eee,
          'eeeSSoffZ' : mZ_eee,
          'mmmSSonZ'  : mZ_mmm,
          'mmmSSoffZ' : mZ_mmm,
        }
        varnames['m3l'] = {
          'eemSSonZ'  : m3l_eem,
          'eemSSoffZ' : m3l_eem,
          'mmeSSonZ'  : m3l_mme,
          'mmeSSoffZ' : m3l_mme,
          'eeeSSonZ'  : m3l_eee,
          'eeeSSoffZ' : m3l_eee,
          'mmmSSonZ'  : m3l_mmm,
          'mmmSSoffZ' : m3l_mmm,
        }
        varnames['e0pt' ] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt' ] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['e1pt' ] = e1
        varnames['e1eta'] = e1
        varnames['e2pt' ] = e2
        varnames['e2eta'] = e2
        varnames['m1pt' ] = m1
        varnames['m1eta'] = m1
        varnames['m2pt' ] = m2
        varnames['m2eta'] = m2
        varnames['j0pt' ] = j0.pt
        varnames['j0eta'] = j0.eta
        varnames['j1pt']  = j1
        varnames['j1eta'] = j1
        varnames['j2pt']  = j2
        varnames['j2eta'] = j2
        varnames['j3pt']  = j3
        varnames['j3eta'] = j3
        varnames['counts'] = np.ones_like(events.GenMET.pt)

        # fill Histos
        hout = self.accumulator.identity()
        normweights = weights['all'].weight().flatten() # Why does it not complain about .flatten() here?
        hout['SumOfEFTweights'].fill(sample=dataset, SumOfEFTweights=varnames['counts'], weight=normweights, eft_coeff=eft_coeffs, eft_err_coeff=eft_w2_coeffs)

        for var, v in varnames.items():
         for ch in channels2LSS+channels3L:
          for lev in levels:
            weight = weights[ ch[:3] if (ch.startswith('eee') or ch.startswith('mmm') or ch.startswith('eem') or ch.startswith('mme')) else ch[:2]].weight()
            cuts = [ch] + [lev]
            cut = selections.all(*cuts)
            weights_flat = weight[cut].flatten() # Why does it not complain about .flatten() here?
            weights_ones = np.ones_like(weights_flat, dtype=np.int)
            eft_coeffs_cut = eft_coeffs[cut] if eft_coeffs is not None else None
            eft_w2_coeffs_cut = eft_w2_coeffs[cut] if eft_w2_coeffs is not None else None
            if var == 'invmass':
              if   ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue
              elif ch in ['eeeSSonZ' , 'mmmSSonZ' ]: continue #values = v[ch]
              else                                 : values = ak.flatten(v[ch][cut])
              hout['invmass'].fill(eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut, sample=dataset, channel=ch, cut=lev, invmass=values, weight=weights_flat)
            elif var == 'm3l': 
              if ch in ['eeSSonZ','eeSSoffZ', 'mmSSonZ', 'mmSSoffZ','emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ' , 'mmmSSonZ']: continue
              values = ak.flatten(v[ch][cut])
              hout['m3l'].fill(sample=dataset, channel=ch, cut=lev, m3l=values, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
            else:
              values = v[cut]
              if   var == 'ht'    : hout[var].fill(ht=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'met'   : hout[var].fill(met=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'njets' : hout[var].fill(njets=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'nbtags': hout[var].fill(nbtags=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'counts': hout[var].fill(counts=values, sample=dataset, channel=ch, cut=lev, weight=weights_ones)
              elif var == 'j0eta' : 
                if lev == 'base': continue
                values = ak.flatten(values)
                #values=np.asarray(values)
                hout[var].fill(j0eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'e0pt'  : 
                if ch in ['mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ']: continue
                values = ak.flatten(values)
                #values=np.asarray(values)
                hout[var].fill(e0pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut) 
              elif var == 'm0pt'  : 
                if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ']: continue
                values = ak.flatten(values)
                #values=np.asarray(values)
                hout[var].fill(m0pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'e0eta' : 
                if ch in ['mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ']: continue
                values = ak.flatten(values)
                #values=np.asarray(values)
                hout[var].fill(e0eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm0eta':
                if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ']: continue
                values = ak.flatten(values)
                #values=np.asarray(values)
                hout[var].fill(m0eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'j0pt'  : 
                if lev == 'base': continue
                values = ak.flatten(values)
                #values=np.asarray(values)
                hout[var].fill(j0pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'j1pt':
                if lev == "base": continue
                values = values.pt[:,1]
                #values = ak.flatten(values)
                hout[var].fill(j1pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var =='j1eta':
                if lev == 'base': continue
                values = values.eta[:,1]
                hout[var].fill(j1eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'j2pt':
                if lev in ['base', "2jets"]: continue
                values = values.pt[:,2]
                hout[var].fill(j2pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'j2eta':
                if lev in ['base', "2jets"]: continue
                values = values.eta[:,2]
                hout[var].fill(j2eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'j3pt':
                if lev in ['base', "2jets"]: continue
                values = values.pt[:,3]
                hout[var].fill(j3pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'j3eta':
                if lev in ['base', "2jets"]: continue
                values = values.eta[:,3]
                hout[var].fill(j3eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'e1pt':
                if ch in ['mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ', 'mmeSSonZ', 'mmeSSoffZ', 'emSS']: continue
                values = values.pt[:,1]
                hout[var].fill(e1pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'e1eta':
                if ch in ['mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ', 'mmeSSonZ', 'mmeSSoffZ', 'emSS']: continue
                values = values.eta[:,1]
                hout[var].fill(e1eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'e2pt':
                if ch in ['eeeSSonZ', 'eeeSSoffZ']:
                  values = values.pt[:,2]
                  hout[var].fill(e2pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'e2eta':
                if ch in ['eeeSSonZ', 'eeeSSoffZ']:
                  values = values.eta[:,2]
                  hout[var].fill(e2eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm1pt':
                if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ', 'eemSSonZ', 'eemSSoffZ', 'emSS']: continue
                values = values.pt[:,1]
                hout[var].fill(m1pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm1eta':
                if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ', 'eemSSonZ', 'eemSSoffZ', 'emSS']: continue
                values = values.eta[:,1]
                hout[var].fill(m1eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm2pt':
                if ch in ['mmmSSonZ', 'mmmSSoffZ']:
                  values = values.pt[:,2]
                  hout[var].fill(m2pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm2eta':
                if ch in ['mmmSSonZ', 'mmmSSoffZ']:
                  values = values.eta[:,2]
                  hout[var].fill(m2eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
        return hout
Ejemplo n.º 12
0
    def process(self, events):
        """Run the boosted-jet selection on one chunk and fill the accumulator.

        Builds trigger, candidate-jet, AK4 b-tag, MET and lepton selections,
        picks one candidate fat jet per event according to
        ``self._jet_arbitration``, adds the simulation weights, and fills the
        ``cutflow``, ``nminus1_n2ddt`` and ``templates`` histograms for every
        region (plus ``genresponse``/``genresponse_noweight`` when an explicit
        weight modifier is used).

        Args:
            events: event chunk exposing ``metadata['dataset']`` and the
                ``FatJet``, ``Jet``, ``MET``, ``Muon``, ``Electron`` and ``Tau``
                collections. Real data additionally needs ``HLT``; simulation
                additionally needs ``genWeight``, ``Pileup`` and ``GenPart``.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` after
            all fills, with ``output["weightStats"]`` set to the weight
            statistics.

        Raises:
            RuntimeError: if ``self._jet_arbitration`` is not one of ``'pt'``,
                ``'mass'``, ``'n2'`` or ``'ddb'``.
        """
        dataset = events.metadata['dataset']
        # A chunk without a genWeight branch is treated as real data.
        isRealData = not hasattr(events, "genWeight")
        selection = PackedSelection()
        weights = Weights(len(events))
        output = self.accumulator.identity()
        if not isRealData:
            output['sumw'][dataset] += ak.sum(events.genWeight)

        def trigger_mask(triggers_by_year):
            """OR of the configured HLT paths on data; all-pass on simulation."""
            if not isRealData:
                return np.ones(len(events), dtype='bool')
            fired = np.zeros(len(events), dtype='bool')
            for path in triggers_by_year[self._year]:
                fired = fired | events.HLT[path]
            return fired

        selection.add('trigger', trigger_mask(self._triggers))
        selection.add('muontrigger', trigger_mask(self._muontriggers))

        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            & fatjets.isTight  # this is loose in sampleContainer
        ]

        def pick_per_event(jets, index):
            # `index` holds one (possibly missing) position per event, kept as
            # a length-1 list by keepdims=True so it can index the jagged
            # array; ak.firsts then collapses to one optional jet per event,
            # matching the shape produced by the 'pt' arbitration.
            return ak.firsts(jets[index])

        # The arg-reducers need axis=1: without an axis they reduce over the
        # flattened array and return a single global index, which would select
        # one event's jets instead of one jet per event.
        if self._jet_arbitration == 'pt':
            candidatejet = ak.firsts(candidatejet)
        elif self._jet_arbitration == 'mass':
            candidatejet = pick_per_event(
                candidatejet,
                ak.argmax(candidatejet.msdcorr, axis=1, keepdims=True))
        elif self._jet_arbitration == 'n2':
            candidatejet = pick_per_event(
                candidatejet,
                ak.argmin(candidatejet.n2ddt, axis=1, keepdims=True))
        elif self._jet_arbitration == 'ddb':
            candidatejet = pick_per_event(
                candidatejet,
                ak.argmax(candidatejet.btagDDBvL, axis=1, keepdims=True))
        else:
            raise RuntimeError("Unknown candidate jet arbitration")

        selection.add('minjetkin', (candidatejet.pt >= 450)
                      & (candidatejet.msdcorr >= 40.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('jetacceptance', (candidatejet.msdcorr >= 47.)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr < 201.))
        selection.add('jetid', candidatejet.isTight)
        selection.add('n2ddt', (candidatejet.n2ddt < 0.))
        selection.add('ddbpass', (candidatejet.btagDDBvL >= 0.89))

        jets = events.Jet[(events.Jet.pt > 30.)
                          & (abs(events.Jet.eta) < 2.5)
                          & events.Jet.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        dphi = abs(jets.delta_phi(candidatejet))
        medium_wp = BTagEfficiency.btagWPs[self._year]['medium']
        selection.add(
            'antiak4btagMediumOppHem',
            ak.max(
                jets[dphi > np.pi / 2].btagDeepB, axis=1, mask_identity=False)
            < medium_wp)
        ak4_away = jets[dphi > 0.8]
        selection.add(
            'ak4btagMedium08',
            ak.max(ak4_away.btagDeepB, axis=1, mask_identity=False) >
            medium_wp)

        selection.add('met', events.MET.pt < 140.)

        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & events.Muon.looseId)
        nmuons = ak.sum(goodmuon, axis=1)
        leadingmuon = ak.firsts(events.Muon[goodmuon])

        nelectrons = ak.sum(
            (events.Electron.pt > 10)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.LOOSE),
            axis=1,
        )

        ntaus = ak.sum(
            (events.Tau.pt > 20)
            & events.Tau.idDecayMode,  # bacon iso looser than Nano selection
            axis=1,
        )

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin',
                      (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1))
        selection.add('muonDphiAK8',
                      abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3)

        if isRealData:
            # Per-event array (not a bare scalar) because the fills below
            # subscript genflavor with boolean event masks.
            genflavor = np.zeros(len(events), dtype=int)
        else:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            bosons = getBosons(events.GenPart)
            matchedBoson = candidatejet.nearest(bosons,
                                                axis=None,
                                                threshold=0.8)
            genflavor = bosonFlavor(matchedBoson)
            genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                 candidatejet.pt, self._year)
            output['btagWeight'].fill(dataset=dataset,
                                      val=self._btagSF.addBtagWeight(
                                          weights, ak4_away))
            logger.debug("Weight statistics: %r", weights.weightStatistics)

        # Apply the soft-drop mass scale factor only where genflavor > 0;
        # events with genflavor == 0 keep the uncorrected mass.
        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        regions = {
            'signal': [
                'trigger', 'minjetkin', 'jetacceptance', 'jetid', 'n2ddt',
                'antiak4btagMediumOppHem', 'met', 'noleptons'
            ],
            'muoncontrol': [
                'muontrigger', 'minjetkin', 'jetacceptance', 'jetid', 'n2ddt',
                'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8'
            ],
            'noselection': [],
        }

        # Cumulative cutflow: bin 0 is "no cut", bin i+1 requires the first
        # i+1 cuts of the region, with 'ddbpass' appended as the last step.
        for region, cuts in regions.items():
            allcuts = set()
            output['cutflow'].fill(dataset=dataset,
                                   region=region,
                                   genflavor=genflavor,
                                   cut=0,
                                   weight=weights.weight())
            for i, cutname in enumerate(cuts + ['ddbpass']):
                allcuts.add(cutname)
                passed = selection.all(*allcuts)
                output['cutflow'].fill(dataset=dataset,
                                       region=region,
                                       genflavor=genflavor[passed],
                                       cut=i + 1,
                                       weight=weights.weight()[passed])

        systematics = [
            None,
            'jet_triggerUp',
            'jet_triggerDown',
            'btagWeightUp',
            'btagWeightDown',
            'btagEffStatUp',
            'btagEffStatDown',
        ]

        def normalize(val, cut):
            """Mask ``val`` with ``cut`` and replace missing entries by NaN."""
            return ak.to_numpy(ak.fill_none(val[cut], np.nan))

        def fill(region, systematic, wmod=None):
            """Fill the templates for one region and one systematic variation.

            With ``wmod`` the nominal weight is multiplied by that per-event
            factor and the gen-response histograms are filled as well;
            otherwise ``systematic`` is passed to ``weights.weight`` as the
            modifier name.
            """
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                weight = weights.weight(modifier=systematic)[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]

            output['templates'].fill(
                dataset=dataset,
                region=region,
                systematic=sname,
                genflavor=genflavor[cut],
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                ddb=normalize(candidatejet.btagDDBvL, cut),
                weight=weight,
            )
            if wmod is not None:
                output['genresponse_noweight'].fill(
                    dataset=dataset,
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=events.genWeight[cut] * wmod[cut],
                )
                output['genresponse'].fill(
                    dataset=dataset,
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=weight,
                )

        for region in regions:
            # N-1 plot: every cut of the region except the n2ddt requirement.
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            output['nminus1_n2ddt'].fill(
                dataset=dataset,
                region=region,
                n2ddt=normalize(candidatejet.n2ddt, cut),
                weight=weights.weight()[cut],
            )
            for systematic in systematics:
                fill(region, systematic)
            if 'GluGluHToBB' in dataset:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:,
                                                                          i])
                for c in events.LHEWeight.columns[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        output["weightStats"] = weights.weightStatistics
        return output
Ejemplo n.º 13
0
    def process(self, events):
        """Fill the before/after-HEM histograms for every configured region.

        Builds the lepton, lumi-mask, jet-multiplicity and DeepCSV pass/fail
        selections, then loops over ``self.regions`` (lepton -> lepton
        category -> b-tag region -> jet multiplicity). For each non-empty
        region it finds the best ttbar permutations, applies the MT
        requirement and calls ``self.fill_hists`` separately for runs below
        and at/above ``hem_run_after``.

        Args:
            events: event chunk; must provide ``metadata['dataset']``, ``run``,
                ``luminosityBlock`` and the ``Muon``/``Electron`` collections.
                ``Jet`` and ``MET`` are overwritten here with the output of
                ``IDJet.process_jets``.

        Returns:
            The accumulator, as returned by the last ``self.fill_hists`` call
            (or the untouched identity accumulator plus cutflow counts when no
            region is filled).

        Raises:
            ValueError: if a region's requirement list names none of the
                ``loose_or_tight_*``, ``tight_*`` or ``loose_*`` lepton cuts.
        """
        # Fixed seed so draws from random distributions (JER corrections) are reproducible.
        np.random.seed(10)
        output = self.accumulator.identity()

        self.sample_name = events.metadata['dataset']

        # Event weights; the data-vs-MC distinction is made inside the helper.
        # One independent result per lepton flavour.
        mu_evt_weights = MCWeights.get_event_weights(
            events, year=args.year, corrections=self.corrections)
        el_evt_weights = MCWeights.get_event_weights(
            events, year=args.year, corrections=self.corrections)

        selection = PackedSelection()

        # Lepton (and filter) requirement, shared by every region.
        lep_and_filter_pass = objsel.select_leptons(events, year=args.year)
        selection.add('lep_and_filter_pass', lep_and_filter_pass)

        # Replace jets and MET by their corrected versions.
        events['Jet'], events['MET'] = IDJet.process_jets(
            events, args.year, self.corrections['JetCor'])

        runs = events.run
        lumis = events.luminosityBlock
        golden_json_mask = lumi_tools.LumiMask(lumiMask_path)
        # Calling the mask yields the per-event array of valid events.
        selection.add('lumimask', golden_json_mask(runs, lumis))

        # Exactly-one-lepton requirements, added only for the matching stream.
        if isSM_Data_:
            muons = events['Muon']
            selection.add('tight_MU', ak.sum(muons['TIGHTMU'], axis=1) == 1)
            selection.add('loose_MU', ak.sum(muons['LOOSEMU'], axis=1) == 1)
        if isSE_Data_:
            electrons = events['Electron']
            selection.add('tight_EL',
                          ak.sum(electrons['TIGHTEL'], axis=1) == 1)
            selection.add('loose_EL',
                          ak.sum(electrons['LOOSEEL'], axis=1) == 1)

        output['cutflow']['lep_and_filter_pass'] += ak.sum(lep_and_filter_pass)

        # Jet selection (also receives the cutflow accumulator).
        passing_jets = objsel.jets_selection(events,
                                             year=args.year,
                                             cutflow=output['cutflow'])
        n_jet_and_lep = ak.sum(passing_jets & lep_and_filter_pass)
        output['cutflow'][
            'nEvts passing jet and lepton obj selection'] += n_jet_and_lep
        selection.add('passing_jets', passing_jets)
        selection.add('jets_3', ak.num(events['SelectedJets']) == 3)
        # jets_4p is only used for getting btag weights
        selection.add('jets_4p', ak.num(events['SelectedJets']) > 3)
        selection.add('DeepCSV_pass',
                      ak.sum(events['SelectedJets'][btag_wps[0]], axis=1) >= 2)

        # Order the selected jets by descending discriminant of the chosen tagger.
        if btaggers[0] == 'DeepCSV':
            sort_field = 'btagDeepB'
        else:
            sort_field = 'btagDeepFlavB'
        events['SelectedJets'] = events['SelectedJets'][ak.argsort(
            events['SelectedJets'][sort_field], ascending=False)]

        # b-tag fail sideband: in events with at least two selected jets, the
        # two highest DeepCSV values must both be below the chosen working point.
        deepcsv_order = ak.argsort(events['SelectedJets']['btagDeepB'],
                                   ascending=False)
        deepcsv_sorted = events['SelectedJets'][deepcsv_order]['btagDeepB']
        valid_counts_inds = ak.where(ak.num(events['SelectedJets']) > 1)[0]
        deepcsv_wp = btag_values[args.year]['btagDeepB']['DeepCSV' +
                                                         wps_to_use[0]]
        top_two = deepcsv_sorted[valid_counts_inds]
        deepcsv_fail = np.zeros(len(events)).astype(bool)
        deepcsv_fail[valid_counts_inds] = (top_two[:, 0] < deepcsv_wp) & (
            top_two[:, 1] < deepcsv_wp)
        selection.add('DeepCSV_fail', deepcsv_fail)

        ## fill hists for each region
        for lepton, by_leptype in self.regions.items():
            evt_weights = mu_evt_weights if lepton == 'Muon' else el_evt_weights
            ltype = 'MU' if lepton == 'Muon' else 'EL'
            tight_flag = 'TIGHT%s' % ltype
            loose_flag = 'LOOSE%s' % ltype
            for leptype, by_btag in by_leptype.items():
                for btagregion, by_jmult in by_btag.items():
                    for jmult, requirements in by_jmult.items():
                        cut = selection.all(*requirements)
                        region_label = ', '.join(
                            [lepton, leptype, btagregion, jmult])
                        n_in_region = cut.sum()
                        output['cutflow']['nEvts %s' %
                                          region_label] += n_in_region

                        if to_debug:
                            print(lepton, leptype, btagregion, jmult)
                        if not n_in_region > 0:
                            continue

                        # Which ID flag(s) define the leptons of this region.
                        if 'loose_or_tight_%s' % ltype in requirements:
                            id_flags = (tight_flag, loose_flag)
                        elif 'tight_%s' % ltype in requirements:
                            id_flags = (tight_flag, )
                        elif 'loose_%s' % ltype in requirements:
                            id_flags = (loose_flag, )
                        else:
                            raise ValueError(
                                "Not sure what lepton type to choose for event"
                            )
                        region_leptons = events[lepton][cut]
                        id_mask = region_leptons[id_flags[0]] == True  # noqa: E712
                        for flag in id_flags[1:]:
                            id_mask = id_mask | (
                                region_leptons[flag] == True)  # noqa: E712
                        leptons = region_leptons[id_mask]

                        # jets and MET of the events in this region
                        jets = events['SelectedJets'][cut]
                        met = events['SelectedMET'][cut]

                        # The b-tag requirement is dropped only in the btagFail region.
                        best_perms = ttpermutator.find_best_permutations(
                            jets=jets,
                            leptons=leptons,
                            MET=met,
                            btagWP=btag_wps[0],
                            btag_req=(btagregion != 'btagFail'))
                        valid_perms = ak.num(best_perms['TTbar'].pt) > 0
                        output['cutflow']['nEvts %s: valid perms' %
                                          region_label] += ak.sum(valid_perms)

                        # 0 == '' (no gen matching), 1 == 'right', 2 == 'matchable',
                        # 3 == 'unmatchable', 4 == 'sl_tau', 5 == 'noslep'
                        bp_status = np.zeros(cut.size, dtype=int)

                        ## create MT regions
                        MT = make_vars.MT(leptons, met)
                        MTHigh = ak.flatten(MT[valid_perms] >= MTcut)
                        output['cutflow']['nEvts %s: pass MT cut' %
                                          region_label] += ak.sum(MTHigh)

                        # Split the surviving events at the HEM run boundary.
                        wts = evt_weights.weight()[cut][valid_perms][MTHigh]
                        valid_runs = runs[cut][valid_perms][MTHigh]
                        runs_before = ak.where(valid_runs < hem_run_after)
                        runs_after = ak.where(valid_runs >= hem_run_after)
                        hem_periods = (('Before', runs_before),
                                       ('After', runs_after))
                        for hem_reg, period in hem_periods:
                            if not len(period[0]) > 0:
                                continue
                            output = self.fill_hists(
                                acc=output,
                                hem_reg=hem_reg,
                                jetmult=jmult,
                                leptype=lepton,
                                lepcat=leptype,
                                btagregion=btagregion,
                                permarray=bp_status[cut][valid_perms][MTHigh]
                                [period],
                                perm=best_perms[valid_perms][MTHigh][period],
                                jets=jets[valid_perms][MTHigh][period],
                                leptons=leptons[valid_perms][MTHigh][period],
                                MTvals=MT[valid_perms][MTHigh][period],
                                evt_wts=wts[period])

        return output
Ejemplo n.º 14
0
    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
        n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        ## event selectors
        filters   = getFilters(ev, year=self.year, dataset=dataset)
        
        dilep     = ((ak.num(electron) + ak.num(muon))==2)
        pos_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))<0)
        neg_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))>0)
        lep0pt    = ((ak.num(electron[(electron.pt>25)]) + ak.num(muon[(muon.pt>25)]))>0)
        lep0pt_40 = ((ak.num(electron[(electron.pt>40)]) + ak.num(muon[(muon.pt>40)]))>0)
        lep0pt_100 = ((ak.num(electron[(electron.pt>100)]) + ak.num(muon[(muon.pt>100)]))>0)
        lep1pt    = ((ak.num(electron[(electron.pt>20)]) + ak.num(muon[(muon.pt>20)]))>1)
        lep1pt_30 = ((ak.num(electron[(electron.pt>30)]) + ak.num(muon[(muon.pt>30)]))>1)
        lepveto   = ((ak.num(vetoelectron) + ak.num(vetomuon))==2)
        
        # define the weight
        weight = Weights( len(ev) )
        
        #mult = 1
        #if dataset=='inclusive': mult = 0.0478/47.448
        #if dataset=='plus': mult = 0.0036/7.205

        if not dataset=='MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        
        selection = PackedSelection()
        selection.add('lepveto',       lepveto)
        selection.add('dilep',         dilep )
        selection.add('filter',        (filters) )
        selection.add('p_T(lep0)>25',  lep0pt )
        selection.add('p_T(lep0)>40',  lep0pt_40 )
        selection.add('p_T(lep1)>20',  lep1pt )
        selection.add('p_T(lep1)>30',  lep1pt_30 )
        selection.add('SS',            ( SSlepton | SSelectron | SSmuon) )
        selection.add('pos',           ( pos_charge ) )
        selection.add('neg',           ( neg_charge ) )
        selection.add('N_jet>3',       (ak.num(jet)>=4) )
        selection.add('N_jet>4',       (ak.num(jet)>=5) )
        selection.add('N_central>2',   (ak.num(central)>=3) )
        selection.add('N_central>3',   (ak.num(central)>=4) )
        selection.add('N_btag>0',      (ak.num(btag)>=1) )
        selection.add('MET>50',        (ev.MET.pt>50) )
        selection.add('ST',            (st>600) )
        selection.add('N_fwd>0',       (ak.num(fwd)>=1 ))
        selection.add('delta_eta',     (ak.any(delta_eta>2, axis=1) ) )
        selection.add('fwd_p>500',     (ak.any(j_fwd.p>500, axis=1) ) )
        
        ss_reqs = ['lepveto', 'dilep', 'SS', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'N_jet>3', 'N_central>2', 'N_btag>0']
        bl_reqs = ss_reqs + ['N_fwd>0', 'N_jet>4', 'N_central>3', 'ST', 'MET>50', 'delta_eta']
        sr_reqs = bl_reqs + ['fwd_p>500', 'p_T(lep0)>40', 'p_T(lep1)>30']

        ss_reqs_d = { sel: True for sel in ss_reqs }
        ss_selection = selection.require(**ss_reqs_d)
        bl_reqs_d = { sel: True for sel in bl_reqs }
        BL = selection.require(**bl_reqs_d)
        sr_reqs_d = { sel: True for sel in sr_reqs }
        SR = selection.require(**sr_reqs_d)

        cutflow     = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in sr_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow( req, selection.require(**cutflow_reqs_d) )
        
        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvs, weight=weight.weight()[ss_selection])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvsGood, weight=weight.weight()[ss_selection])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[ss_selection], weight=weight.weight()[ss_selection])
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight.weight()[BL])
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight.weight()[BL])
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight.weight()[BL])
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight.weight()[BL])
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight.weight()[BL])
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight.weight()[BL])
        output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[ss_selection], n2=n_nonprompt[ss_selection], n_ele=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[ss_selection].pt,
            phi  = ev.MET[ss_selection].phi,
            weight = weight.weight()[ss_selection]
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )
        
        
        return output
Ejemplo n.º 15
0
    def process(self, events):
        """Append trijet kinematic columns for one chunk of events.

        Events are kept when they pass the HLT mask chosen from ``year`` and
        the dataset name and contain at least three jets with pt > 30,
        |eta| < 2.5 and ``isTight`` set.  The first three such jets of each
        kept event are used to build single-jet, pairwise (i, j) and
        jet-versus-dijet (i, jk) quantities, which are appended to the
        per-dataset column accumulators.

        Args:
            events: event array providing ``metadata['dataset']``, ``HLT``
                and ``Jet``.

        Returns:
            The accumulator created by ``self._accumulator.identity()``,
            filled for this chunk.
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        jet_indices = (0, 1, 2)
        pairs = [(0, 1), (1, 2), (2, 0)]
        triplets = [(0, 1, 2), (1, 2, 0), (2, 0, 1)]

        # Initialize dict accumulators, if have not been initialized
        column_names = []
        for i in jet_indices:
            column_names += [f"eta_{i}_final", f"ptoverM_{i}_final"]
        for i, j in pairs:
            column_names += [
                f"dEta_{i}{j}_final",
                f"dR_{i}{j}_final",
                f"moverM_{i}{j}_final",
            ]
        for i, j, k in triplets:
            column_names += [
                f"dR_{i}_{j}{k}_final",
                f"dEta_{i}_{j}{k}_final",
                f"Phi_{i}_{j}{k}_final",
                f"dPtoverM_{i}_{j}{k}_final",
            ]
        column_names += [
            "ptoverM_max_final",
            "ptoverM_min_final",
            "eta_max_final",
            "dR_max_final",
            "dR_min_final",
            "dEta_max_final",
            "dEta_min_final",
            "dR_j_jj_max_final",
            "dR_j_jj_min_final",
            "dEta_j_jj_max_final",
            "dEta_j_jj_min_final",
            "dPhi_j_jj_max_final",
            "dPhi_j_jj_min_final",
            "dPtoverM_j_jj_max_final",
            "dPtoverM_j_jj_min_final",
        ]
        for column in column_names:
            if dataset_name not in output[column].keys():
                output[column][dataset_name] = processor.column_accumulator(np.array([]))

        # HLT selection
        # NOTE(review): `year` is not defined in this method; presumably a
        # module-level name -- confirm.
        # NOTE(review): when `year` matches no branch, HLT_mask stays an empty
        # list and the `&` with the event mask below is not a meaningful
        # trigger requirement -- confirm the intended behaviour.
        HLT_mask = []
        is_single_muon = "SingleMuon" in dataset_name
        if year == "2016":
            if is_single_muon: #this does not work, as the name of file which is under processing is unknown
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else: #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        elif year == "2017":
            if is_single_muon:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        elif year == "2018":
            if is_single_muon:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets and the trigger
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3) & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Fixed index lists instead of awk.argcombinations: column n of every
        # "ij" array below corresponds to pairs[n], and column n of every
        # "i_jk" array to triplets[n].
        jet_i, jet_j = zip(*pairs) # Returns [0, 1, 2] , [1, 2, 0]
        jet_k = [2, 0, 1]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)

        # The dijet system recoiling against jet i, built once and reused.
        dijet_jk = selected_jets[:, jet_j] + selected_jets[:, jet_k]
        dR_i_jk = selected_jets[:, jet_i].delta_r(dijet_jk)
        dEta_i_jk = abs(selected_jets[:, jet_i].eta - dijet_jk.eta)
        # NOTE(review): plain |phi_i - phi_jk| is not wrapped into [0, pi];
        # confirm whether a wrapped delta-phi is wanted here.
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi - dijet_jk.phi)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        # One array per entry of `triplets`: |pt_i - pt_(j+k)| / m3j.
        dPtoverM_i_jk = [
            abs(selected_jets[:, i].pt - (selected_jets[:, j] + selected_jets[:, k]).pt) / m3j
            for i, j, k in triplets
        ]

        # Event selection masks
        # Pre-selection
        selection = PackedSelection()
        selection.add("Dummy", m3j > 0)
        sel_mask = selection.require(**{name: True for name in selection.names})

        output["selected_events"][dataset_name] += len(events_3j[sel_mask])

        def as_column(values):
            """Wrap array-like ``values`` in a column accumulator."""
            return processor.column_accumulator(np.array(values))

        def per_event(reducer, values):
            """Reduce the selected rows along axis 1, replacing None by -99."""
            return awk.fill_none(reducer(values[sel_mask], axis=1), -99)

        for i in jet_indices:
            output[f"eta_{i}_final"][dataset_name] += as_column(selected_jets[:, i][sel_mask].eta)
            output[f"ptoverM_{i}_final"][dataset_name] += as_column(pt_i_overM[:, i][sel_mask])

        for idx, (i, j) in enumerate(pairs):
            output[f"dEta_{i}{j}_final"][dataset_name] += as_column(dEta_ij[:, idx][sel_mask])
            output[f"dR_{i}{j}_final"][dataset_name] += as_column(dR_ij[:, idx][sel_mask])
            output[f"moverM_{i}{j}_final"][dataset_name] += as_column((m_ij[:, idx] / m3j)[sel_mask])

        dPtoverM_selected = [values[sel_mask] for values in dPtoverM_i_jk]
        for idx, (i, j, k) in enumerate(triplets):
            output[f"dR_{i}_{j}{k}_final"][dataset_name] += as_column(dR_i_jk[:, idx][sel_mask])
            output[f"dEta_{i}_{j}{k}_final"][dataset_name] += as_column(dEta_i_jk[:, idx][sel_mask])
            output[f"Phi_{i}_{j}{k}_final"][dataset_name] += as_column(dPhi_i_jk[:, idx][sel_mask])
            output[f"dPtoverM_{i}_{j}{k}_final"][dataset_name] += as_column(dPtoverM_selected[idx])

        # Element-wise extremes over the three dPtoverM arrays, computed with
        # ufuncs instead of a per-event Python loop.  Unlike the builtin
        # max/min, np.maximum/np.minimum propagate NaN independent of order.
        first, second, third = dPtoverM_selected
        summary_columns = {
            "ptoverM_max_final": per_event(awk.max, pt_i_overM),
            "ptoverM_min_final": per_event(awk.min, pt_i_overM),
            "eta_max_final": per_event(awk.max, abs(selected_jets.eta)),
            "dR_max_final": per_event(awk.max, dR_ij),
            "dR_min_final": per_event(awk.min, dR_ij),
            "dEta_max_final": per_event(awk.max, dEta_ij),
            "dEta_min_final": per_event(awk.min, dEta_ij),
            "dR_j_jj_max_final": per_event(awk.max, dR_i_jk),
            "dR_j_jj_min_final": per_event(awk.min, dR_i_jk),
            "dEta_j_jj_max_final": per_event(awk.max, dEta_i_jk),
            "dEta_j_jj_min_final": per_event(awk.min, dEta_i_jk),
            "dPhi_j_jj_max_final": per_event(awk.max, dPhi_i_jk),
            "dPhi_j_jj_min_final": per_event(awk.min, dPhi_i_jk),
            "dPtoverM_j_jj_max_final": np.maximum(np.maximum(first, second), third),
            "dPtoverM_j_jj_min_final": np.minimum(np.minimum(first, second), third),
        }
        for column, values in summary_columns.items():
            output[column][dataset_name] += as_column(values)

        return output
Ejemplo n.º 16
0
    def process(self, events):
        """Fill electron and charge-flip histograms for one chunk of events.

        Electrons from the "tightFCNC" collection are restricted to pt > 15,
        |eta| < 2.4, a valid gen match to a gen electron (|pdgId| == 11) and
        ``genPartFlav`` 1 or 15.  An electron counts as flipped when its
        pdgId is the negative of the matched gen particle's pdgId or of the
        value returned by ``find_first_parent`` for that gen particle.

        Args:
            events: event array providing ``Jet`` and ``metadata['dataset']``.

        Returns:
            The accumulator created by ``self.accumulator.identity()``,
            filled for this chunk.

        Raises:
            ValueError: if ``self.year`` is not 2016, 2017 or 2018, for which
                no electron MVA score is defined.
        """
        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # NOTE(review): the result is unused here; the call is kept because
        # its side effects cannot be judged from this method.
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 15) & (np.abs(electron.eta) < 2.4)]

        electron = electron[(electron.genPartIdx >= 0)]
        # from here on all leptons are gen-matched
        electron = electron[(np.abs(electron.matched_gen.pdgId) == 11)]
        # and now they are all prompt
        electron = electron[((electron.genPartFlav == 1) | (electron.genPartFlav == 15))]

        opposite_to_gen = (electron.matched_gen.pdgId * (-1) == electron.pdgId)
        opposite_to_parent = (find_first_parent(electron.matched_gen) * (-1) == electron.pdgId)
        is_flipped = ((opposite_to_gen | opposite_to_parent) &
                      (np.abs(electron.pdgId) == 11))

        flipped_electron = electron[is_flipped]
        n_flips = ak.num(flipped_electron)

        def getMVAscore(electron):
            """Return the year-dependent electron MVA discriminant."""
            if self.year == 2016:
                return electron.mvaSpring16GP
            if self.year == 2017:
                return electron.mvaFall17V2noIso
            if self.year == 2018:
                # Clamp just inside (-1, 1) so the log transform stays finite.
                MVA = np.minimum(
                    np.maximum(electron.mvaFall17V2noIso, -1.0 + 1.e-6),
                    1.0 - 1.e-6)
                return -0.5 * np.log(2 / (MVA + 1) - 1)
            # Previously this fell through and returned None, which only
            # surfaced later as "'NoneType' object is not subscriptable".
            raise ValueError(f"No electron MVA score defined for year {self.year!r}")

        # NOTE: no event weights are applied; the fills below pass no weight.

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset)

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('electr', ak.num(electron) >= 1)
        selection.add('flip', n_flips >= 1)

        bl_reqs = ['filter', 'electr']
        baseline = selection.require(**{sel: True for sel in bl_reqs})

        f_reqs = bl_reqs + ['flip']
        flip_sel = selection.require(**{sel: True for sel in f_reqs})

        def flat(values):
            """Flatten a jagged per-event array into a 1-D numpy array."""
            return ak.to_numpy(ak.flatten(values))

        # Flatten each quantity once and reuse it across the fills.
        ele_pt = flat(electron[baseline].pt)
        ele_eta = flat(electron[baseline].eta)
        flip_pt = flat(flipped_electron[flip_sel].pt)
        flip_eta = flat(flipped_electron[flip_sel].eta)
        mva_score = flat(getMVAscore(electron)[baseline])

        output["electron"].fill(
            dataset=dataset,
            pt=ele_pt,
            eta=abs(ele_eta),
        )

        output["electron2"].fill(
            dataset=dataset,
            pt=ele_pt,
            eta=ele_eta,
        )

        output["flipped_electron"].fill(
            dataset=dataset,
            pt=flip_pt,
            eta=abs(flip_eta),
        )

        output["flipped_electron2"].fill(
            dataset=dataset,
            pt=flip_pt,
            eta=flip_eta,
        )

        output["mva_id"].fill(
            dataset=dataset,
            mva_id=mva_score,
            eta=np.abs(flat(electron.etaSC[baseline])),
        )

        output["mva_id2"].fill(
            dataset=dataset,
            mva_id=mva_score,
            pt=ele_pt,
        )

        output["isolation"].fill(
            dataset=dataset,
            isolation1=flat(electron.jetRelIso[baseline]),
            isolation2=flat(electron.jetPtRelv2[baseline]),
        )

        return output
Ejemplo n.º 17
0
    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()

        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:  #this does not work, as the name of file which is under processing is unknown
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        #pairs = awk.argcombinations(selected_jets, 2)
        #jet_i, jet_j = awk.unzip(pairs)
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt -
                            (selected_jets[:, 1] +
                             selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt -
                            (selected_jets[:, 2] +
                             selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt -
                            (selected_jets[:, 0] +
                             selected_jets[:, 1]).pt) / m3j

        # Event selection masks
        selection_masks = {}
        # Pre-selection
        selection = PackedSelection()
        selection.add("Dummy", m3j > 000)
        sel_mask = selection.require(
            **{name: True
               for name in selection.names})
        selection_masks["Pre-selection"] = sel_mask

        # HLT_trigger (this is already done at the beginning)
        # if year == "2016":
        # JetHLT_mask = []
        # if "2016B2" in dataset_name:
        # JetHLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        # elif "2016H" in dataset_name:
        # JetHLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        # else:
        # JetHLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        # selection_masks["JetHLT"] = JetHLT_mask[event_mask]
        # if year == "2017":
        # JetHLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        # selection_masks["JetHLT"] = JetHLT_mask[event_mask]
        # if year == "2018":
        # JetHLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        # selection_masks["JetHLT"] = JetHLT_mask[event_mask]

        # Fill histograms
        for selection, selection_mask in selection_masks.items():
            output["mjjj"].fill(dataset=dataset_name,
                                selection=selection,
                                mjjj=m3j[selection_mask])

            output["m_ij"].fill(dataset=dataset_name,
                                selection=selection,
                                m_01=m_ij[:, 0][selection_mask],
                                m_12=m_ij[:, 1][selection_mask],
                                m_20=m_ij[:, 2][selection_mask])

            output["dR_ij"].fill(dataset=dataset_name,
                                 selection=selection,
                                 dR_01=dR_ij[:, 0][selection_mask],
                                 dR_12=dR_ij[:, 1][selection_mask],
                                 dR_20=dR_ij[:, 2][selection_mask])

            output["dEta_ij"].fill(dataset=dataset_name,
                                   selection=selection,
                                   dEta_01=dEta_ij[:, 0][selection_mask],
                                   dEta_12=dEta_ij[:, 1][selection_mask],
                                   dEta_20=dEta_ij[:, 2][selection_mask])

            output["moverM_ij"].fill(dataset=dataset_name,
                                     selection=selection,
                                     moverM_01=m_01_overM[selection_mask],
                                     moverM_12=m_12_overM[selection_mask],
                                     moverM_20=m_20_overM[selection_mask])

            output["pt_i"].fill(dataset=dataset_name,
                                selection=selection,
                                pt_0=selected_jets[:, 0][selection_mask].pt,
                                pt_1=selected_jets[:, 1][selection_mask].pt,
                                pt_2=selected_jets[:, 2][selection_mask].pt)

            output["eta_i"].fill(dataset=dataset_name,
                                 selection=selection,
                                 eta_0=selected_jets[:, 0][selection_mask].eta,
                                 eta_1=selected_jets[:, 1][selection_mask].eta,
                                 eta_2=selected_jets[:, 2][selection_mask].eta)

            output["ptoverM_i"].fill(dataset=dataset_name,
                                     selection=selection,
                                     ptoverM_0=pt_i_overM[:,
                                                          0][selection_mask],
                                     ptoverM_1=pt_i_overM[:,
                                                          1][selection_mask],
                                     ptoverM_2=pt_i_overM[:,
                                                          2][selection_mask])

            output["dR_i_jk"].fill(dataset=dataset_name,
                                   selection=selection,
                                   dR_0_12=dR_i_jk[:, 0][selection_mask],
                                   dR_1_20=dR_i_jk[:, 1][selection_mask],
                                   dR_2_01=dR_i_jk[:, 2][selection_mask])

            output["dEta_i_jk"].fill(dataset=dataset_name,
                                     selection=selection,
                                     dEta_0_12=dEta_i_jk[:, 0][selection_mask],
                                     dEta_1_20=dEta_i_jk[:, 1][selection_mask],
                                     dEta_2_01=dEta_i_jk[:, 2][selection_mask])

            output["dPhi_i_jk"].fill(dataset=dataset_name,
                                     selection=selection,
                                     dPhi_0_12=dPhi_i_jk[:, 0][selection_mask],
                                     dPhi_1_20=dPhi_i_jk[:, 1][selection_mask],
                                     dPhi_2_01=dPhi_i_jk[:, 2][selection_mask])

            output["dPtoverM_i_jk"].fill(
                dataset=dataset_name,
                selection=selection,
                dPtoverM_0_12=dPtoverM_0_12[selection_mask],
                dPtoverM_1_20=dPtoverM_1_20[selection_mask],
                dPtoverM_2_01=dPtoverM_2_01[selection_mask])
            pt_i_overM_2fill = pt_i_overM[selection_mask]
            dR_ij_2fill = dR_ij[selection_mask]
            dEta_ij_2fill = dEta_ij[selection_mask]
            dR_i_jk_2fill = dR_i_jk[selection_mask]
            dEta_i_jk_2fill = dEta_i_jk[selection_mask]
            dPhi_i_jk_2fill = dPhi_i_jk[selection_mask]
            dPtoverM_0_12_2fill = dPtoverM_0_12[selection_mask]
            dPtoverM_1_20_2fill = dPtoverM_1_20[selection_mask]
            dPtoverM_2_01_2fill = dPtoverM_2_01[selection_mask]
            selected_jets_2fill = selected_jets[selection_mask]

            max_pt_overM_2fill = awk.max(pt_i_overM_2fill, axis=1)
            min_pt_overM_2fill = awk.min(pt_i_overM_2fill, axis=1)
            max_dR_2fill = awk.max(dR_ij_2fill, axis=1)
            max_dEta_2fill = awk.max(dEta_ij_2fill, axis=1)
            min_dR_2fill = awk.min(dR_ij_2fill, axis=1)
            min_dEta_2fill = awk.min(dEta_ij_2fill, axis=1)
            min_pt_2fill = awk.min(selected_jets_2fill.pt, axis=1)
            max_eta_2fill = awk.max(abs(selected_jets_2fill.eta), axis=1)
            max_dR_i_jk_2fill = awk.max(dR_i_jk_2fill, axis=1)
            min_dR_i_jk_2fill = awk.min(dR_i_jk_2fill, axis=1)
            max_dEta_i_jk_2fill = awk.max(dEta_i_jk_2fill, axis=1)
            min_dEta_i_jk_2fill = awk.min(dEta_i_jk_2fill, axis=1)
            max_dPhi_i_jk_2fill = awk.max(dPhi_i_jk_2fill, axis=1)
            min_dPhi_i_jk_2fill = awk.min(dPhi_i_jk_2fill, axis=1)
            max_dPtoverM_i_jk_2fill = []
            min_dPtoverM_i_jk_2fill = []
            for pair in zip(dPtoverM_0_12_2fill, dPtoverM_1_20_2fill,
                            dPtoverM_2_01_2fill):
                max_dPtoverM_i_jk_2fill.append(max(pair))
                min_dPtoverM_i_jk_2fill.append(min(pair))
            max_dPtoverM_i_jk_2fill = np.array(max_dPtoverM_i_jk_2fill)
            min_dPtoverM_i_jk_2fill = np.array(min_dPtoverM_i_jk_2fill)

            max_pt_overM_2fill = awk.fill_none(max_pt_overM_2fill, -99)
            min_pt_overM_2fill = awk.fill_none(min_pt_overM_2fill, -99)
            max_dR_2fill = awk.fill_none(max_dR_2fill, -99)
            max_dEta_2fill = awk.fill_none(max_dEta_2fill, -99)
            min_dR_2fill = awk.fill_none(min_dR_2fill, -99)
            min_dEta_2fill = awk.fill_none(min_dEta_2fill, -99)
            min_pt_2fill = awk.fill_none(min_pt_2fill, -99)
            max_eta_2fill = awk.fill_none(max_eta_2fill, -99)
            max_dR_i_jk_2fill = awk.fill_none(max_dR_i_jk_2fill, -99)
            min_dR_i_jk_2fill = awk.fill_none(min_dR_i_jk_2fill, -99)
            max_dEta_i_jk_2fill = awk.fill_none(max_dEta_i_jk_2fill, -99)
            min_dEta_i_jk_2fill = awk.fill_none(min_dEta_i_jk_2fill, -99)
            max_dPhi_i_jk_2fill = awk.fill_none(max_dPhi_i_jk_2fill, -99)
            min_dPhi_i_jk_2fill = awk.fill_none(min_dPhi_i_jk_2fill, -99)

            output["max_dR"].fill(dataset=dataset_name,
                                  selection=selection,
                                  max_dR=max_dR_2fill)

            output["max_dEta"].fill(dataset=dataset_name,
                                    selection=selection,
                                    max_dEta=max_dEta_2fill)

            output["min_dR"].fill(dataset=dataset_name,
                                  selection=selection,
                                  min_dR=min_dR_2fill)

            output["min_dEta"].fill(dataset=dataset_name,
                                    selection=selection,
                                    min_dEta=min_dEta_2fill)

            output["min_pt"].fill(dataset=dataset_name,
                                  selection=selection,
                                  min_pt=min_pt_2fill)

            output["max_eta"].fill(dataset=dataset_name,
                                   selection=selection,
                                   max_eta=max_eta_2fill)

            output["max_ptoverM"].fill(dataset=dataset_name,
                                       selection=selection,
                                       max_ptoverM=max_pt_overM_2fill)

            output["min_ptoverM"].fill(dataset=dataset_name,
                                       selection=selection,
                                       min_ptoverM=min_pt_overM_2fill)

            output["max_dR_j_jj"].fill(dataset=dataset_name,
                                       selection=selection,
                                       max_dR_j_jj=max_dR_i_jk_2fill)

            output["max_dEta_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         max_dEta_j_jj=max_dEta_i_jk_2fill)

            output["max_dPhi_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         max_dPhi_j_jj=max_dPhi_i_jk_2fill)

            output["max_dPtoverM_j_jj"].fill(
                dataset=dataset_name,
                selection=selection,
                max_dPtoverM_j_jj=max_dPtoverM_i_jk_2fill)

            output["min_dR_j_jj"].fill(dataset=dataset_name,
                                       selection=selection,
                                       min_dR_j_jj=min_dR_i_jk_2fill)

            output["min_dEta_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         min_dEta_j_jj=min_dEta_i_jk_2fill)

            output["min_dPhi_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         min_dPhi_j_jj=min_dPhi_i_jk_2fill)

            output["min_dPtoverM_j_jj"].fill(
                dataset=dataset_name,
                selection=selection,
                min_dPtoverM_j_jj=min_dPtoverM_i_jk_2fill)

        return output
Ejemplo n.º 18
0
def test_packed_selection():
    """Exercise PackedSelection: add / require / all / any and its error paths."""
    from coffea.analysis_tools import PackedSelection

    sel = PackedSelection()

    shape = (10, )
    # Use the builtin ``bool`` as dtype: the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, so it is not portable across versions.
    all_true = np.full(shape=shape, fill_value=True, dtype=bool)
    all_false = np.full(shape=shape, fill_value=False, dtype=bool)
    fizz = np.arange(shape[0]) % 3 == 0
    buzz = np.arange(shape[0]) % 5 == 0
    # Right shape, wrong (non-boolean) dtype.
    ones = np.ones(shape=shape, dtype=np.uint64)
    # Right dtype, wrong shape. This used to be a chained assignment
    # (``wrong_shape = ones = ...``) that silently replaced ``ones`` and left
    # the dtype check untested.
    wrong_shape = np.ones(shape=(shape[0] - 5, ), dtype=bool)

    sel.add("all_true", all_true)
    sel.add("all_false", all_false)
    sel.add("fizz", fizz)
    sel.add("buzz", buzz)

    assert np.all(sel.require(all_true=True, all_false=False) == all_true)
    # allow truthy values
    assert np.all(sel.require(all_true=1, all_false=0) == all_true)
    assert np.all(sel.all("all_true", "all_false") == all_false)
    assert np.all(sel.any("all_true", "all_false") == all_true)
    assert np.all(
        sel.all("fizz", "buzz") == np.array([
            True, False, False, False, False, False, False, False, False, False
        ]))
    assert np.all(
        sel.any("fizz", "buzz") == np.array(
            [True, False, False, True, False, True, True, False, False, True]))

    with pytest.raises(ValueError):
        sel.add("wrong_shape", wrong_shape)

    with pytest.raises(ValueError):
        sel.add("ones", ones)

    # Adding a 65th selection is expected to raise. Each one gets a distinct
    # name (the original never interpolated ``i`` into "sel_%d").
    overpack = PackedSelection()
    with pytest.raises(RuntimeError):
        for i in range(65):
            overpack.add("sel_%d" % i, all_true)
Ejemplo n.º 19
0
class Selection:
    '''
    Dilepton / trilepton baseline event selections built on PackedSelection.

    Every input is passed as a keyword argument and stored as an attribute
    of the same name.  The methods of this class read:

      ele, mu                 electron / muon collections
      ele_veto, mu_veto       collections counted for the 'lepveto' cut
      jet_all, jet_central,
      jet_fwd, jet_btag       jet collections
      met                     object with a ``pt`` field
      events, year, dataset   forwarded to getTriggers
      filters                 only read by trilep_baseline ('filter' cut)
    '''

    def __init__(self, **kwargs):
        '''
        kwargs should be:
        ele (loose and tight)
        mu
        jets: all, central, forward, b-tag
        met

        See the class docstring for the exact attribute names that are read.
        '''
        self.__dict__.update(kwargs)

        # NOTE: event filters are not computed here, so trilep_baseline
        # relies on ``filters`` being supplied through kwargs.
        ## not yet sure whether this should go here, or later
        #self.filters   = getFilters(self.events, year=self.year, dataset=self.dataset)

    def _n_leptons_above(self, pt_min):
        '''Per-event count of electrons plus muons with pt above pt_min.'''
        n_ele = ak.num(self.ele[(self.ele.pt > pt_min)])
        n_mu = ak.num(self.mu[(self.mu.pt > pt_min)])
        return n_ele + n_mu

    def _get_triggers(self):
        '''Call getTriggers with the pdgIds of the two highest-pt leptons.'''
        lepton = ak.concatenate([self.ele, self.mu], axis=1)
        # Sort by descending pt, then pad/clip to exactly two entries per
        # event; missing leptons are given pdgId 0.
        lepton_pdgId_pt_ordered = ak.fill_none(
            ak.pad_none(
                lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True),
            0)

        return getTriggers(self.events,
            ak.flatten(lepton_pdgId_pt_ordered[:,0:1]),
            ak.flatten(lepton_pdgId_pt_ordered[:,1:2]), year=self.year, dataset=self.dataset)

    def _get_st(self):
        '''MET pt plus the summed pt of all jets, muons and electrons.'''
        ht = ak.sum(self.jet_all.pt, axis=1)
        return self.met.pt + ht + ak.sum(self.mu.pt, axis=1) + ak.sum(self.ele.pt, axis=1)

    def _require(self, reqs, omit, cutflow):
        '''
        Combine the cuts named in reqs, minus those in omit, into one mask.

        The applied cut names are stored in self.reqs.  If a cutflow object
        is given, one cumulative row per entry of reqs is added to it.
        '''
        omit = () if omit is None else omit
        self.reqs = [ sel for sel in reqs if sel not in omit ]
        mask = self.selection.require(**{ sel: True for sel in self.reqs })

        if cutflow:
            # NOTE: the cutflow walks the full list, so cuts named in omit
            # still appear in (and are applied to) the cutflow rows.
            cutflow_reqs_d = {}
            for req in reqs:
                cutflow_reqs_d[req] = True
                cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

        return mask

    def dilep_baseline(self, omit=None, cutflow=None, tight=False, SS=True):
        '''
        Build the dilepton baseline selection and return the combined mask.

        omit:    names of cuts that will not be applied (default: none)
        cutflow: give it a cutflow object if you want it to be filled
        tight:   additionally require 'N_jet>4', 'N_central>3', 'ST>600'
                 and 'MET>50'
        SS:      if True require at least one same-sign lepton pair ('SS'),
                 otherwise at least one opposite-sign pair ('OS')

        The PackedSelection holding all individual cuts is kept in
        self.selection, the names of the applied cuts in self.reqs.
        '''
        self.selection = PackedSelection()

        is_dilep   = ((ak.num(self.ele) + ak.num(self.mu))==2)
        lep0pt     = (self._n_leptons_above(25)>0)
        lep1pt     = (self._n_leptons_above(20)>1)
        lepveto    = ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==2)

        dimu    = choose(self.mu, 2)
        diele   = choose(self.ele, 2)
        dilep   = cross(self.mu, self.ele)

        charge_cut = 'SS' if SS else 'OS'

        def has_pair(pairs):
            # The product of the two charges is positive for same-sign
            # pairs and negative for opposite-sign pairs.
            product = pairs['0'].charge * pairs['1'].charge
            if SS:
                return ak.any(product>0, axis=1)
            return ak.any(product<0, axis=1)

        charge_mask = has_pair(dimu) | has_pair(diele) | has_pair(dilep)

        triggers = self._get_triggers()
        st = self._get_st()

        self.selection.add('lepveto',       lepveto)
        self.selection.add('dilep',         is_dilep)
        #self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        self.selection.add(charge_cut,      charge_mask)
        self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3) )
        self.selection.add('N_jet>4',       (ak.num(self.jet_all)>4) )
        self.selection.add('N_central>2',   (ak.num(self.jet_central)>2) )
        self.selection.add('N_central>3',   (ak.num(self.jet_central)>3) )
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0) )
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0) )
        self.selection.add('MET>30',        (self.met.pt>30) )
        self.selection.add('MET>50',        (self.met.pt>50) )
        self.selection.add('ST>600',        (st>600) )

        ss_reqs = [
        #    'filter',
            'lepveto',
            'dilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            charge_cut,
            'N_jet>3',
            'N_central>2',
            'N_btag>0',
            'MET>30',
            'N_fwd>0',
        ]

        if tight:
            ss_reqs += [
                'N_jet>4',
                'N_central>3',
                'ST>600',
                'MET>50',
                #'delta_eta',
            ]

        return self._require(ss_reqs, omit, cutflow)

    def trilep_baseline(self, omit=None, cutflow=None, tight=False):
        '''
        Build the trilepton baseline selection and return the combined mask.

        omit:    names of cuts that will not be applied (default: none)
        cutflow: give it a cutflow object if you want it to be filled
        tight:   additionally require 'N_jet>3', 'N_central>2' and 'ST>600'

        Reads self.filters for the 'filter' cut.  The PackedSelection
        holding all individual cuts is kept in self.selection, the names of
        the applied cuts in self.reqs.
        '''
        self.selection = PackedSelection()

        is_trilep  = ((ak.num(self.ele) + ak.num(self.mu))==3)
        lep0pt     = (self._n_leptons_above(25)>0)
        lep1pt     = (self._n_leptons_above(20)>1)
        lepveto    = ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==3)

        dimu    = choose(self.mu, 2)
        diele   = choose(self.ele, 2)

        OS_dimu = dimu[(dimu['0'].charge*dimu['1'].charge < 0)]
        OS_diele = diele[(diele['0'].charge*diele['1'].charge < 0)]

        # Passes when every opposite-sign di-muon and di-electron pair has a
        # mass more than 10 away from 91.2.
        offZ = (ak.all(abs(OS_dimu.mass-91.2)>10, axis=1) & ak.all(abs(OS_diele.mass-91.2)>10, axis=1))

        triggers = self._get_triggers()
        st = self._get_st()

        self.selection.add('lepveto',       lepveto)
        self.selection.add('trilep',        is_trilep)
        self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        self.selection.add('N_jet>2',       (ak.num(self.jet_all)>2) )
        self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3) )
        self.selection.add('N_central>1',   (ak.num(self.jet_central)>1) )
        self.selection.add('N_central>2',   (ak.num(self.jet_central)>2) )
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0) )
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0) )
        self.selection.add('MET>50',        (self.met.pt>50) )
        self.selection.add('ST>600',        (st>600) )
        self.selection.add('offZ',          offZ )

        reqs = [
            'filter',
            'lepveto',
            'trilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            'offZ',
            'MET>50',
            'N_jet>2',
            'N_central>1',
            'N_btag>0',
            'N_fwd>0',
        ]

        if tight:
            reqs += [
                'N_jet>3',
                'N_central>2',
                'ST>600',
                #'MET>50',
                #'delta_eta',
            ]

        return self._require(reqs, omit, cutflow)
Ejemplo n.º 20
0
    def process(self, events):

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        if not dataset == 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_btag=2', (ak.num(btag) == 2))
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('N_central>1', (ak.num(central) >= 2))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))
        selection.add('MET>30', (ev.MET.pt > 30))

        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + [
            'N_btag=2', 'N_jet>2', 'N_central>1', 'N_fwd>0', 'MET>30'
        ]

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight.weight()[os_selection])
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight.weight()[os_selection])
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[os_selection],
                           weight=weight.weight()[os_selection])
        output['N_central'].fill(dataset=dataset,
                                 multiplicity=ak.num(central)[os_selection],
                                 weight=weight.weight()[os_selection])
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(electron)[os_selection],
                            weight=weight.weight()[os_selection])
        output['N_fwd'].fill(dataset=dataset,
                             multiplicity=ak.num(fwd)[os_selection],
                             weight=weight.weight()[os_selection])

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight.weight()[os_selection])

        output['electron'].fill(dataset=dataset,
                                pt=ak.to_numpy(ak.flatten(electron[BL].pt)),
                                eta=ak.to_numpy(ak.flatten(electron[BL].eta)),
                                phi=ak.to_numpy(ak.flatten(electron[BL].phi)),
                                weight=weight.weight()[BL])

        output['muon'].fill(dataset=dataset,
                            pt=ak.to_numpy(ak.flatten(muon[BL].pt)),
                            eta=ak.to_numpy(ak.flatten(muon[BL].eta)),
                            phi=ak.to_numpy(ak.flatten(muon[BL].phi)),
                            weight=weight.weight()[BL])

        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['trail_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['fwd_jet'].fill(dataset=dataset,
                               pt=ak.flatten(high_p_fwd[BL].pt_nom),
                               eta=ak.flatten(high_p_fwd[BL].eta),
                               phi=ak.flatten(high_p_fwd[BL].phi),
                               weight=weight.weight()[BL])

        output['b1'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
                          weight=weight.weight()[BL])

        output['b2'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=weight.weight()[BL])

        output['j2'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(jet[:, 1:2][BL].eta),
                          phi=ak.flatten(jet[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j3'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
                          eta=ak.flatten(jet[:, 2:3][BL].eta),
                          phi=ak.flatten(jet[:, 2:3][BL].phi),
                          weight=weight.weight()[BL])

        # Now, take care of systematic unceratinties
        if not dataset == 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt > 25)]
                jet = jet[~match(
                    jet, muon,
                    deltaRCut=0.4)]  # remove jets that overlap with muons
                jet = jet[~match(
                    jet, electron,
                    deltaRCut=0.4)]  # remove jets that overlap with electrons

                central = jet[(abs(jet.eta) < 2.4)]
                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd = getFwdJet(light)
                fwd_noPU = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.p, axis=1))]  # highest momentum spectator
                high_pt_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(
                    ak.argmax(abs(fwd.eta), axis=1))]  # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                high_score_btag = central[ak.argsort(
                    central.btagDeepFlavB)][:, :2]

                # get the modified selection -> more difficult
                selection.add('N_jet>2_' + var,
                              (ak.num(jet.pt) >= 3))  # stupid bug here...
                selection.add('N_btag=2_' + var, (ak.num(btag) == 2))
                selection.add('N_central>1_' + var, (ak.num(central) >= 2))
                selection.add('N_fwd>0_' + var, (ak.num(fwd) >= 1))
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                ## Don't change the selection for now...
                bl_reqs = os_reqs + [
                    'N_jet>2_' + var, 'MET>30_' + var, 'N_btag=2_' + var,
                    'N_central>1_' + var, 'N_fwd>0_' + var
                ]
                bl_reqs_d = {sel: True for sel in bl_reqs}
                BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_fwd_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(fwd)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_central_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(central)[os_selection],
                    weight=weight.weight()[os_selection])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet.eta[:, 0:1][BL]),
                                         phi=ak.flatten(jet.phi[:, 0:1][BL]),
                                         weight=weight.weight()[BL])

                output['b1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight=weight.weight()[BL])

                output['fwd_jet_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_p_fwd[BL].pt),
                    eta=ak.flatten(high_p_fwd[BL].eta),
                    phi=ak.flatten(high_p_fwd[BL].phi),
                    weight=weight.weight()[BL])

                output['MET_' + var].fill(dataset=dataset,
                                          pt=getattr(ev.MET,
                                                     var)[os_selection],
                                          phi=ev.MET[os_selection].phi,
                                          weight=weight.weight()[os_selection])

        return output
Ejemplo n.º 21
0
    def process(self, events):
        """Fill the charge-flip study histograms for one chunk of events.

        Electrons and muons are restricted to gen-matched candidates with
        genPartFlav 1 or 15. An electron is counted as "flipped" when its
        pdgId equals minus the pdgId of its matched gen particle, or minus the
        value ``find_first_parent`` returns for that gen particle. Jet, lepton
        and electron histograms are then filled for several same-sign and
        opposite-sign selections, and the accumulator is returned.
        """
        output = self.accumulator.identity()

        # very loose preselection; it is only used to skim the chunk
        ev = events[ak.num(events.Jet) >= 2]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        in_acceptance = (electron.pt > 15) & (np.abs(electron.eta) < 2.4)
        electron = electron[in_acceptance]

        # The gen requirements are applied one after the other, so that
        # matched_gen is only inspected for electrons that have a gen index.
        electron = electron[electron.genPartIdx >= 0]
        electron = electron[np.abs(electron.matched_gen.pdgId) == 11]  # all gen-matched from here on
        is_prompt = (electron.genPartFlav == 1) | (electron.genPartFlav == 15)
        electron = electron[is_prompt]  # ... and all prompt from here on

        leading_electron = electron[ak.singletons(ak.argmax(electron.pt, axis=1))]
        trailing_electron = electron[ak.singletons(ak.argmin(electron.pt, axis=1))]

        # only needed by the commented-out parent histograms at the end
        leading_parent = find_first_parent(leading_electron.matched_gen)
        trailing_parent = find_first_parent(trailing_electron.matched_gen)

        gen_electron = electron.matched_gen
        opposite_to_gen = (gen_electron.pdgId * (-1) == electron.pdgId)
        opposite_to_parent = (find_first_parent(gen_electron) * (-1) == electron.pdgId)
        is_flipped = (opposite_to_gen | opposite_to_parent) & (np.abs(electron.pdgId) == 11)

        flipped_electron = electron[is_flipped]
        flipped_electron = flipped_electron[ak.fill_none(flipped_electron.pt, 0) > 0]
        flipped_electron = flipped_electron[~ak.is_none(flipped_electron)]
        n_flips = ak.num(flipped_electron)

        ## Muons
        muon = Collections(ev, "Muon", "tightFCNC").get()
        muon = muon[(muon.pt > 15) & (np.abs(muon.eta) < 2.4)]
        muon = muon[muon.genPartIdx >= 0]
        muon = muon[np.abs(muon.matched_gen.pdgId) == 13]  # all gen-matched from here on
        muon = muon[(muon.genPartFlav == 1) | (muon.genPartFlav == 15)]  # ... and all prompt

        ## Leptons
        lepton = ak.concatenate([muon, electron], axis=1)
        charge_sum = ak.sum(lepton.charge, axis=1)
        is_dilepton = (ak.num(lepton) == 2)
        SSlepton = (charge_sum != 0) & is_dilepton
        OSlepton = (charge_sum == 0) & is_dilepton

        emulepton = (ak.num(electron) == 1) & (ak.num(muon) == 1)
        no_mumu = (ak.num(muon) <= 1)

        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        ## Jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[ak.argsort(jet.pt, ascending=False)]  # need to sort wrt smeared and recorrected jet pt
        for overlapping in (muon, electron):
            # remove jets that overlap with muons, then with electrons
            jet = jet[~match(jet, overlapping, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # Two sets of weights: the nominal one, and one that additionally
        # carries the charge-flip weight of the electrons.
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if dataset != 'MuonEG':
            # generator weight
            for weights in (weight, weight2):
                weights.add("weight", ev.genWeight)

        weight2.add("charge flip", self.charge_flip_ratio.flip_weight(electron))

        ## Selections
        filters = getFilters(ev, year=self.year, dataset=dataset)

        selection = PackedSelection()
        for cut_name, cut_mask in (
            ('filter', filters),
            ('ss', SSlepton),
            ('os', OSlepton),
            ('jet', ak.num(jet) >= 2),
            ('ee', ak.num(electron) == 2),
            ('emu', emulepton),
            ('flip', n_flips == 1),
            ('nflip', n_flips == 0),
            ('no_mumu', no_mumu),
        ):
            selection.add(cut_name, cut_mask)

        baseline_cuts = ('filter', 'jet')

        def passing(*extra_cuts):
            # event mask requiring the baseline cuts plus the given ones
            return selection.require(**dict.fromkeys(baseline_cuts + extra_cuts, True))

        baseline = passing()

        flip_sel = passing('flip', 'ss', 'ee')
        flip_sel2 = passing('flip', 'ss', 'emu')
        flip_sel3 = passing('flip', 'ss', 'no_mumu')

        n_flip_sel = passing('nflip', 'os', 'ee')
        n_flip_sel2 = passing('nflip', 'os', 'emu')
        n_flip_sel3 = passing('nflip', 'os', 'no_mumu')

        ss_sel = passing('ss', 'no_mumu')
        os_sel = passing('os', 'no_mumu')

        # these four masks are built but not used by any histogram below
        eess_sel = passing('ss', 'ee')
        eeos_sel = passing('os', 'ee')
        emss_sel = passing('ss', 'emu')
        emos_sel = passing('os', 'emu')

        ## Outputs
        w_nominal = weight.weight()
        w_flip = weight2.weight()

        output['N_jet'].fill(
            dataset=dataset,
            multiplicity=ak.num(jet)[baseline],
            weight=w_nominal[baseline],
        )

        for hist_name, mask, event_weight in (
            ('N_ele', ss_sel, w_nominal),
            ('N_ele2', os_sel, w_flip),
        ):
            output[hist_name].fill(
                dataset=dataset,
                multiplicity=ak.num(lepton)[mask],
                weight=event_weight[mask],
            )

        for hist_name, mask, event_weight in (
            ('electron_flips', flip_sel, w_nominal),
            ('electron_flips2', n_flip_sel, w_flip),
            ('electron_flips3', flip_sel2, w_nominal),
            ('electron_flips4', n_flip_sel2, w_flip),
        ):
            output[hist_name].fill(
                dataset=dataset,
                multiplicity=n_flips[mask],
                weight=event_weight[mask],
            )

        # pt / |eta| of the leading electron in each region
        for hist_name, mask, event_weight in (
            ('electron', flip_sel3, w_nominal),
            ('electron2', n_flip_sel3, w_flip),
            ('flipped_electron', flip_sel, w_nominal),
            ('flipped_electron2', n_flip_sel, w_flip),
            ('flipped_electron3', flip_sel2, w_nominal),
            ('flipped_electron4', n_flip_sel2, w_flip),
        ):
            selected = leading_electron[mask]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(selected.pt)),
                eta=np.abs(ak.to_numpy(ak.flatten(selected.eta))),
                weight=event_weight[mask],
            )

        #output["lepton_parent"].fill(
        #    dataset = dataset,
        #    pdgID = np.abs(ak.to_numpy(ak.flatten(leading_parent[ss_sel]))),
        #    weight = weight.weight()[ss_sel]
        #)
        #
        #output["lepton_parent2"].fill(
        #    dataset = dataset,
        #    pdgID = np.abs(ak.to_numpy(ak.flatten(trailing_parent[ss_sel]))),
        #    weight = weight.weight()[ss_sel]
        #)

        return output
Ejemplo n.º 22
0
    def process(self, events):
        """Fill single-lepton pt/|eta| histograms for tight and fakeable leptons.

        An event enters the muon (electron) histograms when it has MET < 20,
        a minimum mT between the fakeable muons and MET below 20, exactly one
        fakeable muon (electron) with zero or one tight muon (electron), and
        at least one jet left after removing those that ``match`` pairs with
        a fakeable muon (electron) at deltaRCut=1.0. With ``self.debug`` set,
        a table of the events passing the tight-muon selection is appended to
        ``output['muons_df']``. Returns the filled accumulator.
        """
        # corrects for rare case where there isn't a single jet in event
        events = events[ak.num(events.Jet) > 0]
        output = self.accumulator.identity()

        # very loose preselection (>= 0 jets); nothing else is done with it
        ev = events[ak.num(events.Jet) >= 0]
        dataset = ev.metadata['dataset']

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ### For FCNC, we want electron -> tightTTH
        electron = Collections(ev, "Electron", "tightFCNC").get()
        fakeableelectron = Collections(ev, "Electron", "fakeableFCNC").get()
        muon = Collections(ev, "Muon", "tightFCNC").get()
        fakeablemuon = Collections(ev, "Muon", "fakeableFCNC").get()

        # Only the fakeable muons enter the transverse mass; the alternative
        # was ak.concatenate([fakeablemuon, fakeableelectron], axis=1).
        lepton = fakeablemuon
        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        selection = PackedSelection()
        selection.add('MET<20', (ev.MET.pt < 20))
        selection.add('mt<20', min_mt_lep_met < 20)
        fcnc_selection = selection.require(**dict.fromkeys(('MET<20', 'mt<20'), True))

        # define the weight (the histograms below are filled without it)
        weight = Weights(len(ev))
        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)

        jets = getJets(ev, maxEta=2.4, minPt=25, pt_var='pt')

        n_tight_mu = ak.num(muon)
        n_fakeable_mu = ak.num(fakeablemuon)
        n_tight_el = ak.num(electron)
        n_fakeable_el = ak.num(fakeableelectron)

        # exactly one fakeable lepton, which may or may not also be tight
        single_muon_sel = (
            ((n_tight_mu == 1) & (n_fakeable_mu == 1))
            | ((n_tight_mu == 0) & (n_fakeable_mu == 1))
        )
        single_electron_sel = (
            ((n_tight_el == 1) & (n_fakeable_el == 1))
            | ((n_tight_el == 0) & (n_fakeable_el == 1))
        )

        # at least one jet that is not matched to the fakeable lepton
        jets_off_muon = jets[~match(jets, fakeablemuon, deltaRCut=1.0)]
        fcnc_muon_sel = (ak.num(jets_off_muon) >= 1) & fcnc_selection & single_muon_sel
        jets_off_electron = jets[~match(jets, fakeableelectron, deltaRCut=1.0)]
        fcnc_electron_sel = (ak.num(jets_off_electron) >= 1) & fcnc_selection & single_electron_sel

        tight_muon_sel = (n_tight_mu == 1) & fcnc_muon_sel
        loose_muon_sel = (n_fakeable_mu == 1) & fcnc_muon_sel
        tight_electron_sel = (n_tight_el == 1) & fcnc_electron_sel
        loose_electron_sel = (n_fakeable_el == 1) & fcnc_electron_sel

        for hist_name, collection, mask in (
            ('single_mu_fakeable', fakeablemuon, loose_muon_sel),
            ('single_mu', muon, tight_muon_sel),
            ('single_e_fakeable', fakeableelectron, loose_electron_sel),
            ('single_e', electron, tight_electron_sel),
        ):
            passing = collection[mask]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(passing.conePt)),
                eta=np.abs(ak.to_numpy(ak.flatten(passing.eta))),
            )

        if self.debug:
            # muon columns exported to the debugging table
            muon_columns = [
                "pt", "conePt", "eta", "dz", "dxy", "ptErrRel",
                "miniPFRelIso_all", "jetRelIsoV2", "jetRelIso",
                "jetPtRelv2"
            ]

            # create pandas dataframe for debugging
            passed_events = ev[tight_muon_sel]
            passed_muons = muon[tight_muon_sel]
            event_p = ak.to_pandas(passed_events[["event"]])
            event_p["MET_PT"] = passed_events["MET"]["pt"]
            event_p["mt"] = min_mt_lep_met[tight_muon_sel]
            event_p["num_tight_mu"] = ak.to_numpy(n_tight_mu[tight_muon_sel])
            event_p["num_loose_mu"] = n_fakeable_mu[tight_muon_sel]
            muon_p = ak.to_pandas(ak.flatten(passed_muons)[muon_columns])
            events_array = pd.concat([muon_p, event_p], axis=1)

            # hand-picked event numbers that are appended whether or not they
            # pass the selection
            for event_number in [6886009]:
                is_event = (ev.event == event_number)
                tmp_event = ev[is_event]
                added_event = ak.to_pandas(tmp_event[["event"]])
                added_event["MET_PT"] = tmp_event["MET"]["pt"]
                added_event["mt"] = min_mt_lep_met[is_event]
                added_event["num_tight_mu"] = ak.to_numpy(n_tight_mu[is_event])
                added_event["num_loose_mu"] = ak.to_numpy(n_fakeable_mu[is_event])
                add_muon = ak.to_pandas(ak.flatten(muon[is_event])[muon_columns])
                add_concat = pd.concat([add_muon, added_event], axis=1)
                events_array = pd.concat([events_array, add_concat], axis=0)

            # convert to numpy array for the output
            output['muons_df'] += processor.column_accumulator(
                events_array.to_numpy())

        return output
Ejemplo n.º 23
0
    def process(self, events):
        """Count trijet events that survive a set of one-dimensional cut scans.

        Events must fire the HLT paths chosen for the year/dataset and contain
        at least three jets with pt > 30, |eta| < 2.5 and ``isTight``. The
        first three such jets of each event are then scanned in

        * the minimum jet pt           -> ``output["N_min_pT_cut<cut>"]``
        * the maximum jet |eta|        -> ``output["N_max_eta_cut<cut>"]``
        * the maximum pairwise |dEta|  -> ``output["N_dEta_jj_max_cut<cut>"]``
        * the minimum pairwise dR      -> ``output["N_dR_jj_min_cut<cut>"]``

        and for every cut value the number of passing events is added to the
        matching counter under the dataset name.

        Args:
            events: event chunk providing ``metadata['dataset']``, ``HLT`` and
                ``Jet``.

        Returns:
            The filled accumulator.
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # HLT selection
        # NOTE(review): `year` is not defined inside this method, so it has to
        # be a module-level name - confirm that is intended.
        # NOTE(review): if `year` matches none of the branches below, HLT_mask
        # stays an empty list and the `&` further down is not a trigger mask.
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        elif year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        elif year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3) & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # (0, 1, 2) and (1, 2, 0)

        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        max_dEta = awk.max(dEta_ij, axis=1)
        min_dR = awk.min(dR_ij, axis=1)
        min_pT = awk.min(selected_jets.pt, axis=1)
        # Take |eta| before the maximum: abs(max(eta)) would ignore the most
        # forward jet whenever it sits at negative eta.
        max_eta = awk.max(abs(selected_jets.eta), axis=1)

        # NOTE: the j-jj angular variables, the trijet mass and the pt/M and
        # m_ij/M ratios computed here previously were never read and have been
        # dropped; re-introduce them together with the scan that needs them.
        # TODO: the dPhi_jjj scans only ever existed as a commented-out draft,
        # and that draft cut on min_dR instead of a dPhi quantity.

        def count_passing(cut_label, passes_cut):
            # number of 3-jet events for which the single cut holds
            selection = PackedSelection()
            selection.add(cut_label, passes_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            return len(events_3j[sel_mask])

        for pt_cut in range(30, 1150, 5):
            output[f"N_min_pT_cut{pt_cut}"][dataset_name] += count_passing(
                "MinJetPt_cut", min_pT > pt_cut)

        for eta_cut in np.arange(0, 2.5, 0.05):
            output[f"N_max_eta_cut{eta_cut}"][dataset_name] += count_passing(
                "MaxJetEta_cut", max_eta < eta_cut)

        for dEta_max_cut in np.arange(0, 5, 0.1):
            output[f"N_dEta_jj_max_cut{dEta_max_cut}"][
                dataset_name] += count_passing("MaxJJdEta_cut",
                                               max_dEta < dEta_max_cut)

        for dR_min_cut in np.arange(0, 5, 0.1):
            output[f"N_dR_jj_min_cut{dR_min_cut}"][
                dataset_name] += count_passing("MinJJdR_cut",
                                               min_dR > dR_min_cut)

        return output
Ejemplo n.º 24
0
    def process(self, events):
        """Fill the jet multiplicity of opposite-sign dielectron events.

        The selection requires the event filters, the certified-lumi mask,
        the dielectron trigger of the year, a tight-electron pair with an
        invariant mass within 15 of 91.2, a leading electron with pt > 30,
        exactly two veto electrons and no veto muon, and exactly two tight
        electrons whose charges sum to zero.

        Args:
            events: event chunk providing ``metadata['dataset']``, ``Jet``,
                ``HLT``, ``run`` and ``luminosityBlock``.

        Returns:
            The accumulator with ``output["N_jet"]`` filled.

        Raises:
            ValueError: if no trigger is defined for ``self.year``.
            NotImplementedError: if no certified-lumi JSON is configured for
                ``self.year``.
        """
        # Certified-lumi JSON per year. Only 2018 is configured; any other
        # year used to fail further down with an UnboundLocalError on
        # `lumimask`, so reject it up front with a clear message instead.
        lumi_json = {
            2018:
            'processors/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt',
        }
        if self.year not in (2016, 2017, 2018):
            raise ValueError(
                f"no dielectron trigger defined for year {self.year!r}")
        if self.year not in lumi_json:
            raise NotImplementedError(
                f"no certified-lumi JSON configured for year {self.year!r}")
        lumimask = LumiMask(lumi_json[self.year])

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        #output['totalEvents']['all'] += len(events)
        #output['skimmedEvents']['all'] += len(ev)

        if self.year == 2016:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ
        else:  # 2017 and 2018 share the same path
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]

        loose_electron = Collections(ev, "Electron", "veto").get()
        loose_electron = loose_electron[(loose_electron.pt > 25)
                                        & (np.abs(loose_electron.eta) < 2.4)]

        is_dielectron = (ak.num(electron) == 2)
        charge_sum = ak.sum(electron.charge, axis=1)
        SSelectron = (charge_sum != 0) & is_dielectron
        OSelectron = (charge_sum == 0) & is_dielectron

        dielectron = choose(electron, 2)
        dielectron_mass = (dielectron['0'] + dielectron['1']).mass

        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        leading_electron = leading_electron[(leading_electron.pt > 30)]

        ## Muons
        loose_muon = Collections(ev, "Muon", "veto").get()
        loose_muon = loose_muon[(loose_muon.pt > 20)
                                & (np.abs(loose_muon.eta) < 2.4)]

        ## Jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt', UL=False)
        jet = jet[ak.argsort(
            jet.pt, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, loose_muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        ## Selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        mask = lumimask(ev.run, ev.luminosityBlock)
        mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
        lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
        num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))

        # 'ss', 'one jet' and 'two jets' are registered for completeness but
        # not required by the only histogram filled below.
        selection = PackedSelection()
        selection.add('filter', filters)
        selection.add('mask', mask)
        selection.add('ss', SSelectron)
        selection.add('os', OSelectron)
        selection.add('mass', mass)
        selection.add('leading', lead_electron)
        selection.add('triggers', triggers)
        selection.add('one jet', (ak.num(jet) >= 1))
        selection.add('two jets', (ak.num(jet) >= 2))
        selection.add('num_loose', num_loose)

        bl_reqs = [
            'filter', 'mass', 'mask', 'triggers', 'leading', 'num_loose'
        ]
        #bl_reqs = ['filter'] + ['mass'] + ['triggers'] + ['leading'] + ['num_loose']

        o_reqs = bl_reqs + ['os']
        os_sel = selection.require(**{sel: True for sel in o_reqs})

        output["N_jet"].fill(
            dataset=dataset,
            multiplicity=ak.num(jet)[os_sel],
        )

        return output
Ejemplo n.º 25
0
    def process(self, events):
        """Fill multiplicity and leading-lepton histograms for one chunk.

        Events with at least two entries in ``events.Jet`` are kept. Tight,
        gen-matched electrons and muons are combined into one lepton
        collection, jets overlapping with those leptons are removed, and
        same-sign / opposite-sign dilepton selections are filled into the
        accumulator returned by ``self.accumulator.identity()``.

        Args:
            events: Event array; ``metadata['dataset']`` names the dataset.

        Returns:
            The filled accumulator.
        """
        output = self.accumulator.identity()

        # Very loose preselection: at least two jets per event.
        has_two_jets = ak.num(events.Jet) >= 2
        ev = events[has_two_jets]
        dataset = ev.metadata['dataset']

        # The loaded config is not used below; the call is retained in case
        # loading it has side effects.
        _cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # Electrons: kinematic cuts first, then gen matching (|pdgId| == 11).
        electron = Collections(ev, "Electron", "tight").get()
        ele_kin = (electron.pt > 20) & (abs(electron.eta) < 2.4)
        electron = electron[ele_kin]
        ele_gen = (electron.genPartIdx >= 0) & (
            np.abs(electron.matched_gen.pdgId) == 11)
        electron = electron[ele_gen]

        # Muons: same treatment with |pdgId| == 13.
        muon = Collections(ev, "Muon", "tight").get()
        mu_kin = (muon.pt > 20) & (abs(muon.eta) < 2.4)
        muon = muon[mu_kin]
        mu_gen = (muon.genPartIdx >= 0) & (
            np.abs(muon.matched_gen.pdgId) == 13)
        muon = muon[mu_gen]

        # From here on all leptons are gen-matched.
        lepton = ak.concatenate([muon, electron], axis=1)
        is_dilepton = ak.num(lepton) == 2
        charge_sum = ak.sum(lepton.charge, axis=1)
        ss_mask = (charge_sum != 0) & is_dilepton
        os_mask = (charge_sum == 0) & is_dilepton

        # Highest-pt lepton per event, kept as a (possibly empty) list.
        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]

        # Jets: sort by pt (descending), then drop jets overlapping with a
        # muon or an electron, in that order.
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        pt_order = ak.argsort(jet.pt, ascending=False)
        jet = jet[pt_order]
        for lep in (muon, electron):
            jet = jet[~match(jet, lep, deltaRCut=0.4)]

        # MET is read but not used further below.
        met = ev.MET
        _met_pt, _met_phi = met.pt, met.phi

        # Two weight sets: `weight2` additionally carries the charge-flip weight.
        n_ev = len(ev)
        weight = Weights(n_ev)
        weight2 = Weights(n_ev)
        if dataset != 'MuonEG':
            # generator weight
            for wgt in (weight, weight2):
                wgt.add("weight", ev.genWeight)
        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        # Register the per-event masks in a fixed order.
        masks = {
            'filter': getFilters(ev, year=self.year, dataset=dataset),
            'ss': ss_mask,
            'os': os_mask,
            'jet': ak.num(jet) >= 2,
        }
        selection = PackedSelection()
        for mask_name, mask in masks.items():
            selection.add(mask_name, mask)

        def passing(names):
            # Events for which every named selection is True.
            return selection.require(**dict.fromkeys(names, True))

        bl_reqs = ['filter', 'jet']
        baseline = passing(bl_reqs)
        ss_sel = passing(bl_reqs + ['ss'])
        os_sel = passing(bl_reqs + ['os'])

        # Jet multiplicity after the baseline selection.
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[baseline],
                             weight=weight.weight()[baseline])

        # Lepton multiplicity: same-sign with the plain weights,
        # opposite-sign with the charge-flip weights.
        for hist_name, sel, wgt in (('N_ele', ss_sel, weight),
                                    ('N_ele2', os_sel, weight2)):
            output[hist_name].fill(dataset=dataset,
                                   multiplicity=ak.num(lepton)[sel],
                                   weight=wgt.weight()[sel])

        # Leading-lepton kinematics for the same two selections.
        for hist_name, sel, wgt in (('electron', ss_sel, weight),
                                    ('electron2', os_sel, weight2)):
            lead = leading_lepton[sel]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(lead.pt)),
                eta=abs(ak.to_numpy(ak.flatten(lead.eta))),
                phi=ak.to_numpy(ak.flatten(lead.phi)),
                weight=wgt.weight()[sel])

        return output
Ejemplo n.º 26
0
    def process_shift(self, events, shift_name):
        """Fill cutflow and template histograms for one systematic shift.

        Args:
            events: Event array for one chunk; ``events.metadata['dataset']``
                names the dataset. Events without a ``genWeight`` attribute
                are treated as real data.
            shift_name: Name of the shift being processed, or ``None`` for
                the nominal pass. Only the nominal pass fills the cutflow,
                ``sumw``, ``btagWeight``, ``nminus1_n2ddt`` and
                ``weightStats`` outputs and loops over the weight variations.

        Returns:
            A single-entry dict mapping the dataset name to the filled
            output created by ``self.make_output()``.

        Raises:
            RuntimeError: If ``self._jet_arbitration`` is not one of
                'pt', 'mass', 'n2', 'ddb', 'ddc'.
            ValueError: If ``self._tagger`` is not one of 'v1', 'v2', 'v3',
                'v4'.
        """
        dataset = events.metadata['dataset']
        isRealData = not hasattr(events, "genWeight")
        selection = PackedSelection()
        weights = Weights(len(events), storeIndividual=True)
        output = self.make_output()
        if shift_name is None and not isRealData:
            output['sumw'] = ak.sum(events.genWeight)

        # Jet trigger: on data (or when self._newTrigger is set) OR together
        # every configured path that exists in events.HLT; otherwise every
        # event passes.
        if isRealData or self._newTrigger:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._triggers[self._year]:
                if t in events.HLT.fields:
                    trigger = trigger | events.HLT[t]
            selection.add('trigger', trigger)
            del trigger
        else:
            selection.add('trigger', np.ones(len(events), dtype='bool'))

        # Luminosity mask is only evaluated on data; simulation always passes.
        if isRealData:
            selection.add(
                'lumimask', lumiMasks[self._year](events.run,
                                                  events.luminosityBlock))
        else:
            selection.add('lumimask', np.ones(len(events), dtype='bool'))

        # Optionally drop early 2017 data by run number.
        # NOTE(review): 'dropB' is registered here but does not appear in any
        # entry of `regions` below — confirm whether it is applied elsewhere.
        if isRealData and self._skipRunB and self._year == '2017':
            selection.add('dropB', events.run > 299329)
        else:
            selection.add('dropB', np.ones(len(events), dtype='bool'))

        # Muon trigger: same OR-of-available-paths logic, data only.
        if isRealData:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._muontriggers[self._year]:
                if t in events.HLT.fields:
                    trigger |= np.array(events.HLT[t])
            selection.add('muontrigger', trigger)
            del trigger
        else:
            selection.add('muontrigger', np.ones(len(events), dtype='bool'))

        # MET filters: AND of every flag configured for this year and for
        # data or simulation respectively.
        metfilter = np.ones(len(events), dtype='bool')
        for flag in self._met_filters[
                self._year]['data' if isRealData else 'mc']:
            metfilter &= np.array(events.Flag[flag])
        selection.add('metfilter', metfilter)
        del metfilter

        # Derived fat-jet quantities: corrected soft-drop mass,
        # rho = 2*ln(msd/pt), N2 with the shift from n2ddt_shift subtracted,
        # and the corrected mass multiplied by the per-year scale factor.
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            & fatjets.isTight  # this is loose in sampleContainer
        ]

        candidatejet = candidatejet[:, :
                                    2]  # Only consider first two to match generators
        # Pick one candidate per event out of the (at most two) remaining
        # jets according to the configured arbitration; ak.firsts yields a
        # missing value for events without any candidate.
        if self._jet_arbitration == 'pt':
            candidatejet = ak.firsts(candidatejet)
        elif self._jet_arbitration == 'mass':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.msdcorr, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'n2':
            candidatejet = ak.firsts(candidatejet[ak.argmin(candidatejet.n2ddt,
                                                            axis=1,
                                                            keepdims=True)])
        elif self._jet_arbitration == 'ddb':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDBvLV2, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'ddc':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDCvLV2, axis=1, keepdims=True)])
        else:
            raise RuntimeError("Unknown candidate jet arbitration")

        # Tagger scores used below: bvl, cvl and cvb are read from different
        # branches (or built from ParticleNet scores) depending on the
        # configured tagger version.
        if self._tagger == 'v1':
            bvl = candidatejet.btagDDBvL
            cvl = candidatejet.btagDDCvL
            cvb = candidatejet.btagDDCvB
        elif self._tagger == 'v2':
            bvl = candidatejet.btagDDBvLV2
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.btagDDCvBV2
        elif self._tagger == 'v3':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.particleNetMD_Xcc / (
                1 - candidatejet.particleNetMD_Xbb)
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)

        elif self._tagger == 'v4':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)
        else:
            raise ValueError("Not an option")

        # Candidate-jet kinematic windows. The three 'minjetkin*' selections
        # differ only in the lower pt threshold (450 / 400 / 200).
        selection.add('minjetkin', (candidatejet.pt >= 450)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        # '_strict_mass' and '_high_score' are only used for the optional
        # event-visualisation dump in the cutflow loop below.
        selection.add('_strict_mass', (candidatejet.msdcorr > 85) &
                      (candidatejet.msdcorr < 130))
        selection.add('_high_score', cvl > 0.8)
        selection.add('minjetkinmu', (candidatejet.pt >= 400)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('minjetkinw', (candidatejet.pt >= 200)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('jetid', candidatejet.isTight)
        selection.add('n2ddt', (candidatejet.n2ddt < 0.))
        # Tagger working points: tagger 'v2' uses its own thresholds, every
        # other tagger version shares the first set.
        if not self._tagger == 'v2':
            selection.add('ddbpass', (bvl >= 0.89))
            selection.add('ddcpass', (cvl >= 0.83))
            selection.add('ddcvbpass', (cvb >= 0.2))
        else:
            selection.add('ddbpass', (bvl >= 0.7))
            selection.add('ddcpass', (cvl >= 0.45))
            selection.add('ddcvbpass', (cvb >= 0.03))

        jets = events.Jet
        jets = jets[(jets.pt > 30.) & (abs(jets.eta) < 2.5) & jets.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        dphi = abs(jets.delta_phi(candidatejet))
        # Veto: the highest tag score among jets with dphi > pi/2 from the
        # candidate must be below the medium working point.
        # NOTE(review): with mask_identity=False, events without such a jet
        # presumably yield the reduction identity rather than a missing value
        # and therefore pass this veto — confirm against awkward's ak.max.
        selection.add(
            'antiak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) <
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        # Jets with dphi > 0.8 from the candidate; also used later for the
        # b-tag weight and the 'ak4btagNearMu' selection.
        ak4_away = jets[dphi > 0.8]
        selection.add(
            'ak4btagMedium08',
            ak.max(ak4_away[self._ak4tagBranch], axis=1, mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])

        met = events.MET
        selection.add('met', met.pt < 140.)

        # Lepton counting. `leadingmuon` is the first good muon per event
        # (missing when there is none).
        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & events.Muon.looseId)
        nmuons = ak.sum(goodmuon, axis=1)
        leadingmuon = ak.firsts(events.Muon[goodmuon])

        # Two electron/tau definitions: with self._looseTau the electron ID
        # threshold is VETO and taus get an extra |eta| cut and ID bit
        # requirement; otherwise the electron threshold is LOOSE. In both
        # cases taus must have metric_table values above 0.4 (presumably
        # delta-R — confirm) to every good muon and good electron.
        if self._looseTau:
            goodelectron = ((events.Electron.pt > 10)
                            & (abs(events.Electron.eta) < 2.5)
                            &
                            (events.Electron.cutBased >= events.Electron.VETO))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                ((events.Tau.pt > 20)
                 & (abs(events.Tau.eta) < 2.3)
                 & events.Tau.idDecayMode
                 & ((events.Tau.idMVAoldDM2017v2 & 2) != 0)
                 & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                          axis=2)
                 & ak.all(events.Tau.metric_table(
                     events.Electron[goodelectron]) > 0.4,
                          axis=2)),
                axis=1,
            )
        else:
            goodelectron = (
                (events.Electron.pt > 10)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.LOOSE))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                (events.Tau.pt > 20)
                &
                events.Tau.idDecayMode  # bacon iso looser than Nano selection
                & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                         axis=2)
                & ak.all(events.Tau.metric_table(events.Electron[goodelectron])
                         > 0.4,
                         axis=2),
                axis=1,
            )

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin',
                      (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1))
        selection.add('muonDphiAK8',
                      abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3)

        # W-Tag (Tag and Probe)
        # tag side
        # Same reduction as 'antiak4btagMediumOppHem' above, with the
        # comparison inverted (at least one jet above the medium WP).
        selection.add(
            'ak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        selection.add('met40p', met.pt > 40.)
        selection.add('tightMuon',
                      (leadingmuon.tightId) & (leadingmuon.pt > 53.))
        # selection.add('ptrecoW', (leadingmuon + met).pt > 250.)
        selection.add('ptrecoW200', (leadingmuon + met).pt > 200.)
        # NOTE(review): 'ak4btagNearMu' is registered but not listed in any
        # region below, and despite its name applies no tag requirement —
        # it only cuts on the distance to the nearest `ak4_away` jet.
        selection.add(
            'ak4btagNearMu',
            leadingmuon.delta_r(leadingmuon.nearest(ak4_away, axis=None)) <
            2.0)
        _bjets = jets[self._ak4tagBranch] > BTagEfficiency.btagWPs[
            self._ak4tagger][self._year]['medium']
        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(ak.firsts(events.Muon))  < 0.3
        # selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)
        # At least one medium-tagged jet that is neither within 0.8 of the
        # candidate jet nor within 0.3 of the leading good muon.
        _nearAK8 = jets.delta_r(candidatejet) < 0.8
        _nearMu = jets.delta_r(leadingmuon) < 0.3
        selection.add('ak4btagOld',
                      ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None))  < 0.3
        # selection.add('ak4btagNew', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # probe side
        # NOTE(review): 'minWjetpteta' and 'noNearMuon' are registered but
        # not used by any region defined below.
        selection.add('minWjetpteta',
                      (candidatejet.pt >= 200) & (abs(candidatejet.eta) < 2.4))
        # selection.add('noNearMuon', candidatejet.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) > 1.0)
        selection.add('noNearMuon', candidatejet.delta_r(leadingmuon) > 1.0)
        #####

        # Generator-level information and event weights (simulation only).
        # On data every candidate gets genflavor 0 and `weights` stays empty.
        if isRealData:
            genflavor = ak.zeros_like(candidatejet.pt)
        else:
            if 'HToCC' in dataset or 'HToBB' in dataset:
                if self._ewkHcorr:
                    add_HiggsEW_kFactors(weights, events.GenPart, dataset)

            weights.add('genweight', events.genWeight)
            # The helpers below are also called when the corresponding
            # branch is absent, with None / [] as a placeholder argument.
            if "PSWeight" in events.fields:
                add_ps_weight(weights, events.PSWeight)
            else:
                add_ps_weight(weights, None)
            if "LHEPdfWeight" in events.fields:
                add_pdf_weight(weights, events.LHEPdfWeight)
            else:
                add_pdf_weight(weights, None)
            if "LHEScaleWeight" in events.fields:
                add_scalevar_7pt(weights, events.LHEScaleWeight)
                add_scalevar_3pt(weights, events.LHEScaleWeight)
            else:
                add_scalevar_7pt(weights, [])
                add_scalevar_3pt(weights, [])

            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            # Match the candidate jet to the nearest generator boson within
            # a threshold of 0.8.
            bosons = getBosons(events.GenPart)
            matchedBoson = candidatejet.nearest(bosons,
                                                axis=None,
                                                threshold=0.8)
            if self._tightMatch:
                # NOTE(review): both comparisons are signed (no abs), so jets
                # with pt or mass far *below* the boson's still pass —
                # confirm this one-sided requirement is intended.
                match_mask = (
                    (candidatejet.pt - matchedBoson.pt) / matchedBoson.pt <
                    0.5) & ((candidatejet.msdcorr - matchedBoson.mass) /
                            matchedBoson.mass < 0.3)
                selmatchedBoson = ak.mask(matchedBoson, match_mask)
                genflavor = bosonFlavor(selmatchedBoson)
            else:
                genflavor = bosonFlavor(matchedBoson)
            # pt of the first generator boson per event, 0 when there is none.
            genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
            if self._newVjetsKfactor:
                add_VJets_kFactors(weights, events.GenPart, dataset)
            else:
                add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            # NOTE(review): addBtagWeight is only called on the nominal pass.
            # If it also registers the b-tag weight in `weights`, shifted
            # passes run without that weight — confirm.
            if shift_name is None:
                output['btagWeight'].fill(val=self._btagSF.addBtagWeight(
                    weights, ak4_away, self._ak4tagBranch))
            if self._nnlops_rew and dataset in [
                    'GluGluHToCC_M125_13TeV_powheg_pythia8'
            ]:
                weights.add('minlo_rew',
                            powheg_to_nnlops(ak.to_numpy(genBosonPt)))

            if self._newTrigger:
                add_jetTriggerSF(
                    weights, ak.firsts(fatjets),
                    self._year if not self._skipRunB else f'{self._year}CDEF',
                    selection)
            else:
                add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                     candidatejet.pt, self._year)

            add_mutriggerSF(weights, leadingmuon, self._year, selection)
            add_mucorrectionsSF(weights, leadingmuon, self._year, selection)

            if self._year in ("2016", "2017"):
                weights.add("L1Prefiring", events.L1PreFiringWeight.Nom,
                            events.L1PreFiringWeight.Up,
                            events.L1PreFiringWeight.Dn)

            logger.debug("Weight statistics: %r" % weights.weightStatistics)

        # Apply the per-year mass scale factor only where genflavor > 0;
        # jets with genflavor == 0 keep msdcorr unchanged. Any other
        # genflavor value makes both terms vanish.
        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        # Region name -> ordered list of selection names. The order matters
        # for the cutflow below, where the cuts are applied cumulatively.
        # 'muoncontrol_noddt' lists 'jetid' twice; the repeat is a no-op for
        # the selection but still occupies one cutflow step.
        regions = {
            'signal': [
                'noleptons', 'minjetkin', 'met', 'metfilter', 'jetid',
                'antiak4btagMediumOppHem', 'n2ddt', 'trigger', 'lumimask'
            ],
            'signal_noddt': [
                'noleptons', 'minjetkin', 'met', 'jetid',
                'antiak4btagMediumOppHem', 'trigger', 'lumimask', 'metfilter'
            ],
            # 'muoncontrol': ['minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask', 'metfilter'],
            'muoncontrol': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'n2ddt',
                'muontrigger', 'lumimask'
            ],
            'muoncontrol_noddt': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'jetid', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'muontrigger',
                'lumimask'
            ],
            'wtag': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'met40p',
                'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger',
                'lumimask'
            ],
            'wtag0': [
                'onemuon', 'tightMuon', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'wtag2': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid',
                'ak4btagMediumOppHem', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'noselection': [],
        }

        def normalize(val, cut):
            """Return ``val`` as a NumPy array with missing entries set to NaN.

            When ``cut`` is not None, ``val`` is first indexed with it.
            """
            if cut is None:
                ar = ak.to_numpy(ak.fill_none(val, np.nan))
                return ar
            else:
                ar = ak.to_numpy(ak.fill_none(val[cut], np.nan))
                return ar

        # Everything from here to the end is timed and reported as
        # output["filltime"].
        import time
        tic = time.time()
        # Cutflow histograms (nominal pass only): step 0 is filled before
        # any named cut, then one step per cut applied cumulatively, with
        # 'ddcvbpass' and 'ddcpass' appended to every region's list.
        if shift_name is None:
            for region, cuts in regions.items():
                allcuts = set([])
                # NOTE(review): selection.all() with no names presumably
                # selects every event — confirm. 'cutflow_msd' step 0 is
                # filled without this mask, the eta/pt ones with it.
                cut = selection.all(*allcuts)
                output['cutflow_msd'].fill(region=region,
                                           genflavor=normalize(
                                               genflavor, None),
                                           cut=0,
                                           weight=weights.weight(),
                                           msd=normalize(msd_matched, None))
                output['cutflow_eta'].fill(region=region,
                                           genflavor=normalize(genflavor, cut),
                                           cut=0,
                                           weight=weights.weight()[cut],
                                           eta=normalize(
                                               candidatejet.eta, cut))
                output['cutflow_pt'].fill(region=region,
                                          genflavor=normalize(genflavor, cut),
                                          cut=0,
                                          weight=weights.weight()[cut],
                                          pt=normalize(candidatejet.pt, cut))
                # `cut` is first the selection *name* from the loop and is
                # then rebound to the cumulative boolean mask.
                for i, cut in enumerate(cuts + ['ddcvbpass', 'ddcpass']):
                    allcuts.add(cut)
                    cut = selection.all(*allcuts)
                    output['cutflow_msd'].fill(region=region,
                                               genflavor=normalize(
                                                   genflavor, cut),
                                               cut=i + 1,
                                               weight=weights.weight()[cut],
                                               msd=normalize(msd_matched, cut))
                    output['cutflow_eta'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        eta=normalize(candidatejet.eta, cut))
                    output['cutflow_pt'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        pt=normalize(candidatejet.pt, cut))

                    # Optional dump of per-event identifiers for data events
                    # in the signal region: runs once 'ddcpass' has been
                    # added (the last cutflow step) and additionally requires
                    # '_strict_mass' and '_high_score'.
                    if self._evtVizInfo and 'ddcpass' in allcuts and isRealData and region == 'signal':
                        # Without an 'event' branch there is nothing to dump.
                        if 'event' not in events.fields:
                            continue
                        _cut = selection.all(*allcuts, '_strict_mass',
                                             '_high_score')
                        # _cut = selection.all('_strict_mass'')
                        output['to_check'][
                            'mass'] += processor.column_accumulator(
                                normalize(msd_matched, _cut))
                        nfatjet = ak.sum(
                            ((fatjets.pt > 200) &
                             (abs(fatjets.eta) < 2.5) & fatjets.isTight),
                            axis=1)
                        output['to_check'][
                            'njet'] += processor.column_accumulator(
                                normalize(nfatjet, _cut))
                        # One copy of the file name per selected event.
                        output['to_check'][
                            'fname'] += processor.column_accumulator(
                                np.array([events.metadata['filename']] *
                                         len(normalize(msd_matched, _cut))))
                        output['to_check'][
                            'event'] += processor.column_accumulator(
                                normalize(events.event, _cut))
                        output['to_check'][
                            'luminosityBlock'] += processor.column_accumulator(
                                normalize(events.luminosityBlock, _cut))
                        output['to_check'][
                            'run'] += processor.column_accumulator(
                                normalize(events.run, _cut))

        # Nominal pass: nominal (None) plus every weight variation.
        # Shifted pass: only the shift itself.
        if shift_name is None:
            systematics = [None] + list(weights.variations)
        else:
            systematics = [shift_name]

        def fill(region, systematic, wmod=None):
            """Fill the template-type histograms for one region and systematic.

            Args:
                region: Key of ``regions``; its selections are all required.
                systematic: ``None`` for nominal (stored as 'nominal'), a
                    name in ``weights.variations`` to use that weight
                    modifier, or any other label, in which case the nominal
                    weight is used.
                wmod: Optional per-event array multiplied onto the nominal
                    weight; when given, ``systematic`` is used only as a label.
            """
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                if systematic in weights.variations:
                    weight = weights.weight(modifier=systematic)[cut]
                else:
                    weight = weights.weight()[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]

            output['templates'].fill(
                region=region,
                systematic=sname,
                runid=runmap(events.run)[cut],
                genflavor=normalize(genflavor, cut),
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                ddb=normalize(bvl, cut),
                ddc=normalize(cvl, cut),
                ddcvb=normalize(cvb, cut),
                weight=weight,
            )
            # 'wtag3'..'wtag7' are accepted here but are not keys of
            # `regions` as defined above.
            if region in [
                    'wtag', 'wtag0', 'wtag2', 'wtag3', 'wtag4', 'wtag5',
                    'wtag6', 'wtag7', 'noselection'
            ]:  # and sname in ['nominal', 'pileup_weightDown', 'pileup_weightUp', 'jet_triggerDown', 'jet_triggerUp']:
                output['wtag'].fill(
                    region=region,
                    systematic=sname,
                    genflavor=normalize(genflavor, cut),
                    pt=normalize(candidatejet.pt, cut),
                    msd=normalize(msd_matched, cut),
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    ddc=normalize(cvl, cut),
                    ddcvb=normalize(cvb, cut),
                    weight=weight,
                )
            # if region in ['signal', 'noselection']:
            #     output['etaphi'].fill(
            #         region=region,
            #         systematic=sname,
            #         runid=runmap(events.run)[cut],
            #         genflavor=normalize(genflavor, cut),
            #         pt=normalize(candidatejet.pt, cut),
            #         eta=normalize(candidatejet.eta, cut),
            #         phi=normalize(candidatejet.phi, cut),
            #         ddc=normalize(cvl, cut),
            #         ddcvb=normalize(cvb, cut),
            #     ),
            # Generator-response histograms (simulation only; genBosonPt is
            # only defined in that case). The 'noweight' variant uses unit
            # weights unless an explicit wmod is given, in which case it
            # uses genWeight * wmod.
            if not isRealData:
                if wmod is not None:
                    _custom_weight = events.genWeight[cut] * wmod[cut]
                else:
                    _custom_weight = np.ones_like(weight)
                output['genresponse_noweight'].fill(
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=_custom_weight,
                )

                output['genresponse'].fill(
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=weight,
                )
            # Optimisation histograms are filled for the nominal case only.
            if systematic is None:
                output['signal_opt'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    ddc=normalize(cvl, cut),
                    ddcvb=normalize(cvb, cut),
                    msd=normalize(msd_matched, cut),
                    weight=weight,
                )
                output['signal_optb'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    ddb=normalize(bvl, cut),
                    msd=normalize(msd_matched, cut),
                    weight=weight,
                )

        for region in regions:
            # N-1 selection: every cut of the region except 'n2ddt'.
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            if shift_name is None:
                output['nminus1_n2ddt'].fill(
                    region=region,
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    weight=weights.weight()[cut],
                )
            for systematic in systematics:
                # Data is only filled for the nominal case.
                if isRealData and systematic is not None:
                    continue
                fill(region, systematic)
            # NOTE(review): the guard checks for 'LHEWeight' but the first
            # loop indexes events.LHEScaleWeight with 9 hard-coded columns —
            # confirm both branches are always present together.
            if shift_name is None and 'GluGluH' in dataset and 'LHEWeight' in events.fields:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:,
                                                                          i])
                for c in events.LHEWeight.fields[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        toc = time.time()
        output["filltime"] = toc - tic
        if shift_name is None:
            output["weightStats"] = weights.weightStatistics
        return {dataset: output}
Ejemplo n.º 27
0
    def process(self, events):
        def normalize(val, cut):
            return ak.to_numpy(ak.fill_none(
                val[cut],
                np.nan))  #val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region, cuts, systematic=None, wmod=None):
            print('filling %s' % region)
            selections = cuts

            cut = selection.all(*selections)
            if 'signal' in region: weight = weights_signal.weight()[cut]
            elif 'muonCR' in region: weight = weights_muonCR.weight()[cut]
            elif 'VtaggingCR' in region:
                weight = weights_VtaggingCR.weight()[cut]
            output['templates'].fill(
                dataset=dataset,
                region=region,
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                n2ddt=normalize(candidatejet.n2ddt, cut),
                #gruddt=normalize(candidatejet.gruddt, cut),
                in_v3_ddt=normalize(candidatejet.in_v3_ddt, cut),
                hadW=normalize(candidatejet.nmatcheddau, cut),
                weight=weight,
            ),
            output['event'].fill(
                dataset=dataset,
                region=region,
                MET=events.MET.pt[cut],
                #nJet=fatjets.counts[cut],
                nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
                weight=weight,
            ),
            output['deepAK8'].fill(
                dataset=dataset,
                region=region,
                deepTagMDWqq=normalize(candidatejet.deepTagMDWqq, cut),
                deepTagMDZqq=normalize(candidatejet.deepTagMDZqq, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                #genflavor=genflavor[cut],
                weight=weight,
            ),
            output['in_v3'].fill(
                dataset=dataset,
                region=region,
                #genflavor=genflavor[cut],
                in_v3=normalize(candidatejet.in_v3, cut),
                n2=normalize(candidatejet.n2b1, cut),
                gru=normalize(candidatejet.gru, cut),
                weight=weight,
            ),
            if 'muonCR' in dataset or 'VtaggingCR' in dataset:
                output['muon'].fill(
                    dataset=dataset,
                    region=region,
                    mu_pt=normalize(candidatemuon.pt, cut),
                    mu_eta=normalize(candidatemuon.eta, cut),
                    mu_pfRelIso04_all=normalize(candidatemuon.pfRelIso04_all,
                                                cut),
                    weight=weight,
                ),

        #common jet kinematics
        gru = events.GRU
        IN = events.IN
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['gruddt'] = gru.v25 - shift(
            fatjets, algo='gruddt', year='2017')
        fatjets['gru'] = gru.v25
        fatjets['in_v3'] = IN.v3
        fatjets['in_v3_ddt'] = IN.v3 - shift(
            fatjets, algo='inddt', year='2017')
        fatjets['in_v3_ddt_90pctl'] = IN.v3 - shift(
            fatjets, algo='inddt90pctl', year='2017')
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year='2017')
        fatjets['nmatcheddau'] = TTsemileptonicmatch(events)
        dataset = events.metadata['dataset']
        print('process dataset', dataset)
        isRealData = not hasattr(events, 'genWeight')
        output = self.accumulator.identity()
        if (len(events) == 0): return output

        selection = PackedSelection('uint64')

        weights_signal = Weights(len(events))
        weights_muonCR = Weights(len(events))
        weights_VtaggingCR = Weights(len(events))

        if not isRealData:
            output['sumw'][dataset] += ak.sum(events.genWeight)

        #######################
        if 'signal' in self._region:
            # Signal region (cut prefix "S_"): requirements on the first fat
            # jet of each event plus electron, muon and tau vetoes. Cuts are
            # registered in the shared `selection`, the cumulative cutflow is
            # accumulated into output['cutflow_signal'], and the histograms
            # are filled through fill('signal', ...).
            if isRealData:
                # Logical OR of every configured fat-jet trigger for the year.
                trigger_fatjet = np.zeros(len(events), dtype='bool')
                for t in self._triggers[self._year]:
                    try:
                        trigger_fatjet = trigger_fatjet | events.HLT[t]
                    except Exception:
                        # Best effort: a trigger that cannot be read from
                        # this chunk is reported and skipped. Catching
                        # Exception instead of using a bare except lets
                        # KeyboardInterrupt / SystemExit propagate.
                        print('trigger %s not available' % t)
            else:
                # No trigger requirement when the chunk is not real data.
                trigger_fatjet = np.ones(len(events), dtype='bool')

            # The gen-match result is evaluated once and attached both to the
            # full collection and to the per-event candidate (the call was
            # previously repeated with the same argument).
            # NOTE(review): assumes VQQgenmatch has no side effects -- confirm.
            vqq_genmatch = VQQgenmatch(events)
            fatjets["genMatchFull"] = vqq_genmatch
            candidatejet = ak.firsts(fatjets)
            candidatejet["genMatchFull"] = vqq_genmatch

            # Per-event lepton multiplicities used for the vetoes below.
            nelectrons = ak.sum(
                (events.Electron.pt > 10.)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.VETO),
                axis=1,
            )
            nmuons = ak.sum(
                (events.Muon.pt > 10)
                & (abs(events.Muon.eta) < 2.1)
                & (events.Muon.pfRelIso04_all < 0.4)
                & (events.Muon.looseId),
                axis=1,
            )
            ntaus = ak.sum(
                (events.Tau.pt > 20.)
                & (events.Tau.idDecayMode)
                & (events.Tau.rawIso < 5)
                & (abs(events.Tau.eta) < 2.3),
                axis=1,
            )

            # Insertion order matters: the cutflow below is cumulative in
            # exactly this order.
            cuts = {
                "S_fatjet_trigger": trigger_fatjet,
                "S_pt": candidatejet.pt > 525,
                "S_eta": (abs(candidatejet.eta) < 2.5),
                "S_msdcorr": (candidatejet.msdcorr > 40),
                "S_rho":
                ((candidatejet.qcdrho > -5.5) & (candidatejet.qcdrho < -2.)),
                "S_jetid": (candidatejet.isTight),
                "S_VQQgenmatch": (candidatejet.genMatchFull),
                "S_noelectron": (nelectrons == 0),
                "S_nomuon": (nmuons == 0),
                "S_notau": (ntaus == 0),
            }

            for name, cut in cuts.items():
                print(name, cut)
                selection.add(name, cut)

            if isRealData:
                # Placeholder value; kept because code outside this span may
                # still read `genflavor`.
                genflavor = 0
            else:
                weights_signal.add('genweight', events.genWeight)
                # The pileup weight is currently not applied here.
                add_jetTriggerWeight(weights_signal, candidatejet.msdcorr,
                                     candidatejet.pt, self._year)
                bosons = getBosons(events.GenPart)
                # pT of the first boson per event, 0 where there is none.
                genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                add_VJets_NLOkFactor(weights_signal, genBosonPt, self._year,
                                     dataset)

            # Cumulative cutflow: after each cut, the summed weight of the
            # events passing that cut and all the ones before it.
            allcuts_signal = set()
            signal_weight = weights_signal.weight()
            output['cutflow_signal'][dataset]['none'] += float(
                signal_weight.sum())
            for cut in cuts:
                allcuts_signal.add(cut)
                output['cutflow_signal'][dataset][cut] += float(
                    signal_weight[selection.all(*allcuts_signal)].sum())

            fill('signal', cuts.keys())

        #######################
        if 'muonCR' in self._region:
            # Muon control region (cut prefix "CR1_"): requirements on the
            # first fat jet, on the muons, on a b-tagged jet away from the fat
            # jet, plus electron/tau vetoes. Cuts are registered in the shared
            # `selection`, the cumulative cutflow goes into
            # output['cutflow_muonCR'], and histograms are filled through
            # fill('muonCR', ...).

            if isRealData:
                # Logical OR of every configured muon trigger for the year.
                # NOTE(review): unlike the signal region, a missing trigger
                # is not caught here and will raise.
                trigger_muon = np.zeros(len(events), dtype='bool')
                for t in self._muontriggers[self._year]:
                    trigger_muon = trigger_muon | events.HLT[t]
            else:
                # No trigger requirement when the chunk is not real data.
                trigger_muon = np.ones(len(events), dtype='bool')

            # First fat jet per event and (up to) the first five muons.
            candidatejet = ak.firsts(fatjets)
            candidatemuon = events.Muon[:, :5]

            # Up to four jets with pt > 50, |eta| < 2.5 and isTight.
            jets = events.Jet[((events.Jet.pt > 50.)
                               & (abs(events.Jet.eta) < 2.5)
                               & (events.Jet.isTight))][:, :4]

            dphi = abs(jets.delta_phi(candidatejet))

            # Jets separated from the candidate fat jet by |delta phi| > 0.8.
            ak4_away = jets[(dphi > 0.8)]

            # Per-event lepton multiplicities used in the cuts below.
            nelectrons = ak.sum(
                (events.Electron.pt > 10.)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.VETO),
                axis=1,
            )
            nmuons = ak.sum(
                (events.Muon.pt > 10)
                & (abs(events.Muon.eta) < 2.4)
                & (events.Muon.pfRelIso04_all < 0.25)
                & (events.Muon.looseId),
                axis=1,
            )
            ntaus = ak.sum(
                (events.Tau.pt > 20.)
                & (events.Tau.idDecayMode)
                & (events.Tau.rawIso < 5)
                & (abs(events.Tau.eta) < 2.3)
                & (events.Tau.idMVAoldDM2017v1 >= 16),
                axis=1,
            )

            # Insertion order matters: the cutflow below is cumulative in
            # exactly this order.
            # NOTE(review): each "CR1_mu_*" cut is an independent ak.any over
            # the muon list, so different muons can satisfy different cuts --
            # confirm that this is intended.
            cuts = {
                "CR1_muon_trigger":
                trigger_muon,
                "CR1_jet_pt": (candidatejet.pt > 525),
                "CR1_jet_eta": (abs(candidatejet.eta) < 2.5),
                "CR1_jet_msd": (candidatejet.msdcorr > 40),
                "CR1_jet_rho":
                ((candidatejet.qcdrho > -5.5) & (candidatejet.qcdrho < -2.)),
                "CR1_mu_pt":
                ak.any(candidatemuon.pt > 55, axis=1),
                "CR1_mu_eta":
                ak.any(abs(candidatemuon.eta) < 2.1, axis=1),
                "CR1_mu_IDLoose":
                ak.any(candidatemuon.looseId, axis=1),
                "CR1_mu_isolationTight":
                ak.any(candidatemuon.pfRelIso04_all < 0.15, axis=1),
                "CR1_muonDphiAK8":
                ak.any(
                    abs(candidatemuon.delta_phi(candidatejet)) > 2 * np.pi / 3,
                    axis=1),
                # Highest b-tag score among the away jets vs. the medium
                # working point of the configured year. With
                # mask_identity=False an event without away jets should yield
                # the reducer identity rather than a missing value (per
                # awkward's reducer semantics -- TODO confirm).
                "CR1_ak4btagMedium08":
                (ak.max(ak4_away.btagCSVV2, axis=1, mask_identity=False) >
                 BTagEfficiency.btagWPs[self._year]['medium']
                 ),  #(ak4_away.btagCSVV2.max() > 0.8838),
                "CR1_noelectron": (nelectrons == 0),
                "CR1_onemuon": (nmuons == 1),
                "CR1_notau": (ntaus == 0),
            }
            for name, cut in cuts.items():
                selection.add(name, cut)

            if isRealData:
                # Placeholder value; the flavour computation is disabled.
                genflavor = 0  #candidatejet.pt.zeros_like().pad(1, clip=True).fillna(-1).flatten()
            if not isRealData:
                # Only the generator weight is applied in this region; the
                # pileup, muon-trigger and k-factor weights are commented out.
                weights_muonCR.add('genweight', events.genWeight)
                #add_pileup_weight(weights_muonCR, events.Pileup.nPU, self._year, dataset)
                #add_singleMuTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year)
                # NOTE(review): bosons / genBosonPt are not used within this
                # region while the k-factor call below stays commented out.
                bosons = getBosons(events.GenPart)
                genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                #add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
                #genflavor = matchedBosonFlavor(candidatejet, bosons).pad(1, clip=True).fillna(-1).flatten()

            # Cumulative cutflow: after each cut, the summed weight of the
            # events passing that cut and all the ones before it.
            allcuts_ttbar_muoncontrol = set()
            output['cutflow_muonCR'][dataset]['none'] += float(
                weights_muonCR.weight().sum())
            for cut in cuts:
                allcuts_ttbar_muoncontrol.add(cut)
                output['cutflow_muonCR'][dataset][cut] += float(
                    weights_muonCR.weight()[selection.all(
                        *allcuts_ttbar_muoncontrol)].sum())
            fill('muonCR', cuts.keys())

        #######################
        if 'VtaggingCR' in self._region:
            # V-tagging control region (cut prefix "CR2_"): requirements on
            # the first fat jet, the first muon, a b-tagged jet away from the
            # fat jet, the muon+MET system and MET, plus an electron veto.
            # Cuts are registered in the shared `selection`, the cumulative
            # cutflow goes into output['cutflow_VtaggingCR'], and histograms
            # are filled through fill('VtaggingCR', ...).
            if isRealData:
                # Logical OR of every configured muon trigger for the year.
                # NOTE(review): unlike the signal region, a missing trigger
                # is not caught here and will raise.
                trigger_muon = np.zeros(len(events), dtype='bool')
                for t in self._muontriggers[self._year]:
                    trigger_muon = trigger_muon | events.HLT[t]
            else:
                # No trigger requirement when the chunk is not real data.
                trigger_muon = np.ones(len(events), dtype='bool')

            # First fat jet and first muon of each event.
            candidatejet = ak.firsts(fatjets)
            candidatemuon = ak.firsts(events.Muon)

            # Up to four jets with pt > 30 and |eta| < 2.4.
            jets = events.Jet[((events.Jet.pt > 30.)
                               & (abs(events.Jet.eta) < 2.4))][:, :4]

            dr_ak4_ak8 = jets.delta_r(candidatejet)
            # Only referenced by the commented-out part of the mask below.
            dr_ak4_muon = jets.delta_r(candidatemuon)

            # Jets separated from the candidate fat jet by delta R > 0.8.
            ak4_away = jets[(dr_ak4_ak8 > 0.8)]  # & (dr_ak4_muon > 0.4)]
            # Muon four-vector; missing entries are replaced by 0.
            mu_p4 = ak.zip(
                {
                    "pt": ak.fill_none(candidatemuon.pt, 0),
                    "eta": ak.fill_none(candidatemuon.eta, 0),
                    "phi": ak.fill_none(candidatemuon.phi, 0),
                    "mass": ak.fill_none(candidatemuon.mass, 0),
                },
                with_name="PtEtaPhiMLorentzVector")

            # MET as a four-vector with eta = 0 and mass = 0; every event's
            # value is wrapped in a one-element list.
            # NOTE(review): these four arrays are built with Python-level
            # loops over all events; a vectorized construction would avoid
            # that -- check the resulting dtype/layout before changing.
            met_p4 = ak.zip(
                {
                    "pt": ak.from_iter([[v] for v in events.MET.pt]),
                    "eta": ak.from_iter([[v] for v in np.zeros(len(events))]),
                    "phi": ak.from_iter([[v] for v in events.MET.phi]),
                    "mass": ak.from_iter([[v] for v in np.zeros(len(events))]),
                },
                with_name="PtEtaPhiMLorentzVector")

            # Muon + MET system; flattened back to one value per event in the
            # "CR2_leptonicW" cut.
            Wleptoniccandidate = mu_p4 + met_p4

            # Per-event lepton multiplicities used in the cuts below.
            nelectrons = ak.sum(
                ((events.Electron.pt > 10.)
                 & (abs(events.Electron.eta) < 2.5)
                 & (events.Electron.cutBased >= events.Electron.VETO)),
                axis=1,
            )
            n_tight_muon = ak.sum(
                ((events.Muon.pt > 53)
                 & (abs(events.Muon.eta) < 2.1)
                 & (events.Muon.tightId)),
                axis=1,
            )
            n_loose_muon = ak.sum(
                ((events.Muon.pt > 20)
                 & (events.Muon.looseId)
                 & (abs(events.Muon.eta) < 2.4)),
                axis=1,
            )
            # Computed, but the "CR2_notau" cut using it is commented out.
            ntaus = ak.sum(
                ((events.Tau.pt > 20.)
                 & (events.Tau.idDecayMode)
                 & (events.Tau.rawIso < 5)
                 & (abs(events.Tau.eta) < 2.3)
                 & (events.Tau.idMVAoldDM2017v1 >= 16)),
                axis=1,
            )

            # Insertion order matters: the cutflow below is cumulative in
            # exactly this order.
            cuts = {
                "CR2_muon_trigger":
                trigger_muon,
                "CR2_jet_pt": (candidatejet.pt > 200),
                "CR2_jet_eta": (abs(candidatejet.eta) < 2.5),
                "CR2_jet_msd": (candidatejet.msdcorr > 40),
                "CR2_mu_pt":
                candidatemuon.pt > 53,
                "CR2_mu_eta": (abs(candidatemuon.eta) < 2.1),
                "CR2_mu_IDTight":
                candidatemuon.tightId,
                "CR2_mu_isolationTight": (candidatemuon.pfRelIso04_all < 0.15),
                "CR2_muonDphiAK8":
                abs(candidatemuon.delta_phi(candidatejet)) > 2 * np.pi / 3,
                # Highest b-tag score among the away jets vs. the medium
                # working point of the configured year.
                "CR2_ak4btagMedium08":
                (ak.max(ak4_away.btagCSVV2, axis=1, mask_identity=False) >
                 BTagEfficiency.btagWPs[self._year]['medium']),
                "CR2_leptonicW":
                ak.flatten(Wleptoniccandidate.pt > 200),
                "CR2_MET": (events.MET.pt > 40.),
                "CR2_noelectron": (nelectrons == 0),
                "CR2_one_tightMuon": (n_tight_muon == 1),
                "CR2_one_looseMuon": (n_loose_muon == 1),
                #"CR2_notau"            : (ntaus==0),
            }

            for name, cut in cuts.items():
                # Debug output of every cut mask before it is registered.
                print(name, cut)
                selection.add(name, cut)
            #weights.add('metfilter', events.Flag.METFilters)
            if isRealData:
                # Placeholder value; the flavour computation is disabled.
                genflavor = 0  #candidatejet.pt.zeros_like().pad(1, clip=True).fillna(-1).flatten()
            if not isRealData:
                # Only the generator weight is applied in this region; the
                # pileup, muon-trigger and k-factor weights are commented out.
                weights_VtaggingCR.add('genweight', events.genWeight)
                #add_pileup_weight(weights_VtaggingCR, events.Pileup.nPU, self._year, dataset)
                #add_singleMuTriggerWeight(weights, abs(candidatemuon.eta), candidatemuon.pt, self._year)
                # NOTE(review): bosons / genBosonPt are not used within this
                # region while the k-factor call below stays commented out.
                bosons = getBosons(events.GenPart)
                genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                #add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
                #genflavor = matchedBosonFlavor(candidatejet, bosons).pad(1, clip=True).fillna(-1).flatten()

                # b-tag weights (none are applied here)
            # Cumulative cutflow: after each cut, the summed weight of the
            # events passing that cut and all the ones before it.
            allcuts_vselection = set()
            output['cutflow_VtaggingCR'][dataset]['none'] += float(
                weights_VtaggingCR.weight().sum())

            for cut in cuts:
                allcuts_vselection.add(cut)
                output['cutflow_VtaggingCR'][dataset][cut] += float(
                    weights_VtaggingCR.weight()[selection.all(
                        *allcuts_vselection)].sum())
            fill('VtaggingCR', cuts.keys())

        # Accumulator holding everything filled for this chunk.
        return output