Example #1
0
    def process(self, df):
        """Fill the ``njets`` histogram with gen-jet counts for one chunk.

        Per-event weights combine the ``weight`` column and the pileup
        corrections (both skipped when ``self.data_type`` is ``'data'``)
        with the trigger, cosmic-veto and primary-vertex columns.  Gen jets
        that ``match`` a ``pid == 32`` gen particle (called dark photons in
        this code) with ``deltaRCut=0.4`` are dropped; of the remaining
        jets, those with pt above the event's highest dark-photon pt are
        counted.

        Args:
            df: chunk of event columns, indexable by branch name and
                exposing ``size``.

        Returns:
            The accumulator with ``njets`` filled, or the untouched identity
            accumulator when the chunk has no events.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        # ---- event weights ----
        event_weights = processor.Weights(df.size)
        if self.data_type != 'data':
            event_weights.add('genw', df['weight'])
            true_npv = df['trueInteractionNum']
            event_weights.add('pileup',
                              *(corr(true_npv) for corr in self.pucorrs))

        # An event passes the trigger requirement if any listed path fired.
        fired_any_trigger = np.logical_or.reduce(
            [df[path] for path in Triggers])
        event_weights.add('trigger', fired_any_trigger)
        event_weights.add('cosmicveto', df['cosmicveto_result'])
        event_weights.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])

        weight = event_weights.weight()

        # ---- generator-level objects ----
        genjets = JaggedCandidateArray.candidatesfromcounts(
            df['genjet_p4'],
            px=df['genjet_p4.fCoordinates.fX'].content,
            py=df['genjet_p4.fCoordinates.fY'].content,
            pz=df['genjet_p4.fCoordinates.fZ'].content,
            energy=df['genjet_p4.fCoordinates.fT'].content,
        )
        genparticles = JaggedCandidateArray.candidatesfromcounts(
            df['gen_p4'],
            px=df['gen_p4.fCoordinates.fX'].content,
            py=df['gen_p4.fCoordinates.fY'].content,
            pz=df['gen_p4.fCoordinates.fZ'].content,
            energy=df['gen_p4.fCoordinates.fT'].content,
            pid=df['gen_pid'].content,
        )

        is_dark_photon = genparticles.pid == 32
        darkphotons = genparticles[is_dark_photon]
        leading_dp_pt = darkphotons.pt.max()

        # Remove gen jets overlapping a dark photon, then keep only the
        # ones harder than the leading dark photon of the same event.
        overlaps_dp = genjets.match(darkphotons, deltaRCut=0.4)
        isolated_genjets = genjets[~overlaps_dp]
        hard_genjets = isolated_genjets[isolated_genjets.pt > leading_dp_pt]

        output['njets'].fill(
            dataset=dataset,
            cnt=hard_genjets.counts,
            weight=weight,
        )

        return output
Example #2
0
    def process(self, events):
        """Fill cutflow, kinematic and template histograms for one chunk.

        The leading AK15 fat jet (highest soft-drop pt) must pass
        ``isGoodFatJet`` and carry the ``withmu`` flag (two subjets each
        matched to exactly one global muon).  The secondary vertex with the
        largest ``dxySig`` is kept when ``match`` associates it with that
        jet.  Chunks that have a ``genWeight`` column are treated as
        simulation and get a generator flavour label per event.

        Args:
            events: chunk of events exposing ``metadata``, ``columns``,
                ``size`` and the ``Muon``, ``AK15Puppi``, ``SV``, ``HLT``,
                ``Flag`` (and for simulation ``GenPart``, ``genWeight``)
                collections.

        Returns:
            The accumulator ``hout`` with ``sumw``, ``cutflow``,
            ``jptemplate``, ``svtemplate`` and the per-variable histograms
            filled.
        """
        dataset = events.metadata['dataset']

        isData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        hout = self.accumulator.identity()

        ###
        # Getting ids from .coffea files
        ###

        get_msd_weight = self._corrections['get_msd_weight']
        isGoodFatJet = self._ids['isGoodFatJet']

        match = self._common['match']

        ###
        # Initialize physics objects
        ###

        mu = events.Muon

        fj = events.AK15Puppi
        fj['sd'] = fj.subjets.sum()
        fj['isgood'] = isGoodFatJet(fj.sd.pt, fj.sd.eta, fj.jetId)
        fj['T'] = TVector2Array.from_polar(fj.pt, fj.phi)
        fj['msd_raw'] = (fj.subjets * (1 - fj.subjets.rawFactor)).sum().mass
        # The mass correction is floored at 1e-5 before being applied.
        fj['msd_corr'] = fj.msd_raw * awkward.JaggedArray.fromoffsets(
            fj.array.offsets,
            np.maximum(
                1e-5, get_msd_weight(fj.sd.pt.flatten(), fj.sd.eta.flatten())))
        probQCD = (fj.probQCDbb + fj.probQCDcc + fj.probQCDb + fj.probQCDc
                   + fj.probQCDothers)
        probZHbb = fj.probZbb + fj.probHbb
        fj['ZHbbvsQCD'] = probZHbb / (probZHbb + probQCD)
        fj['tau21'] = fj.tau2 / fj.tau1

        SV = events.SV

        ###
        # Generator-level flavour matching (simulation only)
        ###
        if not isData:

            gen = events.GenPart

            gen['isb'] = (abs(gen.pdgId) == 5) & gen.hasFlags(
                ['fromHardProcess', 'isLastCopy'])
            genb = gen[gen.isb]
            jetgenb = fj.sd.cross(genb, nested=True)
            # Number of selected b quarks within deltaR < 1.5 of each jet.
            n_b_near = (jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum()
            has_genb = genb.counts > 0
            fj['isb'] = (n_b_near == 1) & has_genb
            fj['isbb'] = (n_b_near == 2) & has_genb

            gen['isc'] = (abs(gen.pdgId) == 4) & gen.hasFlags(
                ['fromHardProcess', 'isLastCopy'])
            genc = gen[gen.isc]
            jetgenc = fj.sd.cross(genc, nested=True)
            n_c_near = (jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum()
            has_genc = genc.counts > 0
            fj['isc'] = (n_c_near == 1) & has_genc
            fj['iscc'] = (n_c_near == 2) & has_genc

        ##### axis=1 flattening removes the boundaries between fat jets so
        ##### every subjet of the event can be paired with every muon.
        ##### fj.subjets is a TLorentzVectorArray.
        mu = mu[mu.isGlobal]  ## Use a global muon for QCD events
        jetmu = fj.subjets.flatten(axis=1).cross(mu, nested=True)
        n_matched_mu = ((jetmu.i0.delta_r(jetmu.i1) < 0.4) &
                        ((jetmu.i1.pt / jetmu.i0.pt) < 0.7) &
                        (jetmu.i1.pt > 7)).sum()
        # `&` binds tighter than `==`, so the comparison needs its own
        # parentheses; without them the mask tested
        # ((mu.counts > 0) & n_matched_mu) == 1, i.e. "odd match count".
        mask = (mu.counts > 0) & (n_matched_mu == 1)

        ##### Three steps to give the per-subjet mask the same jaggedness as
        ##### fj.subjets; offsets are reused so only the contents are copied.
        step1 = fj.subjets.flatten()
        step2 = awkward.JaggedArray.fromoffsets(step1.offsets, mask.content)
        # Pad empty entries with None, then turn None into 0 (False).
        step2 = step2.pad(1).fillna(0)
        step3 = awkward.JaggedArray.fromoffsets(fj.subjets.offsets, step2)

        ##### fatjet with two subjets matched with muons
        fj['withmu'] = step3.sum() == 2

        ###
        # Selections
        ###

        #### trigger selection ####
        # Builtin bool/int replace np.bool/np.int, which NumPy 1.24 removed.
        triggers = np.zeros(events.size, dtype=bool)
        for path in self._btagmu_triggers[self._year]:
            if path not in events.HLT.columns:
                continue
            triggers = triggers | events.HLT[path]
        selection.add('btagmu_triggers', triggers)

        #### MET filters ####
        met_filters = np.ones(events.size, dtype=bool)
        if isData:
            # this filter is recommended for data only
            met_filters = met_filters & events.Flag['eeBadScFilter']
        for flag in AnalysisProcessor.met_filter_flags[self._year]:
            met_filters = met_filters & events.Flag[flag]
        selection.add('met_filters', met_filters)

        #### ak15 jet selection ####
        leading_fj = fj[fj.sd.pt.argmax()]
        leading_fj = leading_fj[leading_fj.isgood.astype(bool)]
        leading_fj = leading_fj[leading_fj.withmu.astype(bool)]

        #### SV selection for matched with leading ak15 jet ####
        SV['ismatched'] = match(SV, leading_fj, 1.5)
        leading_SV = SV[SV.dxySig.argmax()]
        leading_SV = leading_SV[leading_SV.ismatched.astype(bool)]

        selection.add('fj_pt', (leading_fj.sd.pt.max() > 250))
        selection.add(
            'fj_mass',
            (leading_fj.msd_corr.sum() > 50))  ## optionally also <130

        print('Selections')
        print(selection.names, '\n')

        variables = {
            'ZHbbvsQCD': leading_fj.ZHbbvsQCD,
            'btagJP': leading_fj.btagJP,
            'tau21': leading_fj.tau21,
            'fjmass': leading_fj.msd_corr,
            'fj1pt': leading_fj.sd.pt,
            'svmass': np.log(leading_SV.mass),
            'svdxysig': leading_SV.dxySig
        }

        def fill(dataset, gentype, weight, cut):
            """Fill every histogram named after an entry of ``variables``.

            ``cut`` must be a boolean per-event mask.  NaN entries keep
            their slot but get gentype 0 and weight 0.
            """
            for histname, h in hout.items():
                if not isinstance(h, hist.Hist) or histname not in variables:
                    continue
                selected = variables[histname][cut]
                valid = ~np.isnan(selected)
                h.fill(dataset=dataset,
                       gentype=(valid * gentype[cut]).flatten(),
                       weight=(valid * weight[cut]).flatten(),
                       **{histname: selected.flatten()})

        def fill_cutflow(event_weight):
            """Fill ``cutflow`` before any cut and after each cumulative cut."""
            hout['cutflow'].fill(dataset=dataset,
                                 cutname='nocut',
                                 cut=np.zeros(events.size, dtype=int),
                                 weight=event_weight)
            applied = set()
            for i, icut in enumerate(selection.names):
                applied.add(icut)
                passed = selection.all(*applied)
                hout['cutflow'].fill(dataset=dataset,
                                     cutname=str(icut),
                                     cut=(i + 1) * passed,
                                     weight=event_weight * passed)

        def fill_templates(gentype, jp_weight, sv_weight):
            """Fill the btagJP and log(SV mass) templates for the bb SF."""
            hout['jptemplate'].fill(dataset=dataset,
                                    gentype=gentype,
                                    btagJP=leading_fj.btagJP.sum(),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=jp_weight)
            hout['svtemplate'].fill(dataset=dataset,
                                    gentype=gentype,
                                    svmass=np.log(leading_SV.mass.sum()),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=sv_weight)

        cut = selection.all(*selection.names)

        if isData:
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)

            no_gentype = np.zeros(events.size, dtype=int)
            fill_cutflow(np.ones(events.size))
            fill_templates(no_gentype,
                           np.ones(events.size) * cut,
                           np.ones(events.size) * cut)
            fill(dataset, no_gentype, np.ones(events.size), cut)

        else:
            weights = processor.Weights(len(events))
            event_weight = weights.weight()

            # Mutually exclusive flavour categories, in priority order
            # bb > b > cc > c > other.
            wgentype = {
                'bb': (leading_fj.isbb).sum(),
                'b': (~leading_fj.isbb & leading_fj.isb).sum(),
                'cc':
                (~leading_fj.isbb & ~leading_fj.isb & leading_fj.iscc).sum(),
                'c': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                      & leading_fj.isc).sum(),
                'other': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                          & ~leading_fj.isc).sum(),
            }
            vgentype = np.zeros(events.size, dtype=int)
            for gentype in self._gentype_map.keys():
                vgentype += self._gentype_map[gentype] * wgentype[gentype]

            hout['sumw'].fill(dataset=dataset,
                              sumw=1,
                              weight=events.genWeight.sum())

            # NOTE(review): in both branches below svtemplate is filled with
            # unit weight times `cut` while jptemplate uses the event weight
            # (and no cut in the non-QCD branch).  Kept as found; confirm
            # whether the asymmetry is intended.
            if 'QCD' in dataset:
                fill_cutflow(event_weight)

                ### other variables
                fill(dataset, vgentype, event_weight, cut)

                fill_templates(vgentype,
                               event_weight * cut,
                               np.ones(events.size) * cut)
            else:
                # Select every event.  The mask must be boolean: an integer
                # array of ones would index event 1 once per entry.
                fill(dataset, vgentype, event_weight,
                     np.ones(events.size, dtype=bool))

                fill_templates(vgentype,
                               event_weight,
                               np.ones(events.size) * cut)

        return hout
Example #3
0
    def process(self, df):
        """Fill the ``chan-2mu2e`` and ``chan-4mu`` histograms for one chunk.

        Events need at least two lepton jets passing the ``nocosmic``,
        ``pt > 30`` and ``mintkdist < 50`` requirements.  The two highest-pt
        lepton jets define the channel, the smaller of their two ``pfiso``
        values is the ``iso`` axis, and AK4 jets are counted only when
        harder than the leading lepton jet.

        Args:
            df: chunk of event columns, indexable by branch name and
                exposing ``size``.

        Returns:
            The accumulator with both channel histograms filled, or the
            untouched identity accumulator when the chunk is empty or no
            event has two lepton jets.
        """
        output = self.accumulator.identity()
        # Same empty-chunk guard as the sibling processors: nothing to build
        # weights or candidates from.
        if df.size == 0:
            return output

        dataset = df['dataset']

        ## construct weights ##
        wgts = processor.Weights(df.size)
        if self.data_type != 'data':
            wgts.add('genw', df['weight'])
            npv = df['trueInteractionNum']
            wgts.add('pileup', *(f(npv) for f in self.pucorrs))

        # An event passes the trigger requirement if any listed path fired.
        triggermask = np.logical_or.reduce([df[t] for t in Triggers])
        wgts.add('trigger', triggermask)
        cosmicpairmask = df['cosmicveto_result']
        wgts.add('cosmicveto', cosmicpairmask)
        pvmask = df['metfilters_PrimaryVertexFilter']
        wgts.add('primaryvtx', pvmask)

        weight = wgts.weight()
        ########################

        ak4jets = JaggedCandidateArray.candidatesfromcounts(
            df['akjet_ak4PFJetsCHS_p4'],
            px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
            py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
            pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
            energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
            jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
        )
        ak4jets = ak4jets[ak4jets.jetid & (ak4jets.pt > 30) &
                          (np.abs(ak4jets.eta) < 2.4)]

        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
            sumtkpt=df['pfjet_tkPtSum05'].content,
            pfiso=df['pfjet_pfIsolationNoPU05'].content,
            mintkdist=df['pfjet_pfcands_minTwoTkDist'].content,
        )
        # Classify each lepton jet from its daughters' type codes; the code
        # treats type 3 as PF muon and type 8 as DSA muon (per the variable
        # names -- TODO confirm against the ntuple definition).
        ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
        npfmu = (ljdautype == 3).sum()
        ndsa = (ljdautype == 8).sum()
        isegammajet = (npfmu == 0) & (ndsa == 0)
        ispfmujet = (npfmu >= 2) & (ndsa == 0)
        isdsajet = ndsa > 0
        label = isegammajet.astype(int) * 1 + ispfmujet.astype(
            int) * 2 + isdsajet.astype(int) * 3
        leptonjets.add_attributes(label=label, ndsa=ndsa)
        nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
        leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
        ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        leptonjets.add_attributes(qsum=ljdaucharge)
        leptonjets.add_attributes(
            isneutral=(leptonjets.iseltype
                       | (leptonjets.ismutype & (leptonjets.qsum == 0))))
        leptonjets.add_attributes(
            mucharged=(leptonjets.iseltype
                       | (leptonjets.ismutype & (leptonjets.qsum != 0))))
        ljdsamuSubset = fromNestNestIndexArray(
            df['dsamuon_isSubsetFilteredCosmic1Leg'],
            awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
        leptonjets.add_attributes(nocosmic=(ljdsamuSubset.sum() == 0))
        leptonjets = leptonjets[(leptonjets.nocosmic) & (leptonjets.pt > 30) &
                                (leptonjets.mintkdist < 50)]

        ## __ twoleptonjets__
        twoleptonjets = leptonjets.counts >= 2
        dileptonjets = leptonjets[twoleptonjets]
        ak4jets = ak4jets[twoleptonjets]
        wgt = weight[twoleptonjets]

        if dileptonjets.size == 0:
            return output
        # Leading and sub-leading lepton jets, one of each per event.
        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        ak4jets = ak4jets[ak4jets.pt > (lj0.pt.flatten())]
        ak4jetCounts = (ak4jets.counts > 0).astype(int)
        # Element-wise minimum; the former pair of strict comparisons
        # (a > b)*b + (a < b)*a evaluated to 0 when both values were equal.
        minpfiso = np.minimum(lj0.pfiso, lj1.pfiso).flatten()
        # 1: both lepton jets neutral, 2: both flagged mucharged
        # (electron-type lepton jets satisfy both flags, giving 3).
        ljneutrality = (
            (lj0.isneutral & lj1.isneutral).astype(int) * 1 +
            (lj0.mucharged & lj1.mucharged).astype(int) * 2).flatten()

        ## channel def ##
        #### 2mu2e: exactly one muon-type lepton jet, among the leading two
        singleMuljEvents = dileptonjets.ismutype.sum() == 1
        muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
        channel_2mu2e = singleMuljEvents & muljInLeading2Events

        output['chan-2mu2e'].fill(dataset=dataset,
                                  iso=minpfiso[channel_2mu2e],
                                  val=ak4jetCounts[channel_2mu2e],
                                  weight=wgt[channel_2mu2e])

        #### 4mu: exactly two muon-type lepton jets, both the leading two
        doubleMuljEvents = dileptonjets.ismutype.sum() == 2
        muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
        channel_4mu = doubleMuljEvents & muljIsLeading2Events

        output['chan-4mu'].fill(dataset=dataset,
                                iso=minpfiso[channel_4mu],
                                val=ljneutrality[channel_4mu],
                                weight=wgt[channel_4mu])

        ###########

        return output
    def process(self, df):
        """Fill the ``ndsa`` and ``mutiming`` histograms for one chunk.

        Events with at least two lepton jets are assigned a channel code
        (1 for 2mu2e, 2 for 4mu, 0 otherwise) from their two highest-pt
        lepton jets, passed through a list of event-level cuts, and the
        muon-type lepton jets of the surviving events are histogrammed.
        When ``self.region`` is ``'CR'`` the delta-phi requirement is
        inverted.

        Args:
            df: chunk of event columns, indexable by branch name and
                exposing ``size``.

        Returns:
            The accumulator with both histograms filled, or the untouched
            identity accumulator when the chunk is empty or no event has
            two lepton jets.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        # ---- event weights ----
        event_weights = processor.Weights(df.size)
        if self.data_type != 'data':
            event_weights.add('genw', df['weight'])
            true_npv = df['trueInteractionNum']
            event_weights.add('pileup',
                              *(corr(true_npv) for corr in self.pucorrs))

        fired_any_trigger = np.logical_or.reduce(
            [df[path] for path in Triggers])
        event_weights.add('trigger', fired_any_trigger)
        event_weights.add('cosmicveto', df['cosmicveto_result'])
        event_weights.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])

        weight = event_weights.weight()

        # ---- AK4 jets ----
        ak4jets = JaggedCandidateArray.candidatesfromcounts(
            df['akjet_ak4PFJetsCHS_p4'],
            px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
            py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
            pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
            energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
            hadfrac=df['akjet_ak4PFJetsCHS_hadronEnergyFraction'].content,
            jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
            deepcsv=df['hftagscore_DeepCSV_b'].content,
        )
        # Bit 2 of the DeepCSV word is what the code calls the tight flag.
        tight_bit = 1 << 2
        ak4jets.add_attributes(
            deepcsvTight=(np.bitwise_and(ak4jets.deepcsv, tight_bit)
                          == tight_bit))
        good_ak4 = (ak4jets.jetid & (ak4jets.pt > 20)
                    & (np.abs(ak4jets.eta) < 2.5))
        ak4jets = ak4jets[good_ak4]

        # ---- lepton jets ----
        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
            ncands=df['pfjet_pfcands_n'].content,
        )
        dau_type = awkward.fromiter(df['pfjet_pfcand_type'])
        n_type3 = (dau_type == 3).sum()
        n_type8 = (dau_type == 8).sum()
        no_type8 = n_type8 == 0
        label = (((n_type3 == 0) & no_type8).astype(int) * 1
                 + ((n_type3 >= 2) & no_type8).astype(int) * 2
                 + (n_type8 > 0).astype(int) * 3)
        leptonjets.add_attributes(label=label, ndsa=n_type8)
        n_muon_dau = ((dau_type == 3) | (dau_type == 8)).sum()
        leptonjets.add_attributes(ismutype=(n_muon_dau >= 2),
                                  iseltype=(n_muon_dau == 0))

        mean_muon_time = awkward.fromiter(df['pfjet_pfcand_muonTime']).mean()
        leptonjets.add_attributes(muontiming=mean_muon_time)

        # ---- require two lepton jets ----
        has_two_lj = leptonjets.counts >= 2
        dileptonjets = leptonjets[has_two_lj]
        ak4jets = ak4jets[has_two_lj]
        wgt = weight[has_two_lj]

        if dileptonjets.size == 0:
            return output
        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        # ---- channel code: 1 = 2mu2e, 2 = 4mu ----
        n_mutype = dileptonjets.ismutype.sum()
        any_leading_mu = (lj0.ismutype | lj1.ismutype).flatten()
        both_leading_mu = (lj0.ismutype & lj1.ismutype).flatten()
        channel_2mu2e = ((n_mutype == 1) & any_leading_mu).astype(int) * 1
        channel_4mu = ((n_mutype == 2) & both_leading_mu).astype(int) * 2
        channel_ = channel_2mu2e + channel_4mu

        # ---- event-level cuts ----
        is_2mu2e = channel_2mu2e.astype(bool)
        is_4mu = channel_ == 2

        pass_all = np.ones_like(wgt).astype(bool)
        pass_dphi = (np.abs(lj0.p4.delta_phi(lj1.p4)) > np.pi / 2).flatten()
        pass_njets = ak4jets.counts < 4
        tight_b_jets = ak4jets[(ak4jets.pt > 30)
                               & (np.abs(ak4jets.eta) < 2.4)
                               & ak4jets.deepcsvTight]
        pass_bveto = tight_b_jets.counts == 0
        # 2mu2e only: an electron-type lepton jet among the two with pt > 40.
        hard_egm = (((lj0.iseltype) & (lj0.pt > 40))
                    | ((lj1.iseltype) & (lj1.pt > 40))).flatten()
        pass_egm_pt = (~is_2mu2e) | (is_2mu2e & hard_egm)
        # The first muon-type lepton jet of the pair must have pt > 40.
        pass_mu_pt0 = ((lj0.ismutype & (lj0.pt > 40))
                       | ((~lj0.ismutype)
                          & (lj1.ismutype & (lj1.pt > 40)))).flatten()
        # 4mu only: sub-leading lepton jet pt > 30.
        pass_mu_pt1 = (~is_4mu) | (is_4mu & ((lj1.pt > 30).flatten()))

        if self.region == 'CR':
            pass_dphi = ~pass_dphi

        totcut = np.logical_and.reduce([
            pass_all,
            pass_dphi,
            pass_njets,
            pass_bveto,
            pass_egm_pt,
            pass_mu_pt0,
            pass_mu_pt1,
        ])

        dileptonjets = dileptonjets[totcut]
        wgt = wgt[totcut]
        channel_ = channel_[totcut]

        # Broadcast per-event weight and channel onto each muon-type
        # lepton jet by multiplying with a jagged array of ones.
        ljmu = dileptonjets[dileptonjets.ismutype]
        ljmu_ones = ljmu.pt.ones_like()
        flat_weight = (wgt * ljmu_ones).flatten()
        flat_channel = (channel_ * ljmu_ones).flatten()

        output['ndsa'].fill(dataset=dataset,
                            cnt=ljmu.ndsa.flatten(),
                            weight=flat_weight,
                            channel=flat_channel)
        output['mutiming'].fill(dataset=dataset,
                                t=ljmu.muontiming.flatten(),
                                weight=flat_weight,
                                channel=flat_channel)

        return output
Example #5
0
    def process(self, df):
        # Dataset parameters
        dataset = df['dataset']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']
        datasets = [
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        ]
        for d in datasets:
            if d in dataset: dataset = dataset.split('_')[0]

        ### Recover objects, selection, functions and others...
        # Objects
        isTightMuon = self._objects['isTightMuonPOG']
        isTightElectron = self._objects['isTightElectronPOG']
        isGoodJet = self._objects['isGoodJet']
        isMuonMVA = self._objects[
            'isMuonMVA']  #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15)
        isElecMVA = self._objects[
            'isElecMVA']  #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)

        # Corrections
        GetMuonIsoSF = self._corrections['getMuonIso']
        GetMuonIDSF = self._corrections['getMuonID']

        # Selection
        passNJets = self._selection['passNJets']
        passMETcut = self._selection['passMETcut']
        passTrigger = self._selection['passTrigger']

        # Functions
        pow2 = self._functions['pow2']
        IsClosestToZ = self._functions['IsClosestToZ']
        GetGoodTriplets = self._functions['GetGoodTriplets']

        # Initialize objects
        met = Initialize({
            'pt': df['MET_pt'],
            'eta': 0,
            'phi': df['MET_phi'],
            'mass': 0
        })
        e = Initialize({
            'pt': df['Electron_pt'],
            'eta': df['Electron_eta'],
            'phi': df['Electron_phi'],
            'mass': df['Electron_mass']
        })
        mu = Initialize({
            'pt': df['Muon_pt'],
            'eta': df['Muon_eta'],
            'phi': df['Muon_phi'],
            'mass': df['Muon_mass']
        })
        j = Initialize({
            'pt': df['Jet_pt'],
            'eta': df['Jet_eta'],
            'phi': df['Jet_phi'],
            'mass': df['Jet_mass']
        })

        # Electron selection
        for key in self._e:
            e[key] = e.pt.zeros_like()
            if self._e[key] in df:
                e[key] = df[self._e[key]]
        #e['isGood'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, e.tightChrage, year)
        e['isGood'] = isElecMVA(e.pt,
                                e.eta,
                                e.dxy,
                                e.dz,
                                e.miniIso,
                                e.sip3d,
                                e.mvaTTH,
                                e.elecMVA,
                                e.lostHits,
                                e.convVeto,
                                e.tightCharge,
                                minpt=10)
        leading_e = e[e.pt.argmax()]
        leading_e = leading_e[leading_e.isGood.astype(np.bool)]

        # Muon selection
        for key in self._mu:
            mu[key] = mu.pt.zeros_like()
            if self._mu[key] in df:
                mu[key] = df[self._mu[key]]
        #mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.tight_id, mu.tightCharge, year)
        mu['isGood'] = isMuonMVA(mu.pt,
                                 mu.eta,
                                 mu.dxy,
                                 mu.dz,
                                 mu.miniIso,
                                 mu.sip3d,
                                 mu.mvaTTH,
                                 mu.mediumPrompt,
                                 mu.tightCharge,
                                 minpt=10)
        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)]

        e = e[e.isGood.astype(np.bool)]
        mu = mu[mu.isGood.astype(np.bool)]
        nElec = e.counts
        nMuon = mu.counts

        twoLeps = (nElec + nMuon) == 2
        threeLeps = (nElec + nMuon) == 3
        twoElec = (nElec == 2)
        twoMuon = (nMuon == 2)
        e0 = e[e.pt.argmax()]
        m0 = mu[mu.pt.argmax()]

        # Jet selection
        j['deepjet'] = df['Jet_btagDeepFlavB']
        for key in self._jet:
            j[key] = j.pt.zeros_like()
            if self._jet[key] in df:
                j[key] = df[self._jet[key]]

        j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
        j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(
            np.bool)
        #goodJets = j[(j['isgood'])&(j['isclean'])]
        #j0 = goodJets[goodJets.pt.argmax()]
        #nJets = goodJets.counts

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
        singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
        em = singe.cross(singm)
        emSSmask = (em.i0.charge * em.i1.charge > 0)
        emSS = em[emSSmask]
        nemSS = len(emSS.flatten())

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
        mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

        eepairs = ee.distincts()
        eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
        eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
        eeoffZmask = (eeonZmask == 0)

        mmpairs = mm.distincts()
        mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
        mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
        mmoffZmask = (mmonZmask == 0)

        eeSSonZ = eepairs[eeSSmask & eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ = mmpairs[mmSSmask & mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
        neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten())
        nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten())

        #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

        # Cuts
        eeSSmask = (eeSSmask[eeSSmask].counts > 0)
        mmSSmask = (mmSSmask[mmSSmask].counts > 0)
        eeonZmask = (eeonZmask[eeonZmask].counts > 0)
        eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
        mmonZmask = (mmonZmask[mmonZmask].counts > 0)
        mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
        emSSmask = (emSSmask[emSSmask].counts > 0)

        # njets
        goodJets = j[(j.isclean) & (j.isgood)]
        njets = goodJets.counts
        ht = goodJets.pt.sum()
        j0 = goodJets[goodJets.pt.argmax()]

        # nbtags
        nbtags = goodJets[goodJets.deepjet > 0.2770].counts

        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
        elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
        ee_eem = elec_eem.distincts()
        ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
        ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
        ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
        ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)

        eepair_eem = (ee_eem.i0 + ee_eem.i1)
        trilep_eem = eepair_eem.cross(muon_eem)
        trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

        # mme
        muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
        elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
        mm_mme = muon_mme.distincts()
        mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
        mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
        mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
        mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)

        mmpair_mme = (mm_mme.i0 + mm_mme.i1)
        trilep_mme = mmpair_mme.cross(elec_mme)
        trilep_mme = (trilep_mme.i0 + trilep_mme.i1)
        mZ_mme = mmpair_mme.mass
        mZ_eem = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        ### eee and mmm
        eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
        mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]
        # Create pairs
        eee_groups = eee.distincts()
        mmm_groups = mmm.distincts()
        # Calculate the invariant mass of the pairs
        invMass_eee = ((eee_groups.i0 + eee_groups.i1).mass)
        invMass_mmm = ((mmm_groups.i0 + mmm_groups.i1).mass)
        # OS pairs
        isOSeee = ((eee_groups.i0.charge != eee_groups.i1.charge))
        isOSmmm = ((mmm_groups.i0.charge != mmm_groups.i1.charge))
        # Get the ones with a mass closest to the Z mass (and in a range of  thr)
        clos_eee = IsClosestToZ(invMass_eee, thr=15)
        clos_mmm = IsClosestToZ(invMass_mmm, thr=15)
        # Finally, the mask for eee/mmm with/without OS onZ pair
        eeeOnZmask = (clos_eee) & (isOSeee)
        eeeOffZmask = (eeeOnZmask == 0)
        mmmOnZmask = (clos_mmm) & (isOSmmm)
        mmmOffZmask = (mmmOnZmask == 0)
        eeeOnZmask = (eeeOnZmask[eeeOnZmask].counts > 0)
        eeeOffZmask = (eeeOffZmask[eeeOffZmask].counts > 0)
        mmmOnZmask = (mmmOnZmask[mmmOnZmask].counts > 0)
        mmmOffZmask = (mmmOffZmask[mmmOffZmask].counts > 0)

        # Get Z and W invariant masses
        goodPairs_eee = eee_groups[(clos_eee) & (isOSeee)]
        eZ0 = goodPairs_eee.i0[goodPairs_eee.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()
        eZ1 = goodPairs_eee.i1[goodPairs_eee.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()
        goodPairs_mmm = mmm_groups[(clos_mmm) & (isOSmmm)]
        mZ0 = goodPairs_mmm.i0[goodPairs_mmm.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()
        mZ1 = goodPairs_mmm.i1[goodPairs_mmm.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()

        eee_reg = eee[(eeeOnZmask)].regular()
        eW = np.append(eee_reg, eZ0, axis=1)
        eW = np.append(eW, eZ1, axis=1)
        eWmask = np.apply_along_axis(
            lambda a: [list(a).count(x) == 1 for x in a], 1, eW)
        eW = eW[eWmask]
        mmm_reg = mmm[(mmmOnZmask)].regular()
        mW = np.append(mmm_reg, mZ0, axis=1)
        mW = np.append(mW, mZ1, axis=1)
        mWmask = np.apply_along_axis(
            lambda a: [list(a).count(x) == 1 for x in a], 1, mW)
        mW = mW[mWmask]

        eZ = [x + y for x, y in zip(eZ0, eZ1)]
        triElec = [x + y for x, y in zip(eZ, eW)]
        mZ_eee = [t[0].mass for t in eZ]
        m3l_eee = [t[0].mass for t in triElec]
        mZ = [x + y for x, y in zip(mZ0, mZ1)]
        triMuon = [x + y for x, y in zip(mZ, mW)]
        mZ_mmm = [t[0].mass for t in mZ]
        m3l_mmm = [t[0].mass for t in triMuon]

        # Triggers
        #passTrigger = lambda df, n, m, o : np.ones_like(df['MET_pt'], dtype=np.bool) # XXX
        trig_eeSS = passTrigger(df, 'ee', isData, dataset)
        trig_mmSS = passTrigger(df, 'mm', isData, dataset)
        trig_emSS = passTrigger(df, 'em', isData, dataset)
        trig_eee = passTrigger(df, 'eee', isData, dataset)
        trig_mmm = passTrigger(df, 'mmm', isData, dataset)
        trig_eem = passTrigger(df, 'eem', isData, dataset)
        trig_mme = passTrigger(df, 'mme', isData, dataset)

        # MET filters

        # Weights
        genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
        weights = processor.Weights(df.size)
        weights.add('norm', genw if isData else (xsec / sow) * genw)

        # Selections and cuts
        selections = processor.PackedSelection()
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('emSS', (emSSmask) & (trig_emSS))

        channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
        selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
        selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
        selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
        selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

        channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
        selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
        selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
        selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
        selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

        levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
        selections.add('base', (nElec + nMuon >= 2))
        selections.add('2jets', (njets >= 2))
        selections.add('4jets', (njets >= 4))
        selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
        selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

        # Variables
        invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
        invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
        invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
        invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
        invMass_emSS = (emSS.i0 + emSS.i1).mass

        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['nbtags'] = nbtags
        varnames['invmass'] = {
            'eeSSonZ': invMass_eeSSonZ,
            'eeSSoffZ': invMass_eeSSoffZ,
            'mmSSonZ': invMass_mmSSonZ,
            'mmSSoffZ': invMass_mmSSoffZ,
            'emSS': invMass_emSS,
            'eemSSonZ': mZ_eem,
            'eemSSoffZ': mZ_eem,
            'mmeSSonZ': mZ_mme,
            'mmeSSoffZ': mZ_mme,
            'eeeSSonZ': mZ_eee,
            'eeeSSoffZ': mZ_eee,
            'mmmSSonZ': mZ_mmm,
            'mmmSSoffZ': mZ_mmm,
        }
        varnames['m3l'] = {
            'eemSSonZ': m3l_eem,
            'eemSSoffZ': m3l_eem,
            'mmeSSonZ': m3l_mme,
            'mmeSSoffZ': m3l_mme,
            'eeeSSonZ': m3l_eee,
            'eeeSSoffZ': m3l_eee,
            'mmmSSonZ': m3l_mmm,
            'mmmSSoffZ': m3l_mmm,
        }
        varnames['e0pt'] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt'] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['j0pt'] = j0.pt
        varnames['j0eta'] = j0.eta
        varnames['counts'] = np.ones_like(df['MET_pt'], dtype=np.int)

        # Fill Histos
        hout = self.accumulator.identity()
        hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)

        for var, v in varnames.items():
            for ch in channels2LSS + channels3L:
                for lev in levels:
                    weight = weights.weight()
                    cuts = [ch] + [lev]
                    cut = selections.all(*cuts)
                    weights_flat = weight[cut].flatten()
                    weights_ones = np.ones_like(weights_flat, dtype=np.int)
                    if var == 'invmass':
                        if ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue
                        elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                            continue  #values = v[ch]
                        else:
                            values = v[ch][cut].flatten()
                        hout['invmass'].fill(sample=dataset,
                                             channel=ch,
                                             cut=lev,
                                             invmass=values,
                                             weight=weights_flat)
                    elif var == 'm3l':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                                'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        values = v[ch][cut].flatten()
                        hout['m3l'].fill(sample=dataset,
                                         channel=ch,
                                         cut=lev,
                                         m3l=values,
                                         weight=weights_flat)
                    else:
                        values = v[cut].flatten()
                        if var == 'ht':
                            hout[var].fill(ht=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'met':
                            hout[var].fill(met=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'njets':
                            hout[var].fill(njets=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'nbtags':
                            hout[var].fill(nbtags=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'counts':
                            hout[var].fill(counts=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_ones)
                        elif var == 'e0pt':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            hout[var].fill(e0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0pt':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            hout[var].fill(m0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'e0eta':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            hout[var].fill(e0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0eta':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            hout[var].fill(m0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0pt':
                            if lev == 'base': continue
                            hout[var].fill(j0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0eta':
                            if lev == 'base': continue
                            hout[var].fill(j0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)

        return hout
Example #6
0
    def process(self, df):
        output = self.accumulator.identity()

        datasetFull = df['dataset']
        dataset = datasetFull.replace('_2016', '')

        isData = 'Data' in dataset

        ################################
        # DEFINE JAGGED CANDIDATE ARRAYS
        ################################

        #load muon objects
        muons = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt=df['Muon_pt'],
            eta=df['Muon_eta'],
            phi=df['Muon_phi'],
            mass=df['Muon_mass'],
            charge=df['Muon_charge'],
            relIso=df['Muon_pfRelIso04_all'],
            tightId=df['Muon_tightId'],
            isPFcand=df['Muon_isPFcand'],
            isTracker=df['Muon_isTracker'],
            isGlobal=df['Muon_isGlobal'],
        )

        #load electron objects
        electrons = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt=df['Electron_pt'],
            eta=df['Electron_eta'],
            phi=df['Electron_phi'],
            mass=df['Electron_mass'],
            charge=df['Electron_charge'],
            cutBased=df['Electron_cutBased'],
            d0=df['Electron_dxy'],
            dz=df['Electron_dz'],
        )

        #load jet object
        jets = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'],
            eta=df['Jet_eta'],
            phi=df['Jet_phi'],
            mass=df['Jet_mass'],
            jetId=df['Jet_jetId'],
            btag=df['Jet_btagDeepB'],
            area=df['Jet_area'],
            ptRaw=df['Jet_pt'] * (1 - df['Jet_rawFactor']),
            massRaw=df['Jet_mass'] * (1 - df['Jet_rawFactor']),
            hadFlav=df['Jet_hadronFlavour']
            if not isData else np.ones_like(df['Jet_jetId']),
            genJetIdx=df['Jet_genJetIdx']
            if not isData else np.ones_like(df['Jet_jetId']),
            ptGenJet=np.zeros_like(df['Jet_pt']),
        )

        #load photon objects
        photons = JaggedCandidateArray.candidatesfromcounts(
            df['nPhoton'],
            pt=df['Photon_pt'],
            eta=df['Photon_eta'],
            phi=df['Photon_phi'],
            mass=np.zeros_like(df['Photon_pt']),
            isEE=df['Photon_isScEtaEE'],
            isEB=df['Photon_isScEtaEB'],
            photonId=df['Photon_cutBased'],
            passEleVeto=df['Photon_electronVeto'],
            pixelSeed=df['Photon_pixelSeed'],
            sieie=df['Photon_sieie'],
            chIso=df['Photon_pfRelIso03_chg'] * df['Photon_pt'],
            vidCuts=df['Photon_vidNestedWPBitmap'],
            genFlav=df['Photon_genPartFlav']
            if not isData else np.ones_like(df['Photon_electronVeto']),
            genIdx=df['Photon_genPartIdx']
            if not isData else np.ones_like(df['Photon_electronVeto']),
        )

        rho = df['fixedGridRhoFastjetAll']

        if not isData:

            #load gen parton objects
            genPart = JaggedCandidateArray.candidatesfromcounts(
                df['nGenPart'],
                pt=df['GenPart_pt'],
                eta=df['GenPart_eta'],
                phi=df['GenPart_phi'],
                mass=df['GenPart_mass'],
                pdgid=df['GenPart_pdgId'],
                motherIdx=df['GenPart_genPartIdxMother'],
                status=df['GenPart_status'],
                statusFlags=df['GenPart_statusFlags'],
            )

            genmotherIdx = genPart.motherIdx
            genpdgid = genPart.pdgid

        #################
        # OVERLAP REMOVAL
        #################

        # Overlap removal between related samples
        # TTGamma and TTbar
        # WGamma and WJets
        # ZGamma and ZJets
        # We need to remove events from TTbar which are already counted in the phase space in which the TTGamma sample is produced
        # photon with pT> 10 GeV, eta<5, and at least dR>0.1 from other gen objects
        doOverlapRemoval = False
        if 'TTbar' in dataset:
            doOverlapRemoval = True
            overlapPt = 10.
            overlapEta = 5.
            overlapDR = 0.1
        if re.search("^W[1234]jets$", dataset):
            doOverlapRemoval = True
            overlapPt = 10.
            overlapEta = 2.5
            overlapDR = 0.05
        if 'DYjetsM' in dataset:
            doOverlapRemoval = True
            overlapPt = 15.
            overlapEta = 2.6
            overlapDR = 0.05

        if doOverlapRemoval:
            overlapPhoSelect = ((genPart.pt >= overlapPt) &
                                (abs(genPart.eta) < overlapEta) &
                                (genPart.pdgid == 22) & (genPart.status == 1))
            #potential overlap photons are only those passing the kinematic cuts
            OverlapPhotons = genPart[overlapPhoSelect]

            #if the overlap photon is actually from a non prompt decay, it's not part of the phase space of the separate sample
            idx = OverlapPhotons.motherIdx
            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                        genpdgid.content, genpdgid.starts,
                                        genpdgid.stops, genmotherIdx.content,
                                        genmotherIdx.starts,
                                        genmotherIdx.stops)

            finalGen = genPart[(
                (genPart.status == 1) | (genPart.status == 71)) & ~(
                    (abs(genPart.pdgid) == 12) | (abs(genPart.pdgid) == 14) |
                    (abs(genPart.pdgid) == 16))]
            genPairs = OverlapPhotons['p4'].cross(finalGen['p4'], nested=True)
            ## remove the case where the cross product pairs the gen photon with itself
            genPairs = genPairs[~(genPairs.i0 == genPairs.i1)]
            #find closest gen particle to overlap photons
            dRPairs = genPairs.i0.delta_r(genPairs.i1)

            #the event is overlapping with the separate sample if there is an overlap photon passing the dR cut and not coming from hadronic activity
            isOverlap = ((dRPairs.min() > overlapDR) & (maxParent < 37)).any()
            passOverlapRemoval = ~isOverlap
        else:
            passOverlapRemoval = np.ones_like(df['event']) == 1

        ##################
        # OBJECT SELECTION
        ##################
        # PART 1A Uncomment to add in object selection

        # 1. ADD SELECTION
        #select tight muons
        # tight muons should have a pt of at least 30 GeV, |eta| < 2.4, pass the tight muon ID cut (tightID variable), and have a relative isolation of less than 0.15
        muonSelectTight = ((muons.pt > 30) & (abs(muons.eta) < 2.4) &
                           (muons.tightID) & (muons.relIso < 0.15))

        #select loose muons
        muonSelectLoose = ((muons.pt > 15) & (abs(muons.eta) < 2.4) &
                           ((muons.isPFcand) &
                            (muons.isTracker | muons.isGlobal)) &
                           (muons.relIso < 0.25) & np.invert(muonSelectTight))

        eleEtaGap = (abs(electrons.eta) < 1.4442) | (abs(electrons.eta) >
                                                     1.566)
        elePassD0 = ((abs(electrons.eta) < 1.479) & (abs(electrons.d0) < 0.05)
                     | (abs(electrons.eta) > 1.479) &
                     (abs(electrons.d0) < 0.1))
        elePassDZ = ((abs(electrons.eta) < 1.479) & (abs(electrons.dz) < 0.1) |
                     (abs(electrons.eta) > 1.479) & (abs(electrons.dz) < 0.2))

        #select tight electrons
        # 1. ADD SELECTION
        #select tight electrons
        # tight electrons should have a pt of at least 35 GeV, |eta| < 2.1, pass the cut based electron id (cutBased variable in NanoAOD>=4), and pass the etaGap, D0, and DZ cuts defined above
        electronSelectTight = ((electrons.pt > 35) & (abs(electrons.eta) < 2.1)
                               & (electrons.cutBased >= 4) & eleEtaGap
                               & elePassD0 & elePassDZeleEtaGap)

        #select loose electrons
        electronSelectLoose = ((electrons.pt > 15) & (abs(electrons.eta) < 2.4)
                               & (electrons.cutBased >= 1) & eleEtaGap
                               & elePassD0 & elePassDZ
                               & np.invert(electronSelectTight))

        # 1. ADD SELECTION
        #  Object selection
        #select the subset of muons passing the muonSelectTight and muonSelectLoose cuts
        tightMuon = muons[muonSelectTight]
        looseMuon = muons[muonSelectLoose]

        # 1. ADD SELECTION
        #  Object selection
        #select the subset of electrons passing the electronSelectTight and electronSelectLoose cuts
        tightElectron = electros[electronSelectTight]
        looseElectron = electros[electronSelectLoose]

        #### Calculate deltaR between photon and nearest muon
        ####### make combination pairs
        phoMu = photons['p4'].cross(tightMuon['p4'], nested=True)

        ####### check delta R of each combination: the photon passes if every pair has dR > 0.4, or if there are no tight muons
        dRphomu = (phoMu.i0.delta_r(phoMu.i1) > 0.4).all() | (tightMuon.counts
                                                              == 0)
        phoEle = photons['p4'].cross(tightElectron['p4'], nested=True)
        dRphoele = ((phoEle.i0.delta_r(phoEle.i1)).min() >
                    0.4) | (tightElectron.counts == 0)

        #photon selection (no ID requirement used here)
        photonSelect = ((photons.pt > 20) & (abs(photons.eta) < 1.4442) &
                        (photons.isEE | photons.isEB) & (photons.passEleVeto)
                        & np.invert(photons.pixelSeed) & dRphomu & dRphoele)

        #split out the ID requirement, enabling Iso and SIEIE to be inverted for control regions
        photonID = photons.photonId >= 2

        #parse VID cuts, define loose photons (photons without chIso cut)
        photon_MinPtCut = (photons.vidCuts >> 0 & 3) >= 2
        photon_PhoSCEtaMultiRangeCut = (photons.vidCuts >> 2 & 3) >= 2
        photon_PhoSingleTowerHadOverEmCut = (photons.vidCuts >> 4 & 3) >= 2
        photon_PhoFull5x5SigmaIEtaIEtaCut = (photons.vidCuts >> 6 & 3) >= 2
        photon_ChIsoCut = (photons.vidCuts >> 8 & 3) >= 2
        photon_NeuIsoCut = (photons.vidCuts >> 10 & 3) >= 2
        photon_PhoIsoCut = (photons.vidCuts >> 12 & 3) >= 2

        #photons passing all ID requirements, without the charged hadron isolation cut applied
        photonID_NoChIso = (photon_MinPtCut & photon_PhoSCEtaMultiRangeCut
                            & photon_PhoSingleTowerHadOverEmCut
                            & photon_PhoFull5x5SigmaIEtaIEtaCut
                            & photon_NeuIsoCut & photon_PhoIsoCut)

        # 1. ADD SELECTION
        #  Object selection
        #select tightPhotons, the subset of photons passing the photonSelect cut and the photonID cut
        tightPhotons = photons[photonSelect & photonID]
        #select loosePhotons, the subset of photons passing the photonSelect cut and all photonID cuts without the charged hadron isolation cut applied
        loosePhotons = photons[photonSelect & photonID_NoChIso]

        #update jet kinematics based on jet energy systematic uncertainties
        if not isData:
            genJet = JaggedCandidateArray.candidatesfromcounts(
                df['nGenJet'],
                pt=df['GenJet_pt'],
                eta=df['GenJet_eta'],
                phi=df['GenJet_phi'],
                mass=df['GenJet_mass'],
            )

            jets.genJetIdx[
                jets.genJetIdx >= genJet.
                counts] = -1  #fixes a but in genJet indices, skimmed after genJet matching

            jets['ptGenJet'][jets.genJetIdx > -1] = genJet[jets.genJetIdx[
                jets.genJetIdx > -1]].pt
            jets['rho'] = jets.pt.ones_like() * rho

            #adds additional columns to the jets array, containing the jet pt with JEC and JER variations
            #    additional columns added to jets:  pt_jer_up,   mass_jer_up
            #                                       pt_jer_down, mass_jer_down
            #                                       pt_jes_up,   mass_jes_up
            #                                       pt_jes_down, mass_jes_down
            Jet_transformer.transform(jets)

            # 4. ADD SYSTEMATICS
            #   If processing a jet systematic (based on value of self.jetSyst variable) update the jet pt and mass to reflect the jet systematic uncertainty variations
            #   Use the function updateJetP4(jets, pt=NEWPT, mass=NEWMASS) to update the pt and mass

        ##check dR jet,lepton & jet,photon
        jetMu = jets['p4'].cross(tightMuon['p4'], nested=True)
        dRjetmu = (
            (jetMu.i0.delta_r(jetMu.i1)).min() > 0.4) | (tightMuon.counts == 0)

        jetEle = jets['p4'].cross(tightElectron['p4'], nested=True)
        dRjetele = ((jetEle.i0.delta_r(jetEle.i1)).min() >
                    0.4) | (tightElectron.counts == 0)

        jetPho = jets['p4'].cross(tightPhotons['p4'], nested=True)
        dRjetpho = ((jetPho.i0.delta_r(jetPho.i1)).min() >
                    0.1) | (tightPhotons.counts == 0)

        # 1. ADD SELECTION
        #select good jets
        # jets should have a pt of at least 30 GeV, |eta| < 2.4, pass the medium jet id (bit-wise selected from the jetId variable), and pass the delta R cuts defined above (dRjetmu, dRjetele, dRjetpho)
        jetSelect = ((jets.pt > 30) & (abs(jets.eta) < 2.4) &
                     ((jets.jetId >> 1 & 1) == 1) & dRjetmu & dRjetele
                     & dRjetpho)

        # 1. ADD SELECTION
        #select the subset of jets passing the jetSelect cuts
        tightJets = jets[jetSelect]

        #find jets passing DeepCSV medium working point
        bTagWP = 0.6321  #2016 DeepCSV working point

        # 1. ADD SELECTION
        # select the subset of tightJets which pass the Deep CSV tagger
        bTaggedJets = tightJets[jets.btag > bTagWP]

        #####################
        # EVENT SELECTION
        #####################
        ### PART 1B: Uncomment to add event selection
        """
        # 1. ADD SELECTION
        ## apply triggers
        # muon events should be triggered by either the HLT_IsoMu24 or HLT_IsoTkMu24 triggers
        # electron events should be triggered by HLT_Ele27_WPTight_Gsf trigger
        # HINT: trigger values can be accessed with the variable df['TRIGGERNAME'], 
        # the bitwise or operator can be used to select multiple triggers df['TRIGGER1'] | df['TRIGGER2']
        muTrigger = df['HLT_IsoMu24'] & df['HLT_IsoTkMu24']
        eleTrigger = df['HLT_Ele27_WPTight_Gsf']

        # 1. ADD SELECTION
        #  Event selection
        #oneMuon, should be true if there is exactly one tight muon in the event (hint, the .counts method returns the number of objects in each row of a jagged array)
        oneMuon = ?
        #muVeto, should be true if there are no tight muons in the event
        muVeto = ?

        # 1. ADD SELECTION
        #  Event selection
        #oneEle should be true if there is exactly one tight electron in the event
        oneEle = ?
        #eleVeto should be true if there are no tight electrons in the event
        eleVeto = ?

        # 1. ADD SELECTION
        #  Event selection
        #looseMuonSel and looseElectronSel should be tru if there are 0 loose muons or electrons in the event
        looseMuonSel = ?
        looseElectronSel = ?


        # 1. ADD SELECTION
        # muon selection, requires events to pass:   muon trigger
        #                                            overlap removal
        #                                            have exactly one muon
        #                                            have no electrons
        #                                            have no loose muons
        #                                            have no loose electrons
        muon_eventSelection = ?
        # electron selection, requires events to pass:   electron trigger
        #                                                overlap removal
        #                                                have exactly one electron
        #                                                have no muons
        #                                                have no loose muons
        #                                                have no loose electrons
        electron_eventSelection = ?

        #create a selection object
        selection = processor.PackedSelection()

        # 1. ADD SELECTION
        #add selection 'eleSel', for events passing the electron event selection, and muSel for those passing the muon event selection
        #  ex: selection.add('testSelection', array_of_booleans)
        selection.add('eleSel', ???)
        selection.add('muSel', ???)

        #add two jet selection criteria
        #   First, 'jetSel' which selects events with at least 4 tightJets and at least one bTaggedJets
        selection.add('jetSel', ???)
        #   Second, 'jetSel_3j0t' which selects events with at least 3 tightJets and exactly zero bTaggedJets
        selection.add('jetSel_3j0t', ???)

        # add selection for events with exactly 0 tight photons
        selection.add('zeroPho', ?)
        # add selection for events with exactly 1 tight photon
        selection.add('onePho', ?)
        # add selection for events with exactly 1 loose photon
        selection.add('loosePho', ?)
        """

        ##################
        # EVENT VARIABLES
        ##################

        # PART 2A: Uncomment to begin implementing event variables

        # 2. DEFINE VARIABLES
        ## Define M3, mass of the 3-jet combination with the highest pT
        # find all possible combinations of 3 tight jets in the events (hint: using the .p4.choose() method of jagged arrays to do combinations of the TLorentzVectors)
        triJet = tightjets.p4.choose(3)  # need to update
        # calculate
        triJetPt = (triJet.i0 + triJet.i1 + triJet.i2).pt
        triJetMass = (triJet.i0 + triJet.i1 + triJet.i2).mass

        # define the M3 variable, the triJetMass of the combination with the highest triJetPt value (hint: using the .argmax() method)
        M3 = triJetMass[triJetPt.argmax()]

        leadingPhoton = tightPhotons[:, :1]
        leadingPhotonLoose = loosePhotons[:, :1]

        # 2. DEFINE VARIABLES
        # define egammaMass, mass of combinations of tightElectron and leadingPhoton (hint: using the .cross() method)
        egammaPairs = tightElectron.p4.cross(LeadingPhoton.p4)
        egammaMass = tightElectron.p4.cross(LeadingPhoton.mass)
        # define mugammaMass, mass of combinations of tightMuon and leadingPhoton (hint: using the .cross() method)
        mugammaPairs = tightMuon.p4.cross(LeadingPhoton.p4)
        mugammaMass = tightMuon.p4.cross(LeadingPhoton.mass)

        ###################
        # PHOTON CATEGORIES
        ###################

        # Define photon category for each event

        phoCategory = np.ones(df.size)
        phoCategoryLoose = np.ones(df.size)

        # PART 2B: Uncomment to begin implementing photon categorization
        """
        if not isData:
            #### Photon categories, using genIdx branch of the leading photon in the event
            idx = leadingPhoton.genIdx
            
            # look through gen particle history, finding the highest PDG ID
            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops, 
                                        genpdgid.content, genpdgid.starts, genpdgid.stops, 
                                        genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)

            # reco photons matched to a generated photon
            matchedPho = (genpdgid[idx]==22).any()
            # reco photons really generated as electrons
            matchedEle = (abs(genpdgid[idx])==11).any()

            # if the gen photon has a PDG ID > 25 in its history, it has a hadronic parent
            hadronicParent = maxParent>25


            #####
            # 2. DEFINE VARIABLES
            # define the photon categories for tight photon events
            # a genuine photon is a reconstructed photon which is matched to a generator level photon, and does not have a hadronic parent
            isGenPho = ?
            # a hadronic photon is a reconstructed photon which is matched to a generator level photon, but has a hadronic parent
            isHadPho = ?
            # a misidentified electron is a reconstructed photon which is matched to a generator level electron
            isMisIDele = ?
            # a hadronic/fake photon is a reconstructed photon that does not fall within any of the above categories
            isHadFake = ?
            
            #define integer definition for the photon category axis
            phoCategory = 1*isGenPho + 2*isMisIDele + 3*isHadPho + 4*isHadFake
            

            # do photon matching for loose photons as well
            # look through parentage to find if any hadrons in genPhoton parent history
            idx = leadingPhotonLoose.genIdx

            # reco photons matched to a generated photon
            matchedPhoLoose = (genpdgid[idx]==22).any()
            # reco photons really generated as electrons
            matchedEleLoose = (abs(genpdgid[idx])==11).any()

            maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops, 
                                        genpdgid.content, genpdgid.starts, genpdgid.stops, 
                                        genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)

            hadronicParent = maxParent>25

            #####
            # 2. DEFINE VARIABLES
            # a genuine photon is a reconstructed photon which is matched to a generator level photon, and does not have a hadronic parent
            isGenPhoLoose = ?
            # a hadronic photon is a reconstructed photon which is matched to a generator level photon, but has a hadronic parent
            isHadPhoLoose = ?
            # a misidentified electron is a reconstructed photon which is matched to a generator level electron
            isMisIDeleLoose = ?
            # a hadronic/fake photon is a reconstructed photon that does not fall within any of the above categories
            isHadFakeLoose = ?

            #define integer definition for the photon category axis
            phoCategoryLoose = 1*isGenPhoLoose + 2*isMisIDeleLoose + 3*isHadPhoLoose + 4*isHadFakeLoose
        """

        ################
        # EVENT WEIGHTS
        ################

        #create a processor Weights object, with the same length as the number of events in the chunk
        weights = processor.Weights(len(df['event']))

        if not isData:

            lumiWeight = np.ones(df.size)
            nMCevents = self.mcEventYields[datasetFull]
            xsec = crossSections[dataset]
            luminosity = 35860.0
            lumiWeight *= xsec * luminosity / nMCevents

            weights.add('lumiWeight', lumiWeight)

            # PART 4: Uncomment to add weights and systematics
            """
            nPUTrue = df['Pileup_nTrueInt']

            # 4. SYSTEMATICS
            # calculate pileup weights and variations
            # use the puLookup, puLookup_Up, and puLookup_Down lookup functions to find the nominal and up/down systematic weights
            # the puLookup function is called with the full dataset name (datasetFull) and the number of true interactions
            puWeight = ?
            puWeight_Up = ?
            puWeight_Down = ?
            # add the puWeight and its uncertainties to the weights container
            weights.add('puWeight',weight=?, weightUp=?, weightDown=?)


            eleID = self.ele_id_sf(tightElectron.eta, tightElectron.pt)
            eleIDerr = self.ele_id_err(tightElectron.eta, tightElectron.pt)
            eleRECO = self.ele_reco_sf(tightElectron.eta, tightElectron.pt)
            eleRECOerr = self.ele_reco_err(tightElectron.eta, tightElectron.pt)

            eleSF = (eleID*eleRECO).prod()
            eleSF_up = ((eleID + eleIDerr) * (eleRECO + eleRECOerr)).prod()
            eleSF_down = ((eleID - eleIDerr) * (eleRECO - eleRECOerr)).prod()
            # 4. SYSTEMATICS
            # add electron efficiency weights to the weight container
            weights.add('eleEffWeight',weight=?, weightUp=?, weightDown=?)

            muID = self.mu_id_sf(tightMuon.eta, tightMuon.pt)
            muIDerr = self.mu_id_err(tightMuon.eta, tightMuon.pt)
            muIso = self.mu_iso_sf(tightMuon.eta, tightMuon.pt)
            muIsoerr = self.mu_iso_err(tightMuon.eta, tightMuon.pt)
            muTrig = self.mu_iso_sf(abs(tightMuon.eta), tightMuon.pt)
            muTrigerr = self.mu_iso_err(abs(tightMuon.eta), tightMuon.pt)
            
            muSF = (muID*muIso*muTrig).prod()
            muSF_up = ((muID + muIDerr) * (muIso + muIsoerr) * (muTrig + muTrigerr)).prod()
            muSF_down = ((muID - muIDerr) * (muIso - muIsoerr) * (muTrig - muTrigerr)).prod()

            # 4. SYSTEMATICS
            # add muon efficiency weights to the weight container
            weights.add('muEffWeight',weight=?, weightUp=?, weightDown=?)

            #btag key name
            #name / working Point / type / systematic / jetType
            #  ... / 0-loose 1-medium 2-tight / comb,mujets,iterativefit / central,up,down / 0-b 1-c 2-udcsg 

            bJetSF_b = self.evaluator['btag2016DeepCSV_1_comb_central_0'](tightJets[tightJets.hadFlav==5].eta, tightJets[tightJets.hadFlav==5].pt, tightJets[tightJets.hadFlav==5].btag)
            bJetSF_c = self.evaluator['btag2016DeepCSV_1_comb_central_1'](tightJets[tightJets.hadFlav==4].eta, tightJets[tightJets.hadFlav==4].pt, tightJets[tightJets.hadFlav==4].btag)
            bJetSF_udcsg = self.evaluator['btag2016DeepCSV_1_incl_central_2'](tightJets[tightJets.hadFlav==0].eta, tightJets[tightJets.hadFlav==0].pt, tightJets[tightJets.hadFlav==0].btag)

            bJetSF_b_up = self.evaluator['btag2016DeepCSV_1_comb_up_0'](tightJets[tightJets.hadFlav==5].eta, tightJets[tightJets.hadFlav==5].pt, tightJets[tightJets.hadFlav==5].btag)
            bJetSF_c_up = self.evaluator['btag2016DeepCSV_1_comb_up_1'](tightJets[tightJets.hadFlav==4].eta, tightJets[tightJets.hadFlav==4].pt, tightJets[tightJets.hadFlav==4].btag)
            bJetSF_udcsg_up = self.evaluator['btag2016DeepCSV_1_incl_up_2'](tightJets[tightJets.hadFlav==0].eta, tightJets[tightJets.hadFlav==0].pt, tightJets[tightJets.hadFlav==0].btag)

            bJetSF_b_down = self.evaluator['btag2016DeepCSV_1_comb_down_0'](tightJets[tightJets.hadFlav==5].eta, tightJets[tightJets.hadFlav==5].pt, tightJets[tightJets.hadFlav==5].btag)
            bJetSF_c_down = self.evaluator['btag2016DeepCSV_1_comb_down_1'](tightJets[tightJets.hadFlav==4].eta, tightJets[tightJets.hadFlav==4].pt, tightJets[tightJets.hadFlav==4].btag)
            bJetSF_udcsg_down = self.evaluator['btag2016DeepCSV_1_incl_down_2'](tightJets[tightJets.hadFlav==0].eta, tightJets[tightJets.hadFlav==0].pt, tightJets[tightJets.hadFlav==0].btag)

            bJetSF = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
            bJetSF.content[(tightJets.hadFlav==5).content] = bJetSF_b.content
            bJetSF.content[(tightJets.hadFlav==4).content] = bJetSF_c.content
            bJetSF.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg.content

            bJetSF_heavy_up = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
            bJetSF_heavy_up.content[(tightJets.hadFlav==5).content] = bJetSF_b_up.content
            bJetSF_heavy_up.content[(tightJets.hadFlav==4).content] = bJetSF_c_up.content
            bJetSF_heavy_up.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg.content

            bJetSF_heavy_down = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
            bJetSF_heavy_down.content[(tightJets.hadFlav==5).content] = bJetSF_b_down.content
            bJetSF_heavy_down.content[(tightJets.hadFlav==4).content] = bJetSF_c_down.content
            bJetSF_heavy_down.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg.content

            bJetSF_light_up = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
            bJetSF_light_up.content[(tightJets.hadFlav==5).content] = bJetSF_b.content
            bJetSF_light_up.content[(tightJets.hadFlav==4).content] = bJetSF_c.content
            bJetSF_light_up.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg_up.content

            bJetSF_light_down = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
            bJetSF_light_down.content[(tightJets.hadFlav==5).content] = bJetSF_b.content
            bJetSF_light_down.content[(tightJets.hadFlav==4).content] = bJetSF_c.content
            bJetSF_light_down.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg_down.content

            ## mc efficiency lookup, data efficiency is eff* scale factor
            btagEfficiencies = taggingEffLookup(datasetFull,tightJets.hadFlav,tightJets.pt,tightJets.eta)
            btagEfficienciesData = btagEfficiencies*bJetSF

            btagEfficienciesData_b_up   = btagEfficiencies*bJetSF_heavy_up
            btagEfficienciesData_b_down = btagEfficiencies*bJetSF_heavy_down
            btagEfficienciesData_l_up   = btagEfficiencies*bJetSF_light_up
            btagEfficienciesData_l_down = btagEfficiencies*bJetSF_light_down

            ##probability is the product of all efficiencies of tagged jets, times product of 1-eff for all untagged jets
            ## https://twiki.cern.ch/twiki/bin/view/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
            pMC   = btagEfficiencies[btagged].prod()     * (1.-btagEfficiencies[np.invert(btagged)]).prod() 
            pData = btagEfficienciesData[btagged].prod() * (1.-btagEfficienciesData[np.invert(btagged)]).prod()
            pData_b_up = btagEfficienciesData_b_up[btagged].prod() * (1.-btagEfficienciesData_b_up[np.invert(btagged)]).prod()
            pData_b_down = btagEfficienciesData_b_down[btagged].prod() * (1.-btagEfficienciesData_b_down[np.invert(btagged)]).prod()
            pData_l_up = btagEfficienciesData_l_up[btagged].prod() * (1.-btagEfficienciesData_l_up[np.invert(btagged)]).prod()
            pData_l_down = btagEfficienciesData_l_down[btagged].prod() * (1.-btagEfficienciesData_l_down[np.invert(btagged)]).prod()

            pMC[pMC==0]=1. #avoid 0/0 error
            btagWeight = pData/pMC

            pData[pData==0] = 1. #avoid divide by 0 error
            btagWeight_b_up = pData_b_up/pData
            btagWeight_b_down = pData_b_down/pData
            btagWeight_l_up = pData_l_up/pData
            btagWeight_l_down = pData_l_down/pData

            weights.add('btagWeight',btagWeight)

            weights.add('btagWeight_heavy',weight=np.ones_like(btagWeight), weightUp=btagWeight_b_up, weightDown=btagWeight_b_down)
            weights.add('btagWeight_light',weight=np.ones_like(btagWeight), weightUp=btagWeight_l_up, weightDown=btagWeight_l_down)




            #in some samples, generator systematics are not available; in those cases systematic weights of 1. are used
            try:
                generatorWeight = df['Generator_weight']
                generatorWeight.shape = (generatorWeight.size,1)

                LHEWeight_originalXWGTUP = df['LHEWeight_originalXWGTUP']
                LHEWeight_originalXWGTUP.shape = (LHEWeight_originalXWGTUP.size,1)

                nPSWeights = df['nPSWeight']
                PSWeights = df['PSWeight']
                PSWeights.shape = (nPSWeights.size,int(nPSWeights.mean()))
                if nPSWeights.mean()==1:
                    hasWeights=False
                
                nLHEScaleWeights = df['nLHEScaleWeight']
                LHEScaleWeights = df['LHEScaleWeight']
                LHEScaleWeights.shape = (nLHEScaleWeights.size,int(nLHEScaleWeights.mean()))
                
                nLHEPdfWeights = df['nLHEPdfWeight']
                LHEPdfWeights = df['LHEPdfWeight']
                LHEPdfWeights.shape = (nLHEPdfWeights.size,int(nLHEPdfWeights.mean()))

                #PDF Uncertainty weights
                #avoid errors from 0/0 division
                if (LHEPdfWeights[:,:1]==0).any():
                    LHEPdfWeights[:,0][LHEPdfWeights[:,0]==0] = 1.
                LHEPdfVariation = LHEPdfWeights / LHEPdfWeights[:,:1]

                weights.add('PDF', weight=np.ones(df.size), weightUp=LHEPdfVariation.max(axis=1), weightDown=LHEPdfVariation.min(axis=1))

                #Q2 Uncertainty weights
                if nLHEScaleWeights.mean()==9:
                    scaleWeightSelector=[0,1,3,5,7,8]
                elif nLHEScaleWeights.mean()==44:
                    scaleWeightSelector=[0,5,15,24,34,39]
                else:
                    scaleWeightSelector=[]

                LHEScaleVariation = LHEScaleWeights[:,scaleWeightSelector]

                weights.add('Q2Scale', weight=np.ones(df.size), weightUp=LHEScaleVariation.max(axis=1), weightDown=LHEScaleVariation.min(axis=1))

                #ISR / FSR uncertainty weights
                if not (generatorWeight==LHEWeight_originalXWGTUP).all():
                    PSWeights = PSWeights * LHEWeight_originalXWGTUP / generatorWeight

                weights.add('ISR',weight=np.ones(df.size), weightUp=PSWeights[:,2], weightDown=PSWeights[:,0])
                weights.add('FSR',weight=np.ones(df.size), weightUp=PSWeights[:,3], weightDown=PSWeights[:,1])

            else:
                weights.add('ISR',    weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
                weights.add('FSR',    weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
                weights.add('PDF',    weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
                weights.add('Q2Scale',weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))

            """

        ###################
        # FILL HISTOGRAMS
        ###################
        # PART 3: Uncomment to add histograms
        """
        #list of systematics
        systList = ['nowegiht','nominal']

        # PART 4: SYSTEMATICS
        # uncomment the full list after systematics have been implemented
        #systList = ['noweight','nominal','puWeightUp','puWeightDown','muEffWeightUp','muEffWeightDown','eleEffWeightUp','eleEffWeightDown','btagWeight_lightUp','btagWeight_lightDown','btagWeight_heavyUp','btagWeight_heavyDown', 'ISRUp', 'ISRDown', 'FSRUp', 'FSRDown', 'PDFUp', 'PDFDown', 'Q2ScaleUp', 'Q2ScaleDown']

        if not self.jetSyst=='nominal':
            systList=[self.jetSyst]

        if isData:
            systList = ['noweight']

        for syst in systList:
            
            #find the event weight to be used when filling the histograms
            weightSyst = syst
            #in the case of 'nominal', or the jet energy systematics, no weight systematic variation is used (weightSyst=None)
            if syst in ['nominal','JERUp','JERDown','JESUp','JESDown']:
                weightSyst=None
                
            if syst=='noweight':
                evtWeight = np.ones(df.size)
            else:
                # call weights.weight() with the name of the systematic to be varied
                evtWeight = weights.weight(weightSyst)


            #loop over both electron and muon selections
            for lepton in ['electron','muon']:
                if lepton=='electron':
                    lepSel='eleSel'
                if lepton=='muon':
                    lepSel='muSel'

                # 3. GET HISTOGRAM EVENT SELECTION
                #  use the selection.all() method to select events passing the lepton selection, 4-jet 1-tag jet selection, and either the one-photon or loose-photon selections
                #  ex: selection.all( *('LIST', 'OF', 'SELECTION', 'CUTS') )
                phosel = selection.all( *(???))
                phoselLoose = selection.all( *(???) )

                # 3. FILL HISTOGRAMS
                #    fill photon_pt and photon_eta, using the tightPhotons array, from events passing the phosel selection
                output['photon_pt'].fill(dataset=dataset,
                                         pt=?,
                                         category=?,
                                         lepFlavor=lepton,
                                         systematic=syst,
                                         weight=?)
    
                output['photon_eta'].fill(dataset=dataset,
                                         pt=?,
                                         category=?,
                                         lepFlavor=lepton,
                                         systematic=syst,
                                         weight=?)

                #    fill photon_chIso histogram, using the loosePhotons array (photons passing all cuts, except the charged hadron isolation cuts)
                output['photon_chIso'].fill(dataset=dataset,
                                            chIso=?,
                                            category=?,
                                            lepFlavor=lepton,
                                            systematic=syst,
                                            weight=?)
                
                #    fill M3 histogram, for events passing the phosel selection
                output['M3'].fill(dataset=dataset,
                                  M3=?,
                                  category=?,
                                  lepFlavor=lepton,
                                  systematic=syst,
                                  weight=?)

                
            
            # 3. GET HISTOGRAM EVENT SELECTION
            #  use the selection.all() method to select events passing the eleSel or muSel selection, 3-jet 0-btag selection, and have exactly one photon
            phosel_3j0t_e  = selection.all( *('eleSel', ???) )
            phosel_3j0t_mu = selection.all( *('muSel', ???) )

            # 3. FILL HISTOGRAMS
            # fill photon_lepton_mass_3j0t histogram, using the egammaMass array, for events passing the phosel_3j0t_e 
            output['photon_lepton_mass_3j0t'].fill(dataset=dataset,
                                                   mass=?,
                                                   category=?
                                                   lepFlavor='electron',
                                                   systematic=syst,
                                                   weight=?)
            output['photon_lepton_mass_3j0t'].fill(dataset=dataset,
                                                   mass=?,
                                                   category=?,
                                                   lepFlavor='muon',
                                                   systematic=syst,
                                                   weight=?)
            
        """

        output['EventCount'] = len(df['event'])

        return output
Example #7
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_znunu'] = is_lo_znunu(dataset)
        df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
        df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
            'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
                'is_lo_w_ewk'] | df['is_lo_z_ewk']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
                'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
            gen = setup_gen_candidates(df)
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            gen = setup_gen_candidates(df)
            all_gen_photons = gen[(gen.pdg == 22)]
            prompt_mask = (all_gen_photons.status
                           == 1) & (all_gen_photons.flag & 1 == 1)
            stat1_mask = (all_gen_photons.status == 1)
            gen_photons = all_gen_photons[prompt_mask |
                                          (~prompt_mask.any()) & stat1_mask]
            gen_photon = gen_photons[gen_photons.pt.argmax()]

            gen_v_pt = gen_photon.pt.max()

        # Generator-level leading dijet mass
        if df['has_lhe_v_pt']:
            genjets = setup_lhe_cleaned_genjets(df)
            digenjet = genjets[:, :2].distincts()
            df['mjj_gen'] = digenjet.mass.max()
            df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0)

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
            df, cfg)

        # Remove jets in accordance with the noise recipe
        if df['year'] == 2017:
            ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) |
                      (ak4.abseta > 3.139)]
            bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) |
                          (bjets.abseta > 3.139)]

        # Filtering ak4 jets according to pileup ID
        ak4 = ak4[ak4.puid]

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts == 1) *
                       mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) *
                       mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index = ak4.pt.argmax()

        elejet_pairs = ak4[:, :1].cross(electrons)
        df['dREleJet'] = np.hypot(
            elejet_pairs.i0.eta - elejet_pairs.i1.eta,
            dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:, :1].cross(muons)
        df['dRMuonJet'] = np.hypot(
            muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
            dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                                   muons, photons)

        df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt
        df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]

        df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt

        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30,
                                                  etamax=5.0)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               met_phi,
                                               njet=4,
                                               ptmin=30,
                                               etamax=5.0)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size) == 1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)

        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('at_least_one_tau', taus.counts > 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('mindphijm',
                      df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)

        selection.add('dpfcalo_sr',
                      np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('dpfcalo_cr',
                      np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)

        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)
        selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL)

        # AK4 dijet
        diak4 = ak4[:, :2].distincts()
        leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
            np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
        trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
            np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
        hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
        has_track0 = np.abs(diak4.i0.eta) <= 2.5
        has_track1 = np.abs(diak4.i1.eta) <= 2.5

        leadak4_id = diak4.i0.tightId & (has_track0 * (
            (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
            (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
        trailak4_id = has_track1 * (
            (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
            (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
        df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

        leading_jet_in_horn = ((diak4.i0.abseta < 3.2) &
                               (diak4.i0.abseta > 2.8)).any()
        trailing_jet_in_horn = ((diak4.i1.abseta < 3.2) &
                                (diak4.i1.abseta > 2.8)).any()

        selection.add('hornveto', (df['dPFTkSR'] < 0.8)
                      | ~(leading_jet_in_horn | trailing_jet_in_horn))

        if df['year'] == 2018:
            if df['is_data']:
                metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) &
                                   (df['run'] > 319077))
            else:
                metphihem_mask = pass_all
            selection.add("metphihemextveto", metphihem_mask)
            selection.add('no_el_in_hem',
                          electrons[electrons_in_hem(electrons)].counts == 0)
        else:
            selection.add("metphihemextveto", pass_all)
            selection.add('no_el_in_hem', pass_all)

        selection.add('two_jets', diak4.counts > 0)
        selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
        selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
        selection.add('hemisphere', hemisphere)
        selection.add('leadak4_id', leadak4_id.any())
        selection.add('trailak4_id', trailak4_id.any())
        selection.add('mjj',
                      df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
        selection.add(
            'dphijj',
            df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
        selection.add(
            'detajj',
            df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

        # Cleaning cuts for signal region
        max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef)
        selection.add('max_neEmEF', (max_neEmEF < 0.7).any())

        vec_b = calculate_vecB(ak4, met_pt, met_phi)
        vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])

        no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta >
                                                         2.5).any()
        no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta <
                                                        3.0).any()

        at_least_one_jet_in_hf = (diak4.i0.abseta >
                                  3.0).any() | (diak4.i1.abseta > 3.0).any()
        at_least_one_jet_in_trk = (diak4.i0.abseta <
                                   2.5).any() | (diak4.i1.abseta < 2.5).any()

        # Categorized cleaning cuts
        eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) &
                        (vec_dphi < 1.0)) | (
                            (no_jet_in_trk & at_least_one_jet_in_hf) &
                            (vec_b < 0.2))

        selection.add('eemitigation', eemitigation)

        # HF-HF veto in SR
        both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0)
        selection.add('veto_hfhf', ~both_jets_in_hf.any())

        # Divide into three categories for trigger study
        if cfg.RUN.TRIGGER_STUDY:
            two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
                diak4.i1.eta) <= 2.4)
            two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
                diak4.i1.eta) > 2.4)
            one_jet_forward_one_jet_central = (~two_central_jets) & (
                ~two_forward_jets)
            selection.add('two_central_jets', two_central_jets.any())
            selection.add('two_forward_jets', two_forward_jets.any())
            selection.add('one_jet_forward_one_jet_central',
                          one_jet_forward_one_jet_central.any())

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())


        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index = photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId & photons.barrel

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig',
                      photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if df['has_lhe_v_pt']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        if 'LHE_Njets' in df:
            output['lhe_njets'].fill(dataset=dataset,
                                     multiplicity=df['LHE_Njets'])
        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
        if 'LHE_HTIncoming' in df:
            output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons,
                                        electrons, photons, cfg)
            weights = pileup_weights(weights, df, evaluator, cfg)
            weights = ak4_em_frac_weights(weights, diak4, evaluator)
            if not (gen_v_pt is None):
                weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                             df['mjj_gen'])

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask]]
                output['kinematics']['met_phi'] += [met_phi[mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[df['is_tight_muon']].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[df['is_tight_electron']].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[df['is_tight_photon']].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
            output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

        regions = vbfhinv_regions(cfg)

        # Get veto weights (only for MC)
        if not df['is_data']:
            veto_weights = get_veto_weights(df, cfg, evaluator, electrons,
                                            muons, taus)

        for region, cuts in regions.items():
            exclude = [None]
            region_weights = copy.deepcopy(weights)

            if not df['is_data']:
                ### Trigger weights
                if re.match(r'cr_(\d+)e.*', region):
                    p_pass_data = 1 - (1 -
                                       evaluator["trigger_electron_eff_data"]
                                       (electrons.etasc, electrons.pt)).prod()
                    p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"]
                                     (electrons.etasc, electrons.pt)).prod()
                    trigger_weight = p_pass_data / p_pass_mc
                    trigger_weight[np.isnan(trigger_weight)] = 1
                    region_weights.add('trigger', trigger_weight)
                elif re.match(r'cr_(\d+)m.*', region) or re.match(
                        'sr_.*', region):
                    region_weights.add(
                        'trigger_met',
                        evaluator["trigger_met"](df['recoil_pt']))
                elif re.match(r'cr_g.*', region):
                    photon_trigger_sf(region_weights, photons, df)

                # Veto weights
                if re.match('.*no_veto.*', region):
                    exclude = [
                        "muon_id_iso_tight", "muon_id_tight", "muon_iso_tight",
                        "muon_id_loose", "muon_iso_loose", "ele_reco",
                        "ele_id_tight", "ele_id_loose", "tau_id"
                    ]
                    region_weights.add(
                        "veto",
                        veto_weights.partial_weight(include=["nominal"]))

                # HEM-veto weights for signal region MC
                if re.match('^sr_vbf.*', region) and df['year'] == 2018:
                    # Events that lie in the HEM-veto region
                    events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6)
                    # Weight is the "good lumi fraction" for 2018
                    weight = 21.1 / 59.7
                    hem_weight = np.where(events_to_weight_mask, weight, 1.0)

                    region_weights.add("hem_weight", hem_weight)

            # This is the default weight for this region
            rweight = region_weights.partial_weight(exclude=exclude)

            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region][dataset]['all'] += df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' +
                           region][dataset][cutname] += selection.all(
                               *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            if cfg.RUN.SAVE.TREE:
                if region in ['cr_1e_vbf', 'cr_1m_vbf']:
                    output['tree_int64'][region][
                        "event"] += processor.column_accumulator(
                            df["event"][mask])
                    output['tree_float16'][region][
                        "gen_v_pt"] += processor.column_accumulator(
                            np.float16(gen_v_pt[mask]))
                    output['tree_float16'][region][
                        "gen_mjj"] += processor.column_accumulator(
                            np.float16(df['mjj_gen'][mask]))
                    output['tree_float16'][region][
                        "recoil_pt"] += processor.column_accumulator(
                            np.float16(df["recoil_pt"][mask]))
                    output['tree_float16'][region][
                        "recoil_phi"] += processor.column_accumulator(
                            np.float16(df["recoil_phi"][mask]))
                    output['tree_float16'][region][
                        "mjj"] += processor.column_accumulator(
                            np.float16(df["mjj"][mask]))

                    output['tree_float16'][region][
                        "leadak4_pt"] += processor.column_accumulator(
                            np.float16(diak4.i0.pt[mask]))
                    output['tree_float16'][region][
                        "leadak4_eta"] += processor.column_accumulator(
                            np.float16(diak4.i0.eta[mask]))
                    output['tree_float16'][region][
                        "leadak4_phi"] += processor.column_accumulator(
                            np.float16(diak4.i0.phi[mask]))

                    output['tree_float16'][region][
                        "trailak4_pt"] += processor.column_accumulator(
                            np.float16(diak4.i1.pt[mask]))
                    output['tree_float16'][region][
                        "trailak4_eta"] += processor.column_accumulator(
                            np.float16(diak4.i1.eta[mask]))
                    output['tree_float16'][region][
                        "trailak4_phi"] += processor.column_accumulator(
                            np.float16(diak4.i1.phi[mask]))

                    output['tree_float16'][region][
                        "minDPhiJetRecoil"] += processor.column_accumulator(
                            np.float16(df["minDPhiJetRecoil"][mask]))
                    if '_1e_' in region:
                        output['tree_float16'][region][
                            "leadlep_pt"] += processor.column_accumulator(
                                np.float16(electrons.pt.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_eta"] += processor.column_accumulator(
                                np.float16(electrons[
                                    electrons.pt.argmax()].eta.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_phi"] += processor.column_accumulator(
                                np.float16(electrons[
                                    electrons.pt.argmax()].phi.max()[mask]))
                    elif '_1m_' in region:
                        output['tree_float16'][region][
                            "leadlep_pt"] += processor.column_accumulator(
                                np.float16(muons.pt.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_eta"] += processor.column_accumulator(
                                np.float16(
                                    muons[muons.pt.argmax()].eta.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_phi"] += processor.column_accumulator(
                                np.float16(
                                    muons[muons.pt.argmax()].phi.max()[mask]))

                    for name, w in region_weights._weights.items():
                        output['tree_float16'][region][
                            f"weight_{name}"] += processor.column_accumulator(
                                np.float16(w[mask]))
                    output['tree_float16'][region][
                        f"weight_total"] += processor.column_accumulator(
                            np.float16(rweight[mask]))
                if region == 'inclusive':
                    output['tree_int64'][region][
                        "event"] += processor.column_accumulator(
                            df["event"][mask])
                    for name in selection.names:
                        output['tree_bool'][region][
                            name] += processor.column_accumulator(
                                np.bool_(selection.all(*[name])[mask]))
            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                """Fill the multiplicity histogram ``output[name]``.

                The ``counts`` of ``candidates`` after applying the current
                region ``mask`` are filled on the ``multiplicity`` axis,
                weighted by the masked region weight ``rweight``. ``dataset``,
                ``region``, ``mask`` and ``rweight`` are taken from the
                enclosing scope.
                """
                histogram = output[name]
                n_selected = candidates[mask].counts
                histogram.fill(
                    dataset=dataset,
                    region=region,
                    multiplicity=n_selected,
                    weight=rweight[mask],
                )

            fill_mult('ak4_mult', ak4[ak4.pt > 30])
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Fill ``output[name]`` for the current dataset and region.

                ``dataset`` and ``region`` come from the enclosing scope; all
                other keyword arguments are forwarded unchanged to ``fill``.
                """
                histogram = output[name]
                histogram.fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in region_weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
                ezfill("weights_wide",
                       weight_type=wname,
                       weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, rweight[mask])
            w_alljets_nopref = weight_shape(
                ak4[mask].eta,
                region_weights.partial_weight(exclude=exclude +
                                              ['prefire'])[mask])

            ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            ezfill('ak4_eta_nopref',
                   jeteta=ak4[mask].eta.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_phi_nopref',
                   jetphi=ak4[mask].phi.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_pt_nopref',
                   jetpt=ak4[mask].pt.flatten(),
                   weight=w_alljets_nopref)

            # Leading ak4
            w_diak4 = weight_shape(diak4.pt[mask], rweight[mask])
            ezfill('ak4_eta0',
                   jeteta=diak4.i0.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi0',
                   jetphi=diak4.i0.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt0',
                   jetpt=diak4.i0.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw0',
                   jetpt=diak4.i0.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf0',
                   frac=diak4.i0.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf0',
                   frac=diak4.i0.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst0',
                   nconst=diak4.i0.nconst[mask].flatten(),
                   weight=w_diak4)

            # Trailing ak4
            ezfill('ak4_eta1',
                   jeteta=diak4.i1.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi1',
                   jetphi=diak4.i1.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt1',
                   jetpt=diak4.i1.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw1',
                   jetpt=diak4.i1.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf1',
                   frac=diak4.i1.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf1',
                   frac=diak4.i1.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst1',
                   nconst=diak4.i1.nconst[mask].flatten(),
                   weight=w_diak4)

            # B tag discriminator
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], rweight[mask])
            ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo_cr',
                   dpfcalo=df["dPFCaloCR"][mask],
                   weight=rweight[mask])
            ezfill('dpfcalo_sr',
                   dpfcalo=df["dPFCaloSR"][mask],
                   weight=rweight[mask])
            ezfill('met', met=met_pt[mask], weight=rweight[mask])
            ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask])
            ezfill('recoil',
                   recoil=df["recoil_pt"][mask],
                   weight=rweight[mask])
            ezfill('recoil_phi',
                   phi=df["recoil_phi"][mask],
                   weight=rweight[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=rweight[mask])
            ezfill('dphijr',
                   dphi=df["minDPhiJetRecoil"][mask],
                   weight=rweight[mask])

            ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask])
            ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask])
            ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask])

            if gen_v_pt is not None:
                ezfill('gen_vpt',
                       vpt=gen_v_pt[mask],
                       weight=df['Generator_weight'][mask])
                ezfill('gen_mjj',
                       mjj=df['mjj_gen'][mask],
                       weight=df['Generator_weight'][mask])

            # Photon CR data-driven QCD estimate
            if df['is_data'] and re.match("cr_g.*", region) and re.match(
                    "(SinglePhoton|EGamma).*", dataset):
                w_imp = photon_impurity_weights(
                    photons[leadphoton_index].pt.max()[mask], df["year"])
                output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset),
                                   region=region,
                                   mjj=df["mjj"][mask],
                                   weight=rweight[mask] * w_imp)
                output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset),
                                      region=region,
                                      recoil=df["recoil_pt"][mask],
                                      weight=rweight[mask] * w_imp)

            # Uncertainty variations
            if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']:
                theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')]
                for unc in theory_uncs:
                    reweight = evaluator[unc](gen_v_pt)
                    w = (region_weights.weight() * reweight)[mask]
                    ezfill('mjj_unc',
                           mjj=df['mjj'][mask],
                           uncertainty=unc,
                           weight=w)

            # Two dimensional
            ezfill('recoil_mjj',
                   recoil=df["recoil_pt"][mask],
                   mjj=df["mjj"][mask],
                   weight=rweight[mask])

            # Muons
            if '_1m_' in region or '_2m_' in region or 'no_veto' in region:
                w_allmu = weight_shape(muons.pt[mask], rweight[mask])
                ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
                ezfill('muon_pt_abseta',
                       pt=muons.pt[mask].flatten(),
                       abseta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask])
                ezfill('muon_eta',
                       eta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_phi',
                       phi=muons.phi[mask].flatten(),
                       weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], rweight[mask])
                ezfill('muon_pt0',
                       pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1',
                       pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0',
                       eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1',
                       eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0',
                       phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1',
                       phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt',
                       pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta',
                       eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

            # Electrons
            if '_1e_' in region or '_2e_' in region or 'no_veto' in region:
                w_allel = weight_shape(electrons.pt[mask], rweight[mask])
                ezfill('electron_pt',
                       pt=electrons.pt[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_pt_eta',
                       pt=electrons.pt[mask].flatten(),
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_mt',
                       mt=df['MT_el'][mask],
                       weight=rweight[mask])
                ezfill('electron_eta',
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_phi',
                       phi=electrons.phi[mask].flatten(),
                       weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask], rweight[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(
                    photons[leadphoton_index].pt[mask], rweight[mask])
                ezfill('photon_pt0',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_eta0',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_phi0',
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_pt0_recoil',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       recoil=df['recoil_pt'][mask
                                              & (leadphoton_index.counts > 0)],
                       weight=w_leading_photon)
                ezfill('photon_eta_phi',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], rweight[mask])

            # Tau
            if 'no_veto' in region:
                w_all_taus = weight_shape(taus.pt[mask], rweight[mask])
                ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus)

            # PV
            ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask])
            ezfill('npvgood',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=rweight[mask])

            ezfill('npv_nopu',
                   nvtx=df['PV_npvs'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
            ezfill('npvgood_nopu',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])

            ezfill('rho_all',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=region_weights.partial_weight(exclude=exclude)[mask])
            ezfill('rho_central',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=region_weights.partial_weight(exclude=exclude)[mask])
            ezfill('rho_all_nopu',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
            ezfill('rho_central_nopu',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
        return output
    def process(self, df):
        dataset = df['dataset']
        if self._debug:
            print("Processing dataframe from", dataset)
        isRealData = dataset in ["JetHT", "SingleMuon", "data_obs_mu", "data_obs_jet"]

        self.build_leading_ak8_variables(df)
        self.build_subleading_ak8_variables(df)
        self.build_ak4_variables(df)
        self.build_met_systematics(df)
        df['muon_dphi'] = np.abs(deltaphi(df['vmuoLoose0_phi'], df['AK8Puppijet0_phi']))

        selection = processor.PackedSelection()
        if isRealData:
            # Only take jet triggers from JetHT, single muon triggers from SingleMuon dataset
            # necessary but not sufficient condition to prevent double-counting
            # (this plus mutually exclusive offline selections are sufficient)
            selection.add('trigger', (df['triggerBits'] & self._corrections[f'{self._year}_triggerMask']).astype('bool') & (dataset=="JetHT"))
            selection.add('mutrigger', ((df['triggerBits']&1) & df['passJson']).astype('bool') & (dataset=="SingleMuon"))
            if self._debug:
                print("Trigger pass/all", selection.all('trigger').sum(), df.size)
                print("Muon trigger pass/all", selection.all('mutrigger').sum(), df.size)
        else:
            selection.add('trigger', np.ones(df.size, dtype='bool'))
            selection.add('mutrigger', np.ones(df.size, dtype='bool'))

        btagLooseWPs = {
            '2016': 0.6321,
            '2017': 0.4941,
            '2018': 0.4184,
        }

        selection.add('noLeptons', (df['neleLoose']==0) & (df['nmuLoose']==0) & (df['ntau']==0))
        selection.add('oneMuon', (df['neleLoose']==0) & (df['nmuLoose']==1) & (df['ntau']==0))
        selection.add('muonAcceptance', (df['vmuoLoose0_pt'] > 55.) & (np.abs(df['vmuoLoose0_eta']) < 2.1))
        selection.add('muonDphiAK8', df['muon_dphi'] > 2*np.pi/3)
        selection.add('ak4btagMediumDR08', df['ak4_leadingDeepCSV_dR08'] > btagLooseWPs[self._year])  # at least one passes medium cut
        selection.add('antiak4btagMediumOppHem', df['opposite_ak4_leadingDeepCSV'] < btagLooseWPs[self._year])  # none pass
        selection.add('tightVjet', df['AK8Puppijet0_isTightVJet'] != 0)
        selection.add('n2ddtPass', df['ak8jet_n2ddt'] < 0)
        selection.add('jetMass', df['AK8Puppijet0_msd'] > 40.)
        selection.add('deepcvb', df['AK8Puppijet0_deepdoublecvb'] > 0.2)

        selection.add('jetKinematics', df['AK8Puppijet0_pt'] > 450.)
        selection.add('jetKinematicsMuonCR', df['AK8Puppijet0_pt'] > 400.)
        selection.add('pfmet', df['pfmet'] < 140.)

        regions = {}
        regions['noselection'] = {}
        regions['preselection'] = {'trigger', 'noLeptons'}
        regions['signalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem'}
        regions['muoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8'}
        regions['hCCsignalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem', 'deepcvb'}
        regions['hCCmuoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8', 'deepcvb'}

        shiftSystematics = ['JESUp', 'JESDown', 'JERUp', 'JERDown']
        shiftedQuantities = {'AK8Puppijet0_pt', 'pfmet'}
        shiftedSelections = {'jetKinematics', 'jetKinematicsMuonCR', 'pfmet'}
        for syst in shiftSystematics:
            selection.add('jetKinematics'+syst, df['AK8Puppijet0_pt_'+syst] > 450)
            selection.add('jetKinematicsMuonCR'+syst, df['AK8Puppijet0_pt_'+syst] > 400.)
            selection.add('pfmet'+syst, df['pfmet_'+syst] < 140.)

        # mass shift applied only to V-matched data
        # https://github.com/kakwok/ZPrimePlusJet/blob/PerBinEff/fitting/PbbJet/buildRhalphabetHbb.py#L30
        if not isRealData:
            shiftSystematics.append('matchedUp')
            shiftedQuantities.add('AK8Puppijet0_msd')
            msdshifts = {'2016': 1.001, '2017': 0.979, '2018': 0.970}
            df['AK8Puppijet0_msd_matchedUp'] = msdshifts[self._year] * df['AK8Puppijet0_msd']

        weights = processor.Weights(df.size)

        if not isRealData:
            # SumWeights is sum(scale1fb), so we need to use full value here
            weights.add('genweight', df['scale1fb'])

        if not self._skipPileup:
            if self._year == '2017' and dataset in self._corrections['2017_pileupweight_dataset']:
                weights.add('pileupweight',
                            self._corrections['2017_pileupweight_dataset'][dataset](df['npu']),
                            self._corrections['2017_pileupweight_dataset_puUp'][dataset](df['npu']),
                            self._corrections['2017_pileupweight_dataset_puDown'][dataset](df['npu']),
                            )
            elif self._year != '2017':
                weights.add('pileupweight',
                            self._corrections[f'{self._year}_pileupweight'](df['npu']),
                            self._corrections[f'{self._year}_pileupweight_puUp'](df['npu']),
                            self._corrections[f'{self._year}_pileupweight_puDown'](df['npu']),
                            )

        # TODO unc.
        if self._year == '2017' and 'ZJetsToQQ_HT' in dataset:
            nlo_over_lo_qcd = self._corrections['2017_Z_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2017' and 'WJetsToQQ_HT' in dataset:
            nlo_over_lo_qcd = self._corrections['2017_W_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2016' and 'DYJetsToQQ' in dataset:
            nlo_over_lo_qcd = self._corrections['2016_Z_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2016' and 'WJetsToQQ' in dataset:
            nlo_over_lo_qcd = self._corrections['2016_W_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)

        if not isRealData:
            # handle weight systematics for signal region
            def regionMask(w):
                if self._skipTrigger:
                    return np.ones(df.size)
                return np.where(selection.all('noLeptons'), w, 1.)
            weights.add('trigweight',
                        regionMask(self._corrections[f'{self._year}_trigweight_msd_pt'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                        regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightUp'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                        regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightDown'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                        )
            vmatch = (np.abs(deltaphi(df['AK8Puppijet0_phi'], df['genVPhi'])) < 0.8) & (np.abs(df['AK8Puppijet0_pt']-df['genVPt'])/df['genVPt'] < 0.5) & (np.abs(df['AK8Puppijet0_msd']-df['genVMass'])/df['genVMass'] < 0.3)
            weights.add('matched', np.ones(df.size, dtype='f'), vmatch.astype('f'), 1.-vmatch)

            # handle weight systematics for muon CR
            def regionMask(w):
                if self._skipTrigger:
                    return np.ones(df.size)
                return np.where(selection.all('oneMuon'), w, 1.)
            mu_abseta = np.abs(df['vmuoLoose0_eta'])
            weights.add('mutrigweight',
                        regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta'](df['vmuoLoose0_pt'], mu_abseta)),
                        regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta_mutrigweightShift'](df['vmuoLoose0_pt'], mu_abseta)),
                        shift=True
                        )
            weights.add('muidweight',
                        regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])),
                        regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt_muidweightShift'](mu_abseta, df['vmuoLoose0_pt'])),
                        shift=True
                        )
            weights.add('muisoweight',
                        regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])),
                        regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt_muisoweightShift'](mu_abseta, df['vmuoLoose0_pt'])),
                        shift=True
                        )

        if self._debug:
            print("Weight statistics:")
            pprint.pprint(weights._weightStats, indent=4)

        hout = self.accumulator.identity()
        for histname, h in hout.items():
            if not isinstance(h, hist.Hist):
                continue
            if not all(k in df or k == 'systematic' for k in h.fields):
                # Cannot fill this histogram due to missing fields
                # is this an error, warning, or ignorable?
                if self._debug:
                    print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
                continue
            fields = {k: df[k] for k in h.fields if k in df}
            region = [r for r in regions.keys() if r in histname.split('_')]

            if 'nminus1' in histname:
                _, sel, region = histname.split('_')
                cut = regions[region] - {sel}
                weight = weights.weight() * selection.all(*cut)
                h.fill(**fields, weight=weight)
            elif len(region) == 1:
                region = region[0]
                weight = weights.weight()
                cut = selection.all(*regions[region])
                h.fill(systematic="", **fields, weight=weight*cut)
                if 'systematic' in h.fields:
                    if self._debug:
                        print("Filling systematics for %s" % histname)
                    systs = set(weights.variations)
                    systs.update(shiftSystematics)
                    for syst in systs:
                        if self._debug:
                            print("  Filling systematic %s" % syst)
                        fields_syst = fields
                        for val in shiftedQuantities:
                            if val+'_'+syst in df:
                                fields_syst[val] = df[val+'_'+syst]
                                if self._debug:
                                    print("    Replacing field %s with %s" % (val, val+'_'+syst))
                        if syst in weights.variations:
                            weight_syst = weights.weight(syst)
                            if self._debug:
                                print("    Using modified weight")
                        else:
                            weight_syst = weight
                        if syst in set(shiftSystematics):
                            cut_syst = set()
                            for sel in regions[region]:
                                if sel in shiftedSelections and sel+syst in selection.names:
                                    cut_syst.add(sel+syst)
                                    if self._debug:
                                        print("    Replacing cut %s with systematic-shifted %s" % (sel, sel+syst))
                                else:
                                    cut_syst.add(sel)
                            cut_syst = selection.all(*cut_syst)
                        else:
                            cut_syst = cut
                        h.fill(systematic=syst, **fields_syst, weight=weight_syst*cut_syst)
            elif len(region) > 1:
                raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
            else:
                raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))

        if not isRealData:
            if 'skim_sumw' in df:
                # hacky way to only accumulate file-level information once
                if df['skim_sumw'] is not None:
                    hout['sumw'][dataset] += df['skim_sumw']
            else:
                hout['sumw'][dataset] += np.sum(df['scale1fb'])
        return hout
Example #9
0
    def process(self, events):
        """Fill the per-channel cutflow for one chunk of events.

        Builds trigger, MET-filter, lepton, jet and fat-jet selections and
        fills ``output['cutflow']`` cumulatively for the electron ("e") and
        muon ("mu") channels.  When the chunk exposes ``genWeight`` it is
        treated as simulation: the generator-weight sum is accumulated in
        ``output['sumw']`` and generator and pileup weights are applied.

        Parameters
        ----------
        events
            Event chunk providing ``metadata['dataset']`` and the ``HLT``,
            ``Flag``, ``Muon``, ``Electron``, ``Jet``, ``FatJet``, ``SubJet``,
            ``FatJetLS`` and ``MET`` collections (plus ``genWeight`` and
            ``Pileup`` for simulation).

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` with
        ``sumw`` and ``cutflow`` filled.
        """
        # get meta infos
        dataset = events.metadata["dataset"]
        isRealData = not hasattr(events, "genWeight")
        n_events = len(events)
        selection = processor.PackedSelection()
        weights = processor.Weights(n_events)
        output = self.accumulator.identity()

        # weights
        if not isRealData:
            output['sumw'][dataset] += awkward1.sum(events.genWeight)

        # trigger: OR of all configured paths per channel
        triggers = {}
        for channel in ["e", "mu"]:
            trigger = np.zeros(n_events, dtype='bool')
            for t in self._trigger[channel]:
                try:
                    trigger = trigger | events.HLT[t]
                except Exception:
                    # Best effort: a path missing from this file is skipped
                    # with a warning.  Catching Exception (not a bare except)
                    # keeps KeyboardInterrupt/SystemExit propagating.
                    warnings.warn("Missing trigger %s" % t, RuntimeWarning)
            triggers[channel] = trigger

        # met filter
        met_filters = ["goodVertices",
                       "globalSuperTightHalo2016Filter",
                       "HBHENoiseFilter",
                       "HBHENoiseIsoFilter",
                       "EcalDeadCellTriggerPrimitiveFilter",
                       "BadPFMuonFilter",
                       ]
        met_filters_mask = np.ones(n_events, dtype='bool')
        for t in met_filters:
            met_filters_mask = met_filters_mask & events.Flag[t]
        selection.add("met_filter", awkward1.to_numpy(met_filters_mask))

        # load objects
        muons = events.Muon
        electrons = events.Electron
        jets = events.Jet
        fatjets = events.FatJet
        subjets = events.SubJet  # only referenced by the disabled code below
        fatjetsLS = events.FatJetLS
        met = events.MET  # only referenced by the disabled code below

        # muons
        goodmuon = (
            (muons.mediumId)
            & (muons.miniPFRelIso_all <= 0.2)
            & (muons.pt >= 27)
            & (abs(muons.eta) <= 2.4)
            & (abs(muons.dz) < 0.1)
            & (abs(muons.dxy) < 0.05)
            & (muons.sip3d < 4)
        )
        good_muons = muons[goodmuon]
        ngood_muons = awkward1.sum(goodmuon, axis=1)

        # electrons
        goodelectron = (
            (electrons.mvaFall17V2noIso_WP90)
            & (electrons.pt >= 30)
            & (abs(electrons.eta) <= 1.479)
            & (abs(electrons.dz) < 0.1)
            & (abs(electrons.dxy) < 0.05)
            & (electrons.sip3d < 4)
        )
        good_electrons = electrons[goodelectron]
        ngood_electrons = awkward1.sum(goodelectron, axis=1)

        # good leptons, ordered by decreasing pt so that `firsts` below picks
        # the leading lepton (the default argsort order is ascending).
        good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
        good_leptons = good_leptons[awkward1.argsort(good_leptons.pt, ascending=False)]

        # lepton candidate (None for events without any good lepton)
        candidatelep = awkward1.firsts(good_leptons)

        # lepton channel selection
        selection.add("ch_e", awkward1.to_numpy((triggers["e"]) & (ngood_electrons == 1) & (ngood_muons == 0)))  # not sure if need to require 0 muons or 0 electrons in the next line
        selection.add("ch_mu", awkward1.to_numpy((triggers["mu"]) & (ngood_electrons == 0) & (ngood_muons == 1)))

        # jets
        ht = awkward1.sum(jets[jets.pt > 30].pt, axis=1)
        selection.add("ht_400", awkward1.to_numpy(ht >= 400))
        goodjet = (
            (jets.isTight)
            & (jets.pt > 30)
            & (abs(jets.eta) <= 2.5)
        )
        good_jets = jets[goodjet]

        # fat jets
        jID = "isTight"
        # TODO: add mass correction

        # a way to get the first two subjets
        # cart = awkward1.cartesian([fatjets, subjets], nested=True)
        # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
        # sj1 = subjets[idxes[:,:,0]]
        # sj2 = subjets[idxes[:,:,1]]

        good_fatjet = (
            (getattr(fatjets, jID))
            & (abs(fatjets.eta) <= 2.4)
            & (fatjets.pt > 50)
            & (fatjets.msoftdrop > 30)
            & (fatjets.msoftdrop < 210)
            #& (fatjets.pt.copy(content=fatjets.subjets.content.counts) == 2) # TODO: require 2 subjets?
            # this can probably be done w FatJet_subJetIdx1 or FatJet_subJetIdx2
            # Reduce over the innermost (subjet) axis only: without `axis`
            # the reducer collapses the whole chunk into a single boolean.
            & (awkward1.all(fatjets.subjets.pt >= 20, axis=-1))
            & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4, axis=-1))
        )
        good_fatjets = fatjets[good_fatjet]

        # hbb candidate
        mask_hbb = (
            (good_fatjets.pt > 200)
            & (good_fatjets.delta_r(candidatelep) > 2.0)
        )
        candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

        # b-tag #& (good_fatjets.particleNetMD_Xbb > 0.9)
        selection.add('hbb_btag', awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8))  # score would be larger for tight category (0.97)

        # No AK4 b-tagged jets away from bb jet.
        # The mask is built from `good_jets`, so it must index `good_jets`
        # (not the unfiltered `jets`, whose per-event counts differ).
        jets_HbbV = good_jets[good_jets.delta_r(candidateHbb) >= 1.2]
        # Veto: the highest b-tag score among the away jets must stay below
        # the medium working point.  With mask_identity=False an event with
        # no away jets yields -inf and therefore passes the veto.
        selection.add('hbb_vetobtagaway', awkward1.to_numpy(awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False) < BTagEfficiency.btagWPs[self._year]['medium']))

        # fat jets Lepton Subtracted
        # wjj candidate
        mask_wjj = (
            (fatjetsLS.pt > 50)
            & (fatjetsLS.delta_r(candidatelep) > 1.2)
            # need to add 2 subjets w pt > 20 & eta<2.4
            # need to add ID?
        )
        # pick the lepton-subtracted fat jet closest in dR to the lepton
        candidateWjj = awkward1.firsts(fatjetsLS[mask_wjj][awkward1.argmin(fatjetsLS[mask_wjj].delta_r(candidatelep), axis=1, keepdims=True)])
        # add t2/t1 <= 0.75 (0.45 HP)
        selection.add('hww_mass', awkward1.to_numpy(candidateWjj.mass >= 10))

        # wjjlnu info
        #HSolverLiInfo  hwwInfoLi;
        # qqSDmass = candidateWjj.msoftdrop
        # hwwLi   = hSolverLi->minimize(candidatelep.p4(), met.p4(), wjjcand.p4(), qqSDmass, hwwInfoLi)
        #neutrino = hwwInfoLi.neutrino;
        #wlnu     = hwwInfoLi.wlnu;
        #wqq      = hwwInfoLi.wqqjet;
        #hWW      = hwwInfoLi.hWW;
        #wwDM     = PhysicsUtilities::deltaR( wlnu,wqq) * hWW.pt()/2.0;
        # add dlvqq <= 11 (2.5 HP)

        # in the meantime let's add the mass
        # (disabled sketch, kept for reference until the HH system is implemented)
        # mm = (candidatejet - candidatelep).mass2
        # jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*candidatejet.mass
        # joffshell = jmass < 62.5
        # massassumption = 80.*joffshell + (125 - 80.)*~joffshell
        # x = massassumption**2/(2*candidatelep.pt*met.pt) + np.cos(candidatelep.phi - met.phi)
        # met_eta = (
        #     (x < 1)*np.arcsinh(x*np.sinh(candidatelep.eta))
        #     + (x > 1)*(
        #         candidatelep.eta - np.sign(candidatelep.eta)*np.arccosh(candidatelep.eta)
        #         )
        #     )
        # met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),np.array([0.]),np.array([0.]),np.array([0.]))
        # if met.size > 0:
        #     met_p4 = TLorentzVectorArray.from_ptetaphim(met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
        #
        # # hh system
        # candidateHH = candidateWjj + met_p4 + candidateHbb
        # selection.add('hh_mass', candidateHH.mass >= 700)
        # selection.add('hh_centrality', candidateHH.pt/candidateHH.mass >= 0.3)

        channels = {"e": ["met_filter", "ch_e", "ht_400", "hbb_btag", "hbb_vetobtagaway", "hww_mass"],  # ,"hh_mass","hh_centrality"],
                    "mu": ["met_filter", "ch_mu", "ht_400", "hbb_btag", "hbb_vetobtagaway", "hww_mass"]  # ,"hh_mass","hh_centrality"],
                    }

        # need to add gen info

        if not isRealData:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)

        # Cumulative cutflow: bin 0 holds all events, bin i+1 the events
        # passing the first i+1 cuts of the channel.
        event_weight = weights.weight()
        for channel, cuts in channels.items():
            allcuts = set()
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=0, weight=event_weight)
            for i, cut in enumerate(cuts):
                allcuts.add(cut)
                passing = selection.all(*allcuts)
                output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1, weight=event_weight[passing])

        return output
Example #10
0
    def process(self, df):
        """Fill the ``count`` cutflow histogram for lepton-jet pair events.

        The event weight combines the generator and pileup weights (skipped
        when ``self.data_type == 'data'``) with the trigger, cosmic-veto and
        primary-vertex flags.  AK4 jets and lepton-jets are built from the
        flat branches of ``df``.  Events are kept when they contain at least
        two lepton-jets, at least one muon-type lepton-jet and at least one
        displaced lepton-jet.  Each kept event gets a channel code: 1 (2mu2e)
        when exactly one lepton-jet is muon-type and it is among the two
        selected ones, 2 (4mu) when exactly two are muon-type and both
        selected ones are, 0 otherwise.

        Returns the accumulator from ``self.accumulator.identity()``; it is
        returned unfilled when the chunk is empty or no event survives.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        ## construct weights ##
        wgts = processor.Weights(df.size)
        if self.data_type != 'data':
            wgts.add('genw', df['weight'])
            true_npu = df['trueInteractionNum']
            wgts.add('pileup', *(corr(true_npu) for corr in self.pucorrs))

        # Event-level flags are folded into the weight (presumably 0/1 masks,
        # so that failing events end up with zero weight -- TODO confirm).
        wgts.add('trigger',
                 np.logical_or.reduce([df[path] for path in Triggers]))
        wgts.add('cosmicveto', df['cosmicveto_result'])
        wgts.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])
        # ...bla bla, other weights goes here

        event_weight = wgts.weight()
        ########################

        ## AK4 jets ##
        ak4jets = JaggedCandidateArray.candidatesfromcounts(
            df['akjet_ak4PFJetsCHS_p4'],
            px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
            py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
            pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
            energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
            jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
            deepcsv=df['hftagscore_DeepCSV_b'].content,
        )
        # bit 2 of the DeepCSV word flags the tight working point
        tight_bit = 1 << 2
        ak4jets.add_attributes(
            deepcsvTight=(np.bitwise_and(ak4jets.deepcsv, tight_bit) == tight_bit))
        good_ak4 = (ak4jets.jetid
                    & (ak4jets.pt > 20)
                    & (np.abs(ak4jets.eta) < 2.5))
        ak4jets = ak4jets[good_ak4]

        ## lepton-jets ##
        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
            lxy=df['pfjet_klmvtx_lxy'].content,
        )

        # Per-lepton-jet daughter counts; type codes 3 and 8 are named
        # "pfmu" and "dsa" in the original variable names.
        dau_type = awkward.fromiter(df['pfjet_pfcand_type'])
        n_pfmu = (dau_type == 3).sum()
        n_dsa = (dau_type == 8).sum()
        egamma_like = (n_pfmu == 0) & (n_dsa == 0)
        pfmu_like = (n_pfmu >= 2) & (n_dsa == 0)
        dsa_like = n_dsa > 0
        label = (egamma_like.astype(int) * 1
                 + pfmu_like.astype(int) * 2
                 + dsa_like.astype(int) * 3)
        n_mu = ((dau_type == 3) | (dau_type == 8)).sum()
        dau_qsum = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        leptonjets.add_attributes(
            label=label,
            ndsa=n_dsa,
            ismutype=(n_mu >= 2),
            iseltype=(n_mu == 0),
            qsum=dau_qsum,
        )

        # Attributes derived from the ones attached just above.
        neutral = (leptonjets.iseltype
                   | (leptonjets.ismutype & (leptonjets.qsum == 0)))
        far_vertex = np.abs(leptonjets.lxy) >= 5
        # non-vertex (NaN lxy) muon-type lepton-jets are treated as displaced too
        unvertexed_mu = np.isnan(leptonjets.lxy) & leptonjets.ismutype
        leptonjets.add_attributes(
            isneutral=neutral,
            displaced=(far_vertex | unvertexed_mu),
        )

        cosmic_dsa = fromNestNestIndexArray(
            df['dsamuon_isSubsetFilteredCosmic1Leg'],
            awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
        leptonjets.add_attributes(nocosmic=(cosmic_dsa.sum() == 0))

        leptonjets = leptonjets[(leptonjets.nocosmic) & (leptonjets.pt > 30)]

        ## __twoleptonjets__ AND >=1 displaced
        has_pair = leptonjets.counts >= 2
        has_mu_lj = leptonjets.ismutype.sum() >= 1
        has_displaced = leptonjets.displaced.sum() >= 1
        twoleptonjets = has_pair & has_mu_lj & has_displaced

        dileptonjets = leptonjets[twoleptonjets]
        ak4jets = ak4jets[twoleptonjets]
        wgt = event_weight[twoleptonjets]

        if dileptonjets.size == 0:
            return output

        lj_pt = dileptonjets.pt
        lj0 = dileptonjets[lj_pt.argmax()]
        # second entry of the pt argsort -- presumably the sub-leading
        # lepton-jet (relies on argsort's default ordering; TODO confirm)
        lj1 = dileptonjets[lj_pt.argsort()[:, 1:2]]

        ## channel def ##
        n_mu_lj = dileptonjets.ismutype.sum()
        either_is_mu = (lj0.ismutype | lj1.ismutype).flatten()
        both_are_mu = (lj0.ismutype & lj1.ismutype).flatten()
        channel_2mu2e = ((n_mu_lj == 1) & either_is_mu).astype(int) * 1
        channel_4mu = ((n_mu_lj == 2) & both_are_mu).astype(int) * 2
        channel_ = channel_2mu2e + channel_4mu
        ###########

        # both 'neutral'
        both_neutral = ((lj0.isneutral) & (lj1.isneutral)).flatten()
        if self.enforceNeutral == False:
            both_neutral = ~both_neutral

        # dphi > pi/2 (inverted in the control region)
        back_to_back = (np.abs(lj0.p4.delta_phi(lj1.p4)) > np.pi / 2).flatten()
        if self.region == 'CR':
            back_to_back = ~back_to_back

        # EGMpt0>60: only applied to the 2mu2e channel
        is_2mu2e = channel_2mu2e.astype(bool)
        hard_egm = (((lj0.iseltype) & (lj0.pt > 60))
                    | ((lj1.iseltype) & (lj1.pt > 60))).flatten()
        egm_pt = (~is_2mu2e) | (is_2mu2e & hard_egm)

        # NOTE(review): the code requires fewer than 3 AK4 jets while the
        # previous comment read "N(jets) < 4" -- confirm the threshold.
        few_jets = ak4jets.counts < 3

        # N(tightB)==0
        tight_b = ak4jets[(ak4jets.pt > 30) & (np.abs(ak4jets.eta) < 2.4)
                          & ak4jets.deepcsvTight]
        no_tight_b = tight_b.counts == 0

        cuts = [both_neutral, back_to_back, egm_pt, few_jets, no_tight_b]

        # Cumulative cutflow: step i holds the events passing cuts 0..i.
        passed = None
        for step, cut in enumerate(cuts):
            passed = cut if passed is None else np.logical_and(passed, cut)
            output['count'].fill(dataset=dataset,
                                 cnt=np.ones_like(wgt[passed]) * step,
                                 weight=wgt[passed],
                                 channel=channel_[passed])

        return output
Example #11
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
        df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
            'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
                'is_lo_w_ewk'] | df['is_lo_z_ewk']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
                'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
            gen = setup_gen_candidates(df)
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_dress']
        elif df['is_lo_g']:
            gen = setup_gen_candidates(df)
            gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

        # Generator-level leading dijet mass
        if df['has_lhe_v_pt']:
            genjets = setup_lhe_cleaned_genjets(df)
            digenjet = genjets[:, :2].distincts()
            df['mjj_gen'] = digenjet.mass.max()

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
            df, cfg)

        # Filtering ak4 jets according to pileup ID
        ak4 = ak4[ak4.puid]
        bjets = bjets[bjets.puid]

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts == 1) *
                       mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) *
                       mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index = ak4.pt.argmax()

        elejet_pairs = ak4[:, :1].cross(electrons)
        df['dREleJet'] = np.hypot(
            elejet_pairs.i0.eta - elejet_pairs.i1.eta,
            dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:, :1].cross(muons)
        df['dRMuonJet'] = np.hypot(
            muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
            dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                                   muons, photons)
        df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30,
                                                  etamax=4.7)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               met_phi,
                                               njet=4,
                                               ptmin=30,
                                               etamax=4.7)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size) == 1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)

        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',
                      np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

        if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018
                and not cfg.RUN.SYNC):
            selection.add('hemveto', df['hemveto'])
        else:
            selection.add('hemveto', np.ones(df.size) == 1)

        # AK4 dijet
        diak4 = ak4[:, :2].distincts()
        leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
            np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
        trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
            np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
        hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
        has_track0 = np.abs(diak4.i0.eta) <= 2.5
        has_track1 = np.abs(diak4.i1.eta) <= 2.5

        leadak4_id = diak4.i0.tightId & (has_track0 * (
            (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
            (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
        trailak4_id = has_track1 * (
            (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
            (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
        df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

        selection.add('two_jets', diak4.counts > 0)
        selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
        selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
        selection.add('hemisphere', hemisphere)
        selection.add('leadak4_id', leadak4_id.any())
        selection.add('trailak4_id', trailak4_id.any())
        selection.add('mjj',
                      df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
        selection.add(
            'dphijj',
            df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
        selection.add(
            'detajj',
            df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

        # Divide into three categories for trigger study
        if cfg.RUN.TRIGGER_STUDY:
            two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
                diak4.i1.eta) <= 2.4)
            two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
                diak4.i1.eta) > 2.4)
            one_jet_forward_one_jet_central = (~two_central_jets) & (
                ~two_forward_jets)
            selection.add('two_central_jets', two_central_jets.any())
            selection.add('two_forward_jets', two_forward_jets.any())
            selection.add('one_jet_forward_one_jet_central',
                          one_jet_forward_one_jet_central.any())

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())
        selection.add('two_electrons', electrons.counts == 2)

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index = photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId \
                         & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig',
                      photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if df['has_lhe_v_pt']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        if 'LHE_Njets' in df:
            output['lhe_njets'].fill(dataset=dataset,
                                     multiplicity=df['LHE_Njets'])
        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
        if 'LHE_HTIncoming' in df:
            output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons,
                                        electrons, photons)
            weights = pileup_weights(weights, df, evaluator, cfg)
            if not (gen_v_pt is None):
                weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                             df['mjj_gen'])

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask]]
                output['kinematics']['met_phi'] += [met_phi[mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[df['is_tight_muon']].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[df['is_tight_electron']].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[df['is_tight_photon']].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
            output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

        regions = vbfhinv_regions(cfg)
        for region, cuts in regions.items():
            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all'] += df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' + region][cutname] += selection.all(
                        *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                output[name].fill(dataset=dataset,
                                  region=region,
                                  multiplicity=candidates[mask].counts,
                                  weight=weights.weight()[mask])

            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Helper function to make filling easier."""
                output[name].fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
                ezfill("weights_wide",
                       weight_type=wname,
                       weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, weights.weight()[mask])
            w_alljets_nopref = weight_shape(
                ak4[mask].eta,
                weights.partial_weight(exclude=['prefire'])[mask])

            ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            ezfill('ak4_eta_nopref',
                   jeteta=ak4[mask].eta.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_phi_nopref',
                   jetphi=ak4[mask].phi.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_pt_nopref',
                   jetpt=ak4[mask].pt.flatten(),
                   weight=w_alljets_nopref)

            # Leading ak4
            w_diak4 = weight_shape(diak4.pt[mask], weights.weight()[mask])
            ezfill('ak4_eta0',
                   jeteta=diak4.i0.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi0',
                   jetphi=diak4.i0.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt0',
                   jetpt=diak4.i0.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw0',
                   jetpt=diak4.i0.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf0',
                   frac=diak4.i0.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf0',
                   frac=diak4.i0.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst0',
                   nconst=diak4.i0.nconst[mask].flatten(),
                   weight=w_diak4)

            # Trailing ak4
            ezfill('ak4_eta1',
                   jeteta=diak4.i1.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi1',
                   jetphi=diak4.i1.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt1',
                   jetpt=diak4.i1.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw1',
                   jetpt=diak4.i1.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf1',
                   frac=diak4.i1.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf1',
                   frac=diak4.i1.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst1',
                   nconst=diak4.i1.nconst[mask].flatten(),
                   weight=w_diak4)

            # B tag discriminator
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], weights.weight()[mask])
            ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo',
                   dpfcalo=df["dPFCalo"][mask],
                   weight=weights.weight()[mask])
            ezfill('met', met=met_pt[mask], weight=weights.weight()[mask])
            ezfill('met_phi', phi=met_phi[mask], weight=weights.weight()[mask])
            ezfill('recoil',
                   recoil=df["recoil_pt"][mask],
                   weight=weights.weight()[mask])
            ezfill('recoil_phi',
                   phi=df["recoil_phi"][mask],
                   weight=weights.weight()[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=weights.weight()[mask])
            ezfill('dphijr',
                   dphi=df["minDPhiJetRecoil"][mask],
                   weight=weights.weight()[mask])

            ezfill('dphijj',
                   dphi=df["dphijj"][mask],
                   weight=weights.weight()[mask])
            ezfill('detajj',
                   deta=df["detajj"][mask],
                   weight=weights.weight()[mask])
            ezfill('mjj', mjj=df["mjj"][mask], weight=weights.weight()[mask])

            # Two dimensional
            ezfill('recoil_mjj',
                   recoil=df["recoil_pt"][mask],
                   mjj=df["mjj"][mask],
                   weight=weights.weight()[mask])

            # Muons
            if '_1m_' in region or '_2m_' in region:
                w_allmu = weight_shape(muons.pt[mask], weights.weight()[mask])
                ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
                ezfill('muon_mt',
                       mt=df['MT_mu'][mask],
                       weight=weights.weight()[mask])
                ezfill('muon_eta',
                       eta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_phi',
                       phi=muons.phi[mask].flatten(),
                       weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], weights.weight()[mask])
                ezfill('muon_pt0',
                       pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1',
                       pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0',
                       eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1',
                       eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0',
                       phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1',
                       phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt',
                       pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta',
                       eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

            # Electrons
            if '_1e_' in region or '_2e_' in region:
                w_allel = weight_shape(electrons.pt[mask],
                                       weights.weight()[mask])
                ezfill('electron_pt',
                       pt=electrons.pt[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_mt',
                       mt=df['MT_el'][mask],
                       weight=weights.weight()[mask])
                ezfill('electron_eta',
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_phi',
                       phi=electrons.phi[mask].flatten(),
                       weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask],
                                      weights.weight()[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(
                    photons[leadphoton_index].pt[mask],
                    weights.weight()[mask])
                ezfill('photon_pt0',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_eta0',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_phi0',
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_pt0_recoil',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       recoil=df['recoil_pt'][mask
                                              & (leadphoton_index.counts > 0)],
                       weight=w_leading_photon)
                ezfill('photon_eta_phi',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], weights.weight()[mask])

            # PV
            ezfill('npv',
                   nvtx=df['PV_npvs'][mask],
                   weight=weights.weight()[mask])
            ezfill('npvgood',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=weights.weight()[mask])

            ezfill('npv_nopu',
                   nvtx=df['PV_npvs'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('npvgood_nopu',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])

            ezfill('rho_all',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=weights.weight()[mask])
            ezfill('rho_central',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=weights.weight()[mask])
            ezfill('rho_all_nopu',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('rho_central_nopu',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
        return output
Example #12
0
    def process(self, events):
        """Select events, record cutflows and fill the per-region histograms.

        The method builds a set of named boolean cuts (triggers, fat-jet and
        muon kinematics, b-tag requirements, lepton vetoes, ...), attaches the
        simulation weights, accumulates weighted cutflows for the ``signal``,
        ``ttbar_muoncontrol`` and ``vselection`` regions and finally fills the
        ``templates``, ``event``, ``muon``, ``deepAK8`` and ``in_v3``
        histograms for every region, including ``noselection``.

        Args:
            events: One chunk of events. It must expose ``metadata``,
                ``columns``, ``size`` and the ``HLT``, ``FatJet``, ``Muon``,
                ``Electron``, ``Tau``, ``Jet``, ``MET``, ``GRU`` and ``IN``
                collections used below (presumably a coffea NanoEvents-style
                object -- confirm against the caller).

        Returns:
            The accumulator for this chunk. An untouched identity accumulator
            is returned when the chunk contains no events.
        """
        dataset = events.metadata['dataset']
        print('process dataset', dataset)
        isRealData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        weights = processor.Weights(len(events))
        output = self.accumulator.identity()
        if len(events) == 0:
            return output
        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        # Trigger paths: real data must fire a trigger, simulation always passes.
        if isRealData:
            trigger_fatjet = np.zeros(events.size, dtype='bool')
            for t in self._triggers[self._year]:
                try:
                    trigger_fatjet = trigger_fatjet | events.HLT[t]
                except Exception:
                    # Best effort: a path missing from this file is skipped.
                    # ``Exception`` (not a bare ``except``) so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    print('trigger %s not available' % t)

            trigger_muon = np.zeros(events.size, dtype='bool')
            for t in self._muontriggers[self._year]:
                trigger_muon = trigger_muon | events.HLT[t]
        else:
            trigger_fatjet = np.ones(events.size, dtype='bool')
            trigger_muon = np.ones(events.size, dtype='bool')

        selection.add('fatjet_trigger', trigger_fatjet)
        selection.add('muon_trigger', trigger_muon)

        # Jet corrected kinematics and decorrelated tagger scores.
        gru = events.GRU
        IN = events.IN
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rhocorr'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['gruddt'] = gru.v25 - shift(
            fatjets, algo='gruddt', year=self._year)
        fatjets['gru'] = gru.v25
        fatjets['in_v3'] = IN.v3
        fatjets['in_v3_ddt'] = IN.v3 - shift(
            fatjets, algo='inddt', year=self._year)
        fatjets['in_v3_ddt_90pctl'] = IN.v3 - shift(
            fatjets, algo='inddt90pctl', year=self._year)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets["genMatchFull"] = genmatch(events, dataset)

        # Candidate objects: first fat jet, and (for now) up to five muons.
        candidatejet = fatjets[:, :1]
        candidatemuon = events.Muon[:, :5]

        selection.add('pt', (candidatejet.pt > 525).any())
        selection.add('msdcorr', (candidatejet.msdcorr > 40).any())

        # Basic jet selection; generator matching is only required for the
        # hadronic W/Z samples.
        if 'WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset:
            gen_match_req = candidatejet.genMatchFull
        else:
            gen_match_req = True
        goodjet_sel = ((candidatejet.pt > 525)
                       & (abs(candidatejet.eta) < 2.5)
                       & (candidatejet.msoftdrop > 40.)
                       & (candidatejet.rhocorr > -5.5)
                       & (candidatejet.rhocorr < -2)
                       & gen_match_req).any()

        vselection_goodjet_sel = ((candidatejet.pt > 200)
                                  & (abs(candidatejet.eta) < 2.5)
                                  & (candidatejet.msoftdrop > 40.)).any()
        selection.add('vselection_jetkin', vselection_goodjet_sel)

        # Good-muon selection for the muon control region (lepton vetoes are
        # defined further down).
        goodmuon_sel = ((candidatemuon.pt > 55)
                        & (abs(candidatemuon.eta) < 2.1)
                        & (candidatemuon.looseId).astype(bool)
                        & (candidatemuon.pfRelIso04_all < 0.15)).any()
        vselection_goodmuon_sel = ((candidatemuon.pt > 53)
                                   & (abs(candidatemuon.eta) < 2.1)
                                   & (candidatemuon.tightId).astype(bool))
        vselection_goodmuon_sel_loose = ((candidatemuon.pt > 20)
                                         & (candidatemuon.looseId).astype(bool)
                                         & (abs(candidatemuon.eta) < 2.4))

        selection.add('vselection_muonkin', vselection_goodmuon_sel.any())
        selection.add('vselection_onetightmuon',
                      vselection_goodmuon_sel.sum() == 1)
        selection.add('vselection_oneloosemuon',
                      vselection_goodmuon_sel_loose.sum() == 1)

        # From here on only the first muon is used as the candidate.
        candidatemuon = candidatemuon[:, 0:1]

        selection.add('muonkin', goodmuon_sel)
        selection.add('jetkin', goodjet_sel)

        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
        selection.add('jetid', candidatejet.isTight.any())
        selection.add('met', events.MET.pt > 40.)

        # Azimuthal separation between the candidate muon and candidate jet.
        muon_ak8_pair = candidatemuon.cross(candidatejet, nested=True)
        muon_ak8_dphi = abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1))
        selection.add('muonDphiAK8',
                      (muon_ak8_dphi > 2 * np.pi / 3).all().all())
        selection.add('vselection_muonDphiAK8',
                      (muon_ak8_dphi > 1).all().all())

        # AK4 jets for the control region; at most the first 10 selected
        # jets per event are kept.
        jets = events.Jet[((events.Jet.pt > 50.)
                           & (abs(events.Jet.eta) < 2.5))][:, :10]

        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
        dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))

        ak4_away = jets[(dr > 0.8).all()]
        selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)
        ak4_opposite = jets[(dphi > np.pi / 2).all()]
        selection.add('antiak4btagMediumOppHem',
                      ak4_opposite.btagCSVV2.max() < 0.8838)

        # Leptonic W candidate: candidate muon + MET (MET taken as massless
        # with eta = 0).
        mu_p4 = TLorentzVectorArray.from_ptetaphim(
            candidatemuon.pt.fillna(0), candidatemuon.eta.fillna(0),
            candidatemuon.phi.fillna(0), candidatemuon.mass.fillna(0))
        met_p4 = TLorentzVectorArray.from_ptetaphim(
            awkward.JaggedArray.fromiter([[v] for v in events.MET.pt]),
            awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]),
            awkward.JaggedArray.fromiter([[v] for v in events.MET.phi]),
            awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]))

        met_candidatemuon_pair = met_p4.cross(mu_p4)
        Wleptoniccandidate = (met_candidatemuon_pair.i0
                              + met_candidatemuon_pair.i1)
        selection.add('Wleptonic_candidate',
                      (Wleptoniccandidate.pt > 200).any())

        # AK4 jets for the V-selection region.
        vselection_jets = events.Jet[((events.Jet.pt > 30.)
                                      & (abs(events.Jet.eta) < 2.4))]

        vselection_ak4_ak8_pair = vselection_jets.cross(candidatejet,
                                                        nested=True)
        muon_ak4_pair = vselection_jets.cross(candidatemuon, nested=True)
        dr_ak8 = abs(
            vselection_ak4_ak8_pair.i0.delta_r(vselection_ak4_ak8_pair.i1))
        dr_muon = abs(muon_ak4_pair.i0.delta_r(muon_ak4_pair.i1))

        ak4_away = vselection_jets[(dr_ak8 > 0.8).all()]
        selection.add('vselection_ak4btagMedium08',
                      ak4_away.btagCSVV2.max() > 0.8838)

        # NOTE(review): despite its name this cut is a b-tag requirement on
        # jets with dR(jet, muon) > 0.3, not a delta-phi cut -- confirm intent.
        ak4_away = vselection_jets[(dr_muon > 0.3).all()]
        selection.add('vselection_muonDphiAK4',
                      ak4_away.btagCSVV2.max() > 0.8838)

        # Lepton multiplicities used for the vetoes.
        nelectrons = (((events.Electron.pt > 10.)
                       & (abs(events.Electron.eta) < 2.5)
                       & (events.Electron.cutBased >= 2))).sum()
        nmuons = (((events.Muon.pt > 10)
                   & (abs(events.Muon.eta) < 2.1)
                   & (events.Muon.looseId).astype(bool))).sum()
        ntaus = (((events.Tau.pt > 20.)
                  & (events.Tau.idDecayMode).astype(bool)
                  & (events.Tau.rawIso < 5)
                  & (abs(events.Tau.eta) < 2.3))).sum()
        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))

        # Disabled for now: MET-filter weight, jet-trigger weight (signal
        # region only) and single-muon trigger weight.
        if isRealData:
            genflavor = candidatejet.pt.zeros_like().pad(
                1, clip=True).fillna(-1).flatten()
        else:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            bosons = getBosons(events)
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
                1, clip=True).fillna(-1).flatten()

        # Ordered list of cuts defining each region.
        regions = {
            'signal': [
                'fatjet_trigger',
                'jetkin',
                'noleptons',
                'jetid',
                'antiak4btagMediumOppHem',
            ],
            'ttbar_muoncontrol': [
                'muon_trigger',
                'pt',
                'msdcorr',
                'jetid',
                'jetkin',
                'muonkin',
                'muonDphiAK8',
                'ak4btagMedium08',
                'noelectron_notau',
            ],
            'vselection': [
                'muon_trigger', 'vselection_jetkin', 'vselection_muonkin',
                'vselection_onetightmuon', 'vselection_oneloosemuon',
                'vselection_muonDphiAK8', 'vselection_ak4btagMedium08',
                'vselection_muonDphiAK4', 'Wleptonic_candidate', 'met'
            ],
            'noselection': [],
        }

        # Weighted cutflows: 'none' is the total weight, then each cut is
        # applied cumulatively in the order listed for the region.
        for cutflow_region in ('signal', 'ttbar_muoncontrol', 'vselection'):
            cutflow_name = 'cutflow_' + cutflow_region
            region_cuts = regions[cutflow_region]
            output[cutflow_name][dataset]['none'] += float(
                weights.weight().sum())
            for ncuts, cut in enumerate(region_cuts, start=1):
                passed = selection.all(*region_cuts[:ncuts])
                output[cutflow_name][dataset][cut] += float(
                    weights.weight()[passed].sum())

        def normalize(val, cut):
            """Return one value per selected event (0 where none exists)."""
            return val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region):
            """Fill every histogram with the events passing ``region``."""
            print('filling %s' % region)
            cut = selection.all(*regions[region])
            weight = weights.weight()[cut]
            output['templates'].fill(
                dataset=dataset,
                region=region,
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                n2ddt=normalize(candidatejet.n2ddt, cut),
                gruddt=normalize(candidatejet.gruddt, cut),
                in_v3_ddt=normalize(candidatejet.in_v3_ddt_90pctl, cut),
                weight=weight,
            )
            output['event'].fill(
                dataset=dataset,
                region=region,
                MET=events.MET.pt[cut],
                nJet=fatjets.counts[cut],
                nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
                weight=weight,
            )
            output['muon'].fill(
                dataset=dataset,
                region=region,
                mu_pt=normalize(candidatemuon.pt, cut),
                mu_pfRelIso04_all=normalize(candidatemuon.pfRelIso04_all, cut),
                weight=weight,
            )
            output['deepAK8'].fill(
                dataset=dataset,
                region=region,
                deepTagMDWqq=normalize(candidatejet.deepTagMDWqq, cut),
                deepTagMDZqq=normalize(candidatejet.deepTagMDZqq, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                genflavor=genflavor[cut],
                weight=weight,
            )
            output['in_v3'].fill(
                dataset=dataset,
                region=region,
                genflavor=genflavor[cut],
                in_v3=normalize(candidatejet.in_v3, cut),
                n2=normalize(candidatejet.n2b1, cut),
                gru=normalize(candidatejet.gru, cut),
                weight=weight,
            )

        for region in regions:
            fill(region)

        return output
Example #13
0
    def process(self, df):
        """Fill lepton-in-fat-jet histograms for one chunk of events.

        The chunk is first reduced to events with at least one good lepton of
        the configured channel (``self._channel``) and at least one good fat
        jet.  Named cuts are then registered in a ``PackedSelection``, the
        cutflow is accumulated, and for every region listed in
        ``self._regions`` the ``<region>_fmmjetprop`` and
        ``<region>_fmmjetprop2`` histograms are filled.

        Parameters
        ----------
        df
            Event chunk exposing ``metadata``, ``columns``, ``size`` and the
            ``Muon``/``Electron``/``FatJet``/``Jet``/``MET``/``HLT``
            collections read below.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` with
        its ``sumw``, ``cutflow`` and histogram entries updated.

        Raises
        ------
        KeyError
            If a name in ``self._regions`` is not one of the regions defined
            in this method.
        """
        dataset = df.metadata['dataset']
        isRealData = 'genWeight' not in df.columns
        output = self.accumulator.identity()
        selection = processor.PackedSelection()

        # ---- pre-selection on the full chunk ----
        goodMuon = ((df.Muon.pt > 27.) & (np.abs(df.Muon.eta) < 2.4))
        nmuons = goodMuon.sum()

        goodElectron = ((df.Electron.pt > 30.)
                        & (np.abs(df.Electron.eta) < 2.5))
        nelectrons = goodElectron.sum()

        df.FatJet['msdcorr'] = corrected_msoftdrop(df.FatJet)

        goodFatJet = ((df.FatJet.pt > 300.)
                      & (np.abs(df.FatJet.eta) < 2.4)
                      & (df.FatJet.msdcorr > 10.)
                      & (df.FatJet.isTight))
        nfatjets = goodFatJet.sum()

        if self._channel == 'muon':
            good = ((nmuons >= 1) & (nfatjets >= 1))
        else:
            good = ((nelectrons >= 1) & (nfatjets >= 1))
        events = df[good]

        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        # trigger: OR of every configured path that is present in this chunk;
        # it is evaluated on the full chunk and then reduced with `good`.
        trigger = np.zeros(df.size, dtype='bool')
        for t in self._triggers[self._year + '_' + self._trigger]:
            try:
                trigger = trigger | df.HLT[t]
            except Exception:
                warnings.warn("Missing trigger %s" % t, RuntimeWarning)
        selection.add('trigger', trigger[good])

        # Muons (first muon per event, kept jagged with at most one entry)
        candidatemuon = events.Muon[:, 0:1]
        nmuons = events.Muon.counts

        # Electrons
        candidateelectron = events.Electron[:, 0:1]
        nelectrons = events.Electron.counts

        if self._channel == 'muon':
            candidatelep = candidatemuon
            selection.add('nootherlepton', (nelectrons == 0))
        else:
            candidatelep = candidateelectron
            selection.add('nootherlepton', (nmuons == 0))

        selection.add('iplepton', ((np.abs(candidatelep.dz) < 0.1)
                                   & (np.abs(candidatelep.dxy) < 0.05)).any())

        # FatJets: the candidate jet is the fat jet closest in dR to the lepton
        ak8_lep_pair = candidatelep.cross(events.FatJet)
        ak8_lep_dR = ak8_lep_pair.i0.delta_r(ak8_lep_pair.i1)

        candidatejet = events.FatJet[ak8_lep_dR.argmin()]

        selection.add('jetkin', (candidatejet.pt > self._fjetptMIN).any())
        selection.add('jetmsd', (candidatejet.msdcorr > 20).any())
        selection.add('LSF3medium', (candidatejet.lsf3 > 0.7).any())
        selection.add('LSF3tight', (candidatejet.lsf3 > 0.78).any())
        selection.add('lepnearjet', (ak8_lep_dR.min() < 1.5))
        selection.add('lepinjet', (ak8_lep_dR.min() < 0.8))

        # Jets
        jets = events.Jet[(events.Jet.pt > 30.)
                          & (abs(events.Jet.eta) < 2.5)
                          & (events.Jet.isTight)]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        ak4_ak8_dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
        ak4_opposite = jets[(ak4_ak8_dphi > np.pi / 2).all()]
        ak4_away = jets[(ak4_ak8_dphi > 0.8).all()]

        selection.add(
            'antiak4btagMediumOppHem',
            ak4_opposite.btagDeepB.max() < self._btagWPs['med'][self._year])
        selection.add(
            'ak4btagMedium08',
            ak4_away.btagDeepB.max() < self._btagWPs['med'][self._year])

        # MET
        met = events.MET

        # MET eta with mass assumption
        # jmass is the (jet - lepton) mass, falling back to the jet mass when
        # the subtracted four-vector has negative mass squared.
        mm = (candidatejet - candidatelep).mass2
        jmass = (mm > 0) * np.sqrt(np.maximum(
            0, mm)) + (mm < 0) * candidatejet.mass

        joffshell = jmass < 62.5
        massassumption = 80. * joffshell + (125 - 80.) * ~joffshell
        x = massassumption**2 / (2 * candidatelep.pt *
                                 met.pt) + np.cos(candidatelep.phi - met.phi)
        # arccosh is only defined for arguments >= 1, so its argument is
        # clamped; the (x >= 1) factor selects that branch only where the
        # clamp is inactive, and x == 1 is no longer left without a branch.
        met_eta = ((x < 1) * np.arcsinh(x * np.sinh(candidatelep.eta)) +
                   (x >= 1) *
                   (candidatelep.eta -
                    np.sign(candidatelep.eta) *
                    np.arccosh(np.maximum(1., x))))

        # NOTE: hmass is computed but not filled into any histogram below.
        if met.size > 0:
            met_p4 = TLorentzVectorArray.from_ptetaphim(
                met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
            hmass = (candidatejet + met_p4).mass
        else:
            hmass = candidatejet.pt.zeros_like()

        # weights
        weights = processor.Weights(len(events), storeIndividual=True)
        if isRealData:
            genflavor = candidatejet.pt.zeros_like()
        else:
            # Best effort: a failure here is reported and the weights are
            # left as they were at the point of failure.
            try:
                weights.add('genweight', events.genWeight)
                add_pileup_weight(weights, events.Pileup.nPU, self._year)
            except Exception:
                print('no gen weight')
            if 'TTTo' in dataset:
                genW, genW_idx = getParticles(
                    events, 24, ['fromHardProcess', 'isLastCopy'])
                genb, genb_idx = getParticles(
                    events, 5, ['fromHardProcess', 'isLastCopy'])
                genflavorW = matchedParticleFlavor(candidatelep, genW, 'child',
                                                   0.4)
                genflavorb = matchedParticleFlavor(candidatelep, genb, 'mom',
                                                   0.4)
                genflavor = getFlavor(genflavorW, genflavorb)
            elif (('hww_2017' in dataset) or ('GluGluHToWW' in dataset)):
                genH, genH_idx = getParticles(
                    events, 25, ['fromHardProcess', 'isLastCopy'])
                genW, genW_idx = getParticles(
                    events, 24, ['fromHardProcess', 'isLastCopy'])
                genE, genE_idx = getParticles(
                    events, 11, ['fromHardProcess', 'isFirstCopy'], 1)
                genM, genM_idx = getParticles(
                    events, 13, ['fromHardProcess', 'isFirstCopy'], 1)
                genT, genT_idx = getParticles(
                    events, 15, ['fromHardProcess', 'isFirstCopy'], 1)
                genQ, genQ_idx = getParticles(
                    events, [0, 5], ['fromHardProcess', 'isFirstCopy'])
                # Decay-mode flags built from generator-particle counts.
                # Only the qq+e and qq+mu flags feed genflavor; the others
                # are computed but not used further in this method.
                ishWW_qqelev = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 1) & (genM.counts == 0) & (genT.counts == 0)
                ishWW_qqmuv = (genH.counts == 1) & (genW.counts == 2) & (
                    genM.counts == 1) & (genE.counts == 0) & (genT.counts == 0)
                ishWW_qqtauv = (genH.counts == 1) & (genW.counts == 2) & (
                    genT.counts == 1) & (genM.counts == 0) & (genE.counts == 0)
                ishWW_qqqq = (genH.counts == 1) & (genW.counts == 2) & (
                    genQ.counts == 4) & (genM.counts == 0) & (genE.counts == 0)
                ishWW_muvelev = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 1) & (genM.counts == 1)
                ishWW_elevelev = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 2) & (genM.counts == 0)
                ishWW_tauvtauv = (genH.counts == 1) & (genW.counts == 2) & (
                    genT.counts == 2) & (genM.counts == 0) & (genE.counts == 0)
                ishWW_muvmuv = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 0) & (genM.counts == 2)
                genflavor = ((ishWW_qqelev) * 8 + (ishWW_qqmuv) * 9)
            else:
                genflavor = candidatejet.pt.zeros_like()

        # fill cutflow: each entry counts events passing that cut and all
        # cuts listed before it.
        cutflow = [
            'trigger', 'jetkin', 'jetmsd', 'lepnearjet', 'lepinjet',
            'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton',
            'LSF3medium', 'LSF3tight'
        ]
        allcuts = set()
        output['cutflow']['none'] += len(events)
        for cut in cutflow:
            allcuts.add(cut)
            output['cutflow'][cut] += selection.all(*allcuts).sum()

        regions = {}
        regions['presel'] = {'trigger', 'jetkin', 'jetmsd', 'lepinjet'}
        regions['antibtag'] = {
            'trigger', 'jetkin', 'jetmsd', 'antiak4btagMediumOppHem'
        }
        regions['noinjet'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepnearjet',
            'antiak4btagMediumOppHem'
        }
        regions['nolsf'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet',
            'antiak4btagMediumOppHem'
        }  #,'nootherlepton'}
        regions['lsf'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight'
        }
        regions['bopp'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
            'antiak4btagMediumOppHem'
        }
        regions['lep'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
            'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton'
        }

        for region in self._regions:
            selections = regions[region]
            cut = selection.all(*selections)
            weight = weights.weight()[cut]

            def normalize(val):
                # Reduce `val` to one flat value per selected event.  Inputs
                # that do not support pad/fillna fall back to a plain
                # flatten, and inputs that cannot be flattened are returned
                # masked only.
                try:
                    return val[cut].pad(1, clip=True).fillna(0).flatten()
                except Exception:
                    try:
                        return val[cut].flatten()
                    except Exception:
                        return val[cut]

            # Only the two fmmjetprop histogram families are filled.
            output['%s_fmmjetprop' % region].fill(
                fjet_pt=normalize(candidatejet.pt),
                # disabled axes:
                #fjet_mmass = normalize(jmass),
                #fjet_hmass = normalize(hmass),
                lep_pt=normalize(candidatelep.pt),
                fjet_lsf3=normalize(candidatejet.lsf3),
                genflavor=normalize(genflavor),
                dataset=dataset,
                weight=weight)
            output['%s_fmmjetprop2' % region].fill(
                fjet_mmass=normalize(jmass),
                fjet_lsf3=normalize(candidatejet.lsf3),
                genflavor=normalize(genflavor),
                dataset=dataset,
                weight=weight)

        return output
Example #14
0
    def process(self, events):
        """Fill the photon-purity ``count`` and ``sumw`` histograms.

        Loose muons and electrons are built first so that photons and jets
        can be cleaned against them.  Three photon collections are then
        filled into ``hout['count']`` under the categories ``medium``,
        ``medium_nosieie`` and ``medium_nosieie_invertiso`` for events
        passing the MET filters, the single-photon triggers, ``MET.pt < 60``
        and at least one clean jet.

        Parameters
        ----------
        events
            Event chunk exposing ``metadata``, ``columns``, ``size`` and the
            ``Muon``/``Electron``/``Photon``/``Jet``/``MET``/``Flag``/``HLT``
            collections read below.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` with
        its ``count`` and ``sumw`` histograms filled.
        """
        dataset = events.metadata['dataset']
        isData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        hout = self.accumulator.identity()
        match = self._common['match']

        isLooseElectron = self._ids['isLooseElectron']
        isLooseMuon = self._ids['isLooseMuon']
        isLoosePhoton = self._ids['isLoosePhoton']
        isTightPhoton = self._ids['isTightPhoton']
        isGoodJet = self._ids['isGoodJet']

        #### Select loose muon and electron to select clean photon
        # The builtin ``bool`` is used as the dtype throughout: the
        # ``np.bool`` alias is no longer available in current NumPy.
        mu = events.Muon
        mu['isloose'] = isLooseMuon(mu.pt, mu.eta, mu.pfRelIso04_all,
                                    mu.looseId, self._year)
        mu_loose = mu[mu.isloose.astype(bool)]

        e = events.Electron
        e['isclean'] = ~match(e, mu_loose, 0.3)
        e['isloose'] = isLooseElectron(e.pt, e.eta + e.deltaEtaSC, e.dxy, e.dz,
                                       e.cutBased, self._year)
        e_clean = e[e.isclean.astype(bool)]
        e_loose = e_clean[e_clean.isloose.astype(bool)]

        #### Consider clean and tight photon for purity measurement
        pho = events.Photon
        pho['isclean'] = ~match(pho, mu_loose, 0.5) & ~match(pho, e_loose, 0.5)

        # The photon ID lives in a different column for 2016.
        _id = 'cutBasedBitmap'
        if self._year == '2016':
            _id = 'cutBased'

        def isPurityPhoton(pt, medium_id):
            # pt > 200 plus the ID requirement: an ordinal comparison for
            # 2016, a test on bit value 2 for the other years.
            if self._year == '2016':
                passes_id = medium_id >= 2
            else:
                passes_id = (medium_id & 2) == 2
            return (pt > 200) & passes_id

        pho['isloose'] = isLoosePhoton(pho.pt, pho.eta, pho[_id],
                                       self._year) & (pho.electronVeto)
        pho['ispurity'] = isPurityPhoton(
            pho.pt, pho[_id]) & (pho.isScEtaEB) & (pho.electronVeto)
        pho_clean = pho[pho.isclean.astype(bool)]
        pho_loose = pho_clean[pho_clean.isloose.astype(bool)]
        pho_purity = pho_clean[pho_clean.ispurity.astype(bool)]
        pho_nosieie = pho_clean[(pho_clean.pt > 200) & (pho_clean.isScEtaEB) &
                                (pho_clean.electronVeto)
                                & medium_id_no_sieie(pho_clean)]
        pho_nosieie_inv_iso = pho_clean[(pho_clean.pt > 200)
                                        & (pho_clean.isScEtaEB) &
                                        (pho_clean.electronVeto) &
                                        medium_id_no_sieie_inv_iso(pho_clean)]

        #### Consider AK4 jet
        def isPurityJet(pt, eta, jet_id):
            mask = (pt > 30) & (abs(eta) < 2.4) & ((jet_id & 2) == 2)
            return mask

        j = events.Jet
        #30 GeV cut on jet pT, we need to check later
        #j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId, j.neHEF, j.neEmEF, j.chHEF, j.chEmEF)
        j['ispurity'] = isPurityJet(j.pt, j.eta, j.jetId)
        j['isclean'] = ~match(j, e_loose, 0.4) & ~match(
            j, mu_loose, 0.4) & ~match(j, pho_loose, 0.4)
        j_purity = j[j.ispurity.astype(bool)]
        j_clean = j_purity[j_purity.isclean.astype(bool)]
        j_nclean = j_clean.counts

        met = events.MET

        #### Genweights
        weights = processor.Weights(len(events), storeIndividual=True)

        # Data carries unit weights so both cases share the code below.
        if isData:
            weights.add('genw', np.ones(events.size))
        else:
            weights.add('genw', events.genWeight)

        #### MET filter & single photon trigger
        # AND of all filter flags; eeBadScFilter is required on data only.
        met_filters = np.ones(events.size, dtype=bool)
        if isData: met_filters = met_filters & events.Flag['eeBadScFilter']
        for flag in PhotonPurity.met_filter_flags[self._year]:
            met_filters = met_filters & events.Flag[flag]
        #selection.add('met_filters',met_filters)

        # OR of every configured trigger path present in this chunk.
        triggers = np.zeros(events.size, dtype=bool)
        for path in self._singlephoton_triggers[self._year]:
            if path not in events.HLT.columns: continue
            triggers = triggers | events.HLT[path]
        #selection.add('singlephoton_triggers', triggers)

        #selection.add('jet_cut', (j_nclean>0))
        #selection.add('met60', (met.pt<60))

        event_mask = met_filters & triggers & (met.pt < 60) & (j_nclean > 0)

        # weight_shape receives the jagged photon quantity and the per-event
        # weights; presumably it expands the weights to one per photon --
        # TODO confirm against its definition.
        hout['count'].fill(dataset=dataset,
                           cat='medium',
                           sieie=pho_purity.sieie[event_mask].flatten(),
                           pt=pho_purity.pt[event_mask].flatten(),
                           weight=weight_shape(pho_purity.sieie[event_mask],
                                               weights.weight()[event_mask]))

        hout['count'].fill(dataset=dataset,
                           cat='medium_nosieie',
                           sieie=pho_nosieie.sieie[event_mask].flatten(),
                           pt=pho_nosieie.pt[event_mask].flatten(),
                           weight=weight_shape(pho_nosieie.sieie[event_mask],
                                               weights.weight()[event_mask]))

        hout['count'].fill(
            dataset=dataset,
            cat='medium_nosieie_invertiso',
            sieie=pho_nosieie_inv_iso.sieie[event_mask].flatten(),
            pt=pho_nosieie_inv_iso.pt[event_mask].flatten(),
            weight=weight_shape(pho_nosieie_inv_iso.sieie[event_mask],
                                weights.weight()[event_mask]))

        # One entry per chunk: weight 1 for data, summed genWeight otherwise.
        if isData:
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)
        else:
            hout['sumw'].fill(dataset=dataset,
                              sumw=1,
                              weight=events.genWeight.sum())

        return hout
Example #15
0
    def process(self, df):
        """Fill lepton-jet isolation histograms for events with >= 2 lepton-jets.

        Lepton-jets are built from the ``pfjet_*`` branches, labelled by their
        daughter types and charges, and filtered (``nocosmic``, ``pt > 30``,
        ``mintkdist < 50``).  Events with at least two remaining lepton-jets
        are assigned a channel code (1 or 2, 0 otherwise) and, after the
        neutral/charged requirement selected by ``self.bothNeutral``, the
        ``minpfiso``, ``maxpfiso``, ``lj0pfiso``, ``sumpt``, ``pfiso`` and
        ``isodbeta`` histograms are filled.

        Parameters
        ----------
        df
            Event chunk indexable by branch name and exposing ``size``.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()``; it is
        returned unfilled when no event has two lepton-jets.
        """
        output = self.accumulator.identity()
        dataset = df['dataset']

        ## construct weights ##
        wgts = processor.Weights(df.size)
        if self.data_type != 'data':
            wgts.add('genw', df['weight'])
            npv = df['trueInteractionNum']
            wgts.add('pileup', *(f(npv) for f in self.pucorrs))

        # Boolean event masks are added as weights, so a failing event ends
        # up with weight zero.
        triggermask = np.logical_or.reduce([df[t] for t in Triggers])
        wgts.add('trigger', triggermask)
        cosmicpairmask = df['cosmicveto_result']
        wgts.add('cosmicveto', cosmicpairmask)
        pvmask = df['metfilters_PrimaryVertexFilter']
        wgts.add('primaryvtx', pvmask)
        # ...bla bla, other weights goes here

        weight = wgts.weight()
        ########################

        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
            sumtkpt=df['pfjet_tkPtSum05'].content,
            pfiso=df['pfjet_pfIsolationNoPU05'].content,
            isodbeta=df['pfjet_pfiso'].content,
            mintkdist=df['pfjet_pfcands_minTwoTkDist'].content,
        )
        # Daughter-type codes: 3 is counted as `npfmu`, 8 as `ndsa`.
        ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
        npfmu = (ljdautype == 3).sum()
        ndsa = (ljdautype == 8).sum()
        isegammajet = (npfmu == 0) & (ndsa == 0)
        ispfmujet = (npfmu >= 2) & (ndsa == 0)
        isdsajet = ndsa > 0
        label = isegammajet.astype(int) * 1 + ispfmujet.astype(
            int) * 2 + isdsajet.astype(int) * 3
        leptonjets.add_attributes(label=label)
        nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
        leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
        ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        leptonjets.add_attributes(qsum=ljdaucharge)
        leptonjets.add_attributes(
            isneutral=(leptonjets.iseltype
                       | (leptonjets.ismutype & (leptonjets.qsum == 0))))
        leptonjets.add_attributes(
            mucharged=(leptonjets.iseltype
                       | (leptonjets.ismutype & (leptonjets.qsum != 0))))
        ljdsamuSubset = fromNestNestIndexArray(
            df['dsamuon_isSubsetFilteredCosmic1Leg'],
            awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
        leptonjets.add_attributes(nocosmic=(ljdsamuSubset.sum() == 0))
        leptonjets = leptonjets[(leptonjets.nocosmic) & (leptonjets.pt > 30) &
                                (leptonjets.mintkdist < 50)]

        ## __ twoleptonjets__
        twoleptonjets = leptonjets.counts >= 2
        dileptonjets = leptonjets[twoleptonjets]
        wgt = weight[twoleptonjets]

        if dileptonjets.size == 0: return output
        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        # Second entry of the pt argsort; presumably the ordering is
        # descending so this is the subleading lepton-jet -- TODO confirm the
        # argsort default of the array library in use.
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        ## channel def ##
        singleMuljEvents = dileptonjets.ismutype.sum() == 1
        muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
        channel_2mu2e = (singleMuljEvents
                         & muljInLeading2Events).astype(int) * 1

        doubleMuljEvents = dileptonjets.ismutype.sum() == 2
        muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
        channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2

        channel_ = channel_2mu2e + channel_4mu
        ###########

        # isControl = (np.abs(lj0.p4.delta_phi(lj1.p4))<np.pi/2).flatten()
        # if self.data_type!='data':
        #     dileptonjets = dileptonjets[isControl]
        #     channel_ = channel_[isControl]
        #     wgt = wgt[isControl]

        mask_ = (lj0.isneutral & lj1.isneutral).flatten()
        if self.bothNeutral is False:
            mask_ = (lj0.mucharged & lj1.mucharged).flatten()
            # mask_ = ((channel_==2)&((~lj0.isneutral&(~lj1.isneutral)).flatten())) | ((channel_==1)&mask_)

        # lj0/lj1 are left unmasked here; mask_ is applied to the quantities
        # derived from them at fill time.
        channel_ = channel_[mask_]
        wgt = wgt[mask_]
        dileptonjets = dileptonjets[mask_]

        # Per-event min/max pfiso of the two selected lepton-jets.  Exactly
        # one of the two indicator terms is active for any ordered pair: the
        # second one is non-strict so that equal values yield the common
        # value rather than zero.
        lj0_larger = (lj0.pfiso > lj1.pfiso).astype(int)
        lj0_not_larger = (lj0.pfiso <= lj1.pfiso).astype(int)

        minpfiso = lj0_larger * lj1.pfiso + lj0_not_larger * lj0.pfiso
        output['minpfiso'].fill(dataset=dataset,
                                iso=minpfiso[mask_].flatten(),
                                channel=channel_,
                                weight=wgt)
        maxpfiso = lj0_larger * lj0.pfiso + lj0_not_larger * lj1.pfiso
        output['maxpfiso'].fill(dataset=dataset,
                                iso=maxpfiso[mask_].flatten(),
                                channel=channel_,
                                weight=wgt)

        output['lj0pfiso'].fill(dataset=dataset,
                                iso=lj0.pfiso[mask_].flatten(),
                                channel=channel_,
                                weight=wgt)

        # Multiplying by a jagged array of ones repeats the per-event channel
        # and weight once per lepton-jet before flattening.
        ljones = dileptonjets.pt.ones_like()
        output['sumpt'].fill(dataset=dataset,
                             sumpt=dileptonjets.sumtkpt.flatten(),
                             channel=(channel_ * ljones).flatten(),
                             weight=(wgt * ljones).flatten())
        output['pfiso'].fill(dataset=dataset,
                             iso=dileptonjets.pfiso.flatten(),
                             channel=(channel_ * ljones).flatten(),
                             weight=(wgt * ljones).flatten())
        output['isodbeta'].fill(dataset=dataset,
                                iso=dileptonjets.isodbeta.flatten(),
                                channel=(channel_ * ljones).flatten(),
                                weight=(wgt * ljones).flatten())

        return output
    def process(self, df):
        """Select muon-in-fat-jet events, derive columns and fill histograms.

        Events with at least one muon and one fat jet are kept.  Named cuts
        are registered in a ``PackedSelection``, derived quantities are
        written back into ``df`` as new columns, the cutflow is accumulated,
        and every histogram in the accumulator whose fields are all available
        in ``df`` is filled for the region named in its histogram key.

        Parameters
        ----------
        df
            Event chunk indexable by branch name and exposing ``size``; it is
            mutated: the ``jet_*``, ``muon_*`` and ``met_*`` columns are
            added.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` with
        its ``cutflow`` and histograms updated.

        Raises
        ------
        ValueError
            If a fillable histogram's name matches no region or more than one
            region.
        """
        isRealData = 'genWeight' not in df
        output = self.accumulator.identity()

        # select at least one jet and one muon ( this is Pre-Selection! )
        events = buildevents(df, fatjet='CustomAK8Puppi')
        good = (
            (events.muons.counts >= 1)
            & (events.fatjets.counts >= 1)
            )
        events = events[good]

        selection = processor.PackedSelection()
        # trigger: AND of all configured branches, evaluated on the full
        # chunk and then reduced with `good`.
        trigger = np.ones(df.size, dtype='bool')
        for t in self._triggers[self._year+'_'+self._trigger]:
            trigger &= df[t]
        selection.add('trigger', trigger[good])

        # muon selection
        goodmuon = (
            (events.muons.p4.pt > 10)
            & (np.abs(events.muons.p4.eta) < 2.4)
            & (events.muons.sip3d < 4)
            & (np.abs(events.muons.dz) < 0.1)
            & (np.abs(events.muons.dxy) < 0.05)
            & (events.muons.mvaId == 2)
        )
        nmuons = goodmuon.sum()

        # fatjet closest to lepton
        # NOTE(review): the leading muon is the first muon of the event, not
        # the first one passing `goodmuon` -- confirm this is intended.
        leadingmuon = events.muons[:, 0]
        mujet_dR = leadingmuon.p4.delta_r(events.fatjets.p4)
        mu_in_cone = mujet_dR.min() < 0.8 # this I am not sure we have to put as a selection...
        mujet_bestidx = mujet_dR.argmin()
        leadingjet_mu = events.fatjets[mujet_bestidx]

        # The eta acceptance is applied on |eta|, like the muon and electron
        # eta cuts in this method.
        selection.add('jetkin', (
                (leadingjet_mu.p4.pt > 300)
                & (np.abs(leadingjet_mu.p4.eta) < 2.4)
                & (leadingjet_mu.msoftdrop > 10.)
                ).any())
        selection.add('jetid', (leadingjet_mu.jetId & 2).any())  # tight id

        # lepton inside jet?
        selection.add('muinside', mu_in_cone.astype(bool))
        selection.add('LSF3muinside', (leadingjet_mu.electronIdx3SJ == 0).any())
        # NOTE(review): 'LSF3medium' is registered but not referenced by the
        # cutflow or by any region below -- confirm whether it should be.
        selection.add('LSF3medium', (leadingjet_mu.lsf3>0.78).any())

        # veto b-tag in opposite side
        jets = events.jets[
            (events.jets.p4.pt > 30.)
            & (events.jets.jetId & 2)  # tight id
            ]
        ak4_ak8_pair = jets.cross(leadingjet_mu, nested=True)
        dphi = ak4_ak8_pair.i0.p4.delta_phi(ak4_ak8_pair.i1.p4)
        ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
        selection.add('antiak4btagMediumOppHem', ak4_opposite.deepcsvb.max() < self._btagWPs['med'][self._year])

        # b-tag in same side
        #subjets = events.subjets[:, leadingjet_mu.subJetIdx1]

        # final lepton selection
        nelectrons = (
            (events.electrons.p4.pt > 10)
            & (np.abs(events.electrons.p4.eta) < 2.5)
            & (events.electrons.cutBased & (1 << 2)).astype(bool)  # 2017V2 loose
        ).sum()
        selection.add('onemuon', (nmuons == 1) & (nelectrons == 0)) # should we veto taus?
        selection.add('muonkin', (
            (leadingmuon.p4.pt > 27.)
            & (np.abs(leadingmuon.p4.eta) < 2.4)
            ))

        # building variables
        leadingjet_mu = leadingjet_mu.flatten()
        mm = (leadingjet_mu.p4 - leadingmuon.p4).mass2
        jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*leadingjet_mu.p4.mass # (jet - lep).M

        # MET eta from a mass assumption; the arccosh argument is clamped to
        # its domain and only contributes where x >= 1.
        met = events.met
        joffshell = jmass < 62.5
        massassumption = 80.*joffshell + (125 - 80.)*~joffshell
        x = massassumption**2/(2*leadingmuon.p4.pt*met.rho) + np.cos(leadingmuon.p4.phi - met.phi)
        met_eta = (
            (x < 1)*np.arcsinh(x*np.sinh(leadingmuon.p4.eta))
            + (x >= 1)*(
                leadingmuon.p4.eta
                - np.sign(leadingmuon.p4.eta)*np.arccosh(np.maximum(1., x))
                )
            )
        met_p4 = TLorentzVectorArray.from_ptetaphim(met.rho, met_eta, met.phi, np.zeros(met.size))

        # filling missing columns
        df['jet_pt'] = leadingjet_mu.p4.pt
        df['jet_lsf3'] = leadingjet_mu.lsf3
        df['jet_mmass'] = jmass
        df['jet_hmass'] = (met_p4 + leadingjet_mu.p4).mass
        df['jet_oppbtag'] = ak4_opposite.deepcsvb.max()
        df['muon_pt'] = leadingmuon.p4.pt
        df['muon_miso'] = leadingmuon.miniPFRelIso_all
        df['met_pt'] = met.rho
        df['met_eta'] = met_eta

        # fill cutflow: each entry counts events passing that cut and all
        # cuts listed before it.  Every name appears once so that no counter
        # is incremented twice for the same chunk.
        cutflow = ['trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin', 'muinside', 'LSF3muinside']
        allcuts = set()
        output['cutflow']['none'] += len(events)
        for cut in cutflow:
            allcuts.add(cut)
            output['cutflow'][cut] += selection.all(*allcuts).sum()

        # NOTE(review): `weights` is built but the histograms below are
        # filled with the boolean region mask as the weight -- confirm
        # whether the generator weight should be applied.
        weights = processor.Weights(len(events))
        if not isRealData:
            weights.add('genweight', events.genWeight)

        regions = {}
        regions['presel'] = {'trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin'}
        regions['muinjet'] = {'trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin', 'muinside', 'LSF3muinside'}

        for histname, h in output.items():
            if not isinstance(h, hist.Hist):
                continue
            # Histograms with an axis that has no matching column are
            # reported and skipped ('systematic' is exempt from the check).
            if not all(k in df or k == 'systematic' for k in h.fields):
                print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
                continue
            fields = {k: df[k] for k in h.fields if k in df}
            # The region is identified by an underscore-separated token of
            # the histogram name.
            region = [r for r in regions if r in histname.split('_')]
            if len(region) == 1:
                region = region[0]
                cut = selection.all(*regions[region])
                h.fill(**fields, weight=cut)
            elif len(region) > 1:
                raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
            else:
                raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))

        return output
Example #17
0
    def process(self, df):
        """Fill the analysis histograms for one chunk of events.

        The regions to process are the keys of ``self._samples`` for which at
        least one of the listed name fragments occurs in ``df['dataset']``.
        Physics objects are built with ``Initialize``, per-event weights and
        selections are assembled, and every ``hist.Hist`` of the accumulator
        (other than ``sumw``) is filled once per (region, jet selection) pair.

        Parameters
        ----------
        df
            Chunk of event columns; must support ``df[name]``, ``name in df``
            and ``df.size``.

        Returns
        -------
        The object returned by ``self.accumulator.identity()`` after it has
        been filled with this chunk.
        """
        dataset = df['dataset']
        year = self._year

        def attach_branches(candidates, branch_map):
            # Copy each configured branch onto the candidates under its
            # attribute name; a branch absent from this chunk is replaced by
            # zeros with the same structure as the candidates' pt.
            for attribute in branch_map:
                branch = branch_map[attribute]
                if branch in df:
                    candidates[attribute] = df[branch]
                else:
                    candidates[attribute] = candidates.pt.zeros_like()

        def passes_any(paths):
            # Per-event OR of all listed trigger paths present in this chunk.
            passed = np.zeros_like(df['MET_pt'], dtype=bool)
            for path in paths:
                if path in df:
                    passed |= df[path]
            return passed

        # A region is processed when any of its name fragments occurs in the
        # dataset name.  Each region is listed at most once, even when several
        # fragments match, so it is never registered or filled twice.
        dataset_regions = [
            region for region, fragments in self._samples.items()
            if any(fragment in dataset for fragment in fragments)
        ]

        ###
        # Getting corrections, ids, triggers, etc., from .coffea files
        ###

        met_trigger_paths = self._triggers['met_trigger_paths']
        singleele_trigger_paths = self._triggers['singleele_trigger_paths']
        singlepho_trigger_paths = self._triggers['singlepho_trigger_paths']

        get_msd_weight = self._corrections['get_msd_weight']
        get_ttbar_weight = self._corrections['get_ttbar_weight']
        get_nlo_weight = self._corrections['get_nlo_weight']
        get_adhoc_weight = self._corrections['get_adhoc_weight']
        get_pu_weight = self._corrections['get_pu_weight']
        get_met_trig_weight = self._corrections['get_met_trig_weight']
        get_met_zmm_trig_weight = self._corrections['get_met_zmm_trig_weight']
        get_ele_trig_weight = self._corrections['get_ele_trig_weight']
        get_pho_trig_weight = self._corrections['get_pho_trig_weight']

        isLooseElectron = self._ids['isLooseElectron']
        isTightElectron = self._ids['isTightElectron']
        isLooseMuon = self._ids['isLooseMuon']
        isTightMuon = self._ids['isTightMuon']
        isLooseTau = self._ids['isLooseTau']
        isLoosePhoton = self._ids['isLoosePhoton']
        isTightPhoton = self._ids['isTightPhoton']
        isGoodJet = self._ids['isGoodJet']
        isGoodFatJet = self._ids['isGoodFatJet']
        isHEMJet = self._ids['isHEMJet']

        met_filter_flags = self._metfilters['met_filter_flags']

        ###
        # Initialize global quantities (MET etc.)
        ###

        met = Initialize({'pt': df['MET_pt'],
                          'eta': 0,
                          'phi': df['MET_phi'],
                          'mass': 0})

        calomet = Initialize({'pt': df['CaloMET_pt'],
                              'eta': 0,
                              'phi': df['CaloMET_phi'],
                              'mass': 0})

        ###
        # Initialize physics objects
        ###

        # --- electrons ---
        e = Initialize({'pt': df['Electron_pt'],
                        'eta': df['Electron_eta'],
                        'phi': df['Electron_phi'],
                        'mass': df['Electron_mass']})
        attach_branches(e, self._e_id[year])

        e['isloose'] = isLooseElectron(e.pt, e.eta, e.dxy, e.dz, e.iso, e.loose_id, year)
        e['istight'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.iso, e.tight_id, year)

        # Highest-pt electron, kept only if it is also tight.
        leading_e = e[e.pt.argmax()]
        leading_e = leading_e[leading_e.istight.astype(bool)]

        e_loose = e[e.isloose.astype(bool)]
        e_tight = e[e.istight.astype(bool)]

        e_nloose = e_loose.counts
        e_ntight = e_tight.counts

        # --- muons ---
        mu = Initialize({'pt': df['Muon_pt'],
                         'eta': df['Muon_eta'],
                         'phi': df['Muon_phi'],
                         'mass': df['Muon_mass']})
        attach_branches(mu, self._mu_id[year])

        mu['isloose'] = isLooseMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.med_id, year)
        mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.tight_id, year)

        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.istight.astype(bool)]

        mu_loose = mu[mu.isloose.astype(bool)]
        mu_tight = mu[mu.istight.astype(bool)]

        mu_nloose = mu_loose.counts
        mu_ntight = mu_tight.counts

        # --- taus (cleaned against loose muons and electrons) ---
        tau = Initialize({'pt': df['Tau_pt'],
                          'eta': df['Tau_eta'],
                          'phi': df['Tau_phi'],
                          'mass': df['Tau_mass']})
        attach_branches(tau, self._tau_id[year])

        tau['isclean'] = ~tau.match(mu_loose, 0.3) & ~tau.match(e_loose, 0.3)
        tau['isloose'] = isLooseTau(tau.pt, tau.eta, tau.decayMode, tau.id, year) & tau.isclean.astype(bool)
        tau_loose = tau[tau.isloose.astype(bool)]

        tau_nloose = tau_loose.counts

        # --- photons (cleaned against loose electrons) ---
        pho = Initialize({'pt': df['Photon_pt'],
                          'eta': df['Photon_eta'],
                          'phi': df['Photon_phi'],
                          'mass': df['Photon_mass']})
        attach_branches(pho, self._pho_id[year])

        pho['isclean'] = ~pho.match(e_loose, 0.4)
        pho['isloose'] = isLoosePhoton(pho.pt, pho.eta, pho.loose_id, pho.eleveto, year) & pho.isclean.astype(bool)
        pho['istight'] = isTightPhoton(pho.pt, pho.eta, pho.tight_id, pho.eleveto, year) & pho.isclean.astype(bool)

        leading_pho = pho[pho.pt.argmax()]
        leading_pho = leading_pho[leading_pho.istight.astype(bool)]

        pho_loose = pho[pho.isloose.astype(bool)]
        pho_tight = pho[pho.istight.astype(bool)]

        pho_nloose = pho_loose.counts
        pho_ntight = pho_tight.counts

        # --- fat jets (AK15) ---
        fj = Initialize({'pt': df['AK15Puppi_pt'],
                         'eta': df['AK15Puppi_eta'],
                         'phi': df['AK15Puppi_phi'],
                         'mass': df['AK15Puppi_mass']})

        fj['msd'] = df['AK15Puppi_msoftdrop']
        attach_branches(fj, self._fj_id[year])

        fj['isgood'] = isGoodFatJet(fj.pt, fj.eta, fj.id)
        fj['isclean'] = (
            ~fj.match(pho_loose, 1.5)
            & ~fj.match(mu_loose, 1.5)
            & ~fj.match(e_loose, 1.5)
            & fj.isgood.astype(bool)
        )

        attach_branches(fj, self._deep[year])

        # Combined tagger discriminants built from the per-class scores.
        fj['probQCD'] = fj.probQCDbb + fj.probQCDcc + fj.probQCDb + fj.probQCDc + fj.probQCDothers
        fj['TvsQCD'] = (fj.probTbcq + fj.probTbqq) / (fj.probTbcq + fj.probTbqq + fj.probQCD)
        fj['ZHbbvsQCD'] = (fj.probZbb + fj.probHbb) / (fj.probZbb + fj.probHbb + fj.probQCD)
        fj['VvsQCD'] = (
            (fj.probWcq + fj.probWqq + fj.probZcc + fj.probZqq + fj.probZbb)
            / (fj.probWcq + fj.probWqq + fj.probZcc + fj.probZqq + fj.probZbb + fj.probQCD)
        )

        leading_fj = fj[fj.pt.argmax()]
        leading_fj = leading_fj[leading_fj.isclean.astype(bool)]
        leading_fj_msd_corr = leading_fj.msd.sum() * get_msd_weight(leading_fj.pt.sum(), leading_fj.eta.sum())

        fj_good = fj[fj.isgood.astype(bool)]
        fj_clean = fj[fj.isclean.astype(bool)]

        fj_ntot = fj.counts
        fj_ngood = fj_good.counts
        fj_nclean = fj_clean.counts

        # --- AK4 jets ---
        j = Initialize({'pt': df['Jet_pt'],
                        'eta': df['Jet_eta'],
                        'phi': df['Jet_phi'],
                        'mass': df['Jet_mass']})

        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
        j['deepcsv'] = df['Jet_btagDeepB']
        j['deepflv'] = df['Jet_btagDeepFlavB']
        attach_branches(j, self._j_id[year])

        j['isgood'] = isGoodJet(j.pt, j.eta, j.id, j.nhf, j.nef, j.chf, j.cef)
        j['isHEM'] = isHEMJet(j.pt, j.eta, j.phi)
        j['isclean'] = (
            ~j.match(e_loose, 0.4)
            & ~j.match(mu_loose, 0.4)
            & ~j.match(pho_loose, 0.4)
            & j.isgood.astype(bool)
        )
        # "iso" jets are clean jets that do not overlap a clean fat jet; the
        # b-tag working-point flags below are defined on iso jets only.
        j['isiso'] = ~(j.match(fj_clean, 1.5)) & j.isclean.astype(bool)
        j['isdcsvL'] = (j.deepcsv > 0.1241) & j.isiso.astype(bool)
        j['isdflvL'] = (j.deepflv > 0.0494) & j.isiso.astype(bool)
        j['isdcsvM'] = (j.deepcsv > 0.4184) & j.isiso.astype(bool)
        j['isdflvM'] = (j.deepflv > 0.2770) & j.isiso.astype(bool)
        j['isdcsvT'] = (j.deepcsv > 0.7527) & j.isiso.astype(bool)
        j['isdflvT'] = (j.deepflv > 0.7264) & j.isiso.astype(bool)

        leading_j = j[j.pt.argmax()]
        leading_j = leading_j[leading_j.isclean.astype(bool)]

        j_clean = j[j.isclean.astype(bool)]
        j_dcsvL = j[j.isdcsvL]
        j_dflvL = j[j.isdflvL]
        j_dcsvM = j[j.isdcsvM]
        j_dflvM = j[j.isdflvM]
        j_dcsvT = j[j.isdcsvT]
        j_dflvT = j[j.isdflvT]
        j_HEM = j[j.isHEM.astype(bool)]

        j_nclean = j_clean.counts
        j_ndcsvL = j_dcsvL.counts
        j_ndflvL = j_dflvL.counts
        j_ndcsvM = j_dcsvM.counts
        j_ndflvM = j_dflvM.counts
        j_ndcsvT = j_dcsvT.counts
        j_ndflvT = j_dflvT.counts
        j_nHEM = j_HEM.counts

        ###
        # Calculating derived quantities
        ###

        # Dilepton candidates from loose-lepton pairs.  When the chunk holds
        # no pair at all, fall back to the leading lepton so that the later
        # expressions still have an object to work with.
        ele_pairs = e_loose.distincts()
        diele = leading_e
        leading_diele = leading_e
        if ele_pairs.i0.content.size > 0:
            diele = ele_pairs.i0 + ele_pairs.i1
            leading_diele = diele[diele.pt.argmax()]

        mu_pairs = mu_loose.distincts()
        leading_dimu = leading_mu
        if mu_pairs.i0.content.size > 0:
            dimu = mu_pairs.i0 + mu_pairs.i1
            leading_dimu = dimu[dimu.pt.argmax()]

        # Recoil per region: MET plus the selected lepton/photon system.
        u = {
            "iszeroL": met,
            "isoneM": met + leading_mu.sum(),
            "isoneE": met + leading_e.sum(),
            "istwoM": met + leading_dimu.sum(),
            "istwoE": met + leading_diele.sum(),
            "isoneA": met + leading_pho.sum(),
        }

        lepSys = {
            "iszeroL": met,
            "isoneM": leading_mu.sum(),
            "isoneE": leading_e.sum(),
            "istwoM": leading_dimu.sum(),
            "istwoE": leading_diele.sum(),
            "isoneA": leading_pho.sum(),
        }

        leadlepton = {
            "iszeroL": met,
            "isoneM": leading_mu.sum(),
            "isoneE": leading_e.sum(),
            "istwoM": leading_mu.sum(),
            "istwoE": leading_e.sum(),
            "isoneA": leading_pho.sum(),
        }

        ###
        # Calculating weights
        ###

        ###
        # For MC, retrieve the LHE weights, to take into account NLO destructive interference, and their sum
        ###

        genw = np.ones_like(df['MET_pt'])
        sumw = 1.
        wnlo = np.ones_like(df['MET_pt'])
        adhocw = np.ones_like(df['MET_pt'])
        # An xsec of -1 disables the generator-level weights for the dataset.
        if self._xsec[dataset] != -1:
            genw = df['genWeight']
            sumw = genw.sum()

            # 'GJets' has to be admitted here as well, otherwise the photon
            # branch of the chain below can never be reached.
            if any(name in dataset for name in ('TTJets', 'WJets', 'DY', 'ZJets', 'GJets')):
                gen_flags = df['GenPart_statusFlags']
                # NOTE(review): NanoAOD documents bit 13 of statusFlags as
                # isLastCopy, so `== 0` keeps particles with that bit UNSET --
                # confirm this is intended given the variable name.
                LastCopy = (gen_flags & (1 << 13)) == 0
                gen_pt = df['GenPart_pt'][LastCopy]
                gen_pdgid = df['GenPart_pdgId'][LastCopy]

                genTops = gen_pt[abs(gen_pdgid) == 6]
                genWs = gen_pt[abs(gen_pdgid) == 24]
                genZs = gen_pt[abs(gen_pdgid) == 23]
                genAs = gen_pt[abs(gen_pdgid) == 22]

                # NOTE(review): `genX[0]` / `genX[1]` presumably index the
                # first/second EVENT of the jagged array rather than the
                # first/second particle of every event, which would make the
                # weights below a single number for the whole chunk -- verify.
                if 'TTJets' in dataset:
                    wnlo = np.sqrt(get_ttbar_weight(genTops[0].sum()) * get_ttbar_weight(genTops[1].sum()))
                elif 'WJets' in dataset:
                    wnlo = get_nlo_weight[year]['w'](genWs[0].sum())
                    if year != '2016':
                        adhocw = get_adhoc_weight['w'](genWs[0].sum())
                elif 'DY' in dataset or 'ZJets' in dataset:
                    wnlo = get_nlo_weight[year]['z'](genZs[0].sum())
                    if year != '2016':
                        adhocw = get_adhoc_weight['z'](genZs[0].sum())
                elif 'GJets' in dataset:
                    wnlo = get_nlo_weight[year]['a'](genAs[0].sum())

        ###
        # Calculate PU weight and systematic variations
        ###

        nvtx = df['PV_npvs']
        pu = get_pu_weight[year]['cen'](nvtx)
        puUp = get_pu_weight[year]['up'](nvtx)
        puDown = get_pu_weight[year]['down'](nvtx)

        ###
        # Importing the MET filters per year from metfilters.py and constructing the filter boolean
        ###

        # Product over all filter flags present in the chunk, i.e. their AND.
        # It does not depend on the region, so it is computed once here.
        met_filters = [df[flag] for flag in met_filter_flags[year] if flag in df]
        pass_met_filters = np.prod(met_filters, axis=0)

        ###
        # Importing the trigger paths per year from trigger.py and constructing the trigger boolean
        ###

        passMetTrig = passes_any(met_trigger_paths[year])
        passSingleEleTrig = passes_any(singleele_trigger_paths[year])
        passSinglePhoTrig = passes_any(singlepho_trigger_paths[year])

        pass_trig = {
            'iszeroL': passMetTrig,
            'isoneM': passMetTrig,
            'istwoM': passMetTrig,
            'isoneE': passSingleEleTrig,
            'istwoE': passSingleEleTrig,
            'isoneA': passSinglePhoTrig,
        }

        ###
        # Trigger efficiency weight
        ###

        trig = {}
        trig['iszeroL'] = get_met_trig_weight[year](u["iszeroL"].pt)
        trig['isoneM'] = get_met_trig_weight[year](u["isoneM"].pt)
        trig['istwoM'] = get_met_zmm_trig_weight[year](u["istwoM"].pt)
        trig['isoneE'] = get_ele_trig_weight[year](leading_e.eta.sum(), leading_e.pt.sum())
        trig['istwoE'] = trig['isoneE']
        if ele_pairs.i0.content.size > 0:
            # Efficiency for at least one of the two electrons of the leading
            # pair to fire: 1 - (1 - eff1) * (1 - eff2).
            leading_pair = ele_pairs[diele.pt.argmax()]
            eff1 = get_ele_trig_weight[year](leading_pair.i0.eta.sum(), leading_pair.i0.pt.sum())
            eff2 = get_ele_trig_weight[year](leading_pair.i1.eta.sum(), leading_pair.i1.pt.sum())
            trig['istwoE'] = 1 - (1 - eff1) * (1 - eff2)
        trig['isoneA'] = get_pho_trig_weight[year](leading_pho.pt.sum())

        ###
        # Event selection
        ###

        selections = processor.PackedSelection()

        no_e = (e_nloose == 0)
        no_mu = (mu_nloose == 0)
        no_tau = (tau_nloose == 0)
        no_pho = (pho_nloose == 0)
        dimu_mass = leading_dimu.mass.sum()
        diele_mass = leading_diele.mass.sum()
        is_hs_tagged = (leading_fj.ZHbbvsQCD.sum() > 0.65)

        selections.add('iszeroL', no_e & no_mu & no_tau & no_pho)
        selections.add('isoneM', no_e & (mu_ntight == 1) & no_tau & no_pho)
        selections.add('isoneE', (e_ntight == 1) & no_mu & no_tau & no_pho & (met.pt > 50))
        selections.add('istwoM', no_e & (mu_ntight >= 1) & (mu_nloose == 2) & no_tau & no_pho
                       & (dimu_mass > 60) & (dimu_mass < 120))
        selections.add('istwoE', (e_ntight >= 1) & (e_nloose == 2) & no_mu & no_tau & no_pho
                       & (diele_mass > 60) & (diele_mass < 120))
        selections.add('isoneA', no_e & no_mu & no_tau & (pho_ntight == 1))
        selections.add('noextrab', (j_ndflvL == 0))
        selections.add('extrab', (j_ndflvL > 0))
        selections.add('ismonohs', is_hs_tagged)
        selections.add('ismonojet', ~is_hs_tagged)
        selections.add('mass0', (leading_fj_msd_corr < 30))
        selections.add('mass1', (leading_fj_msd_corr >= 30) & (leading_fj_msd_corr < 60))
        selections.add('mass2', (leading_fj_msd_corr >= 60) & (leading_fj_msd_corr < 80))
        selections.add('mass3', (leading_fj_msd_corr >= 80) & (leading_fj_msd_corr < 120))
        selections.add('mass4', (leading_fj_msd_corr >= 120))
        selections.add('noHEMj', (j_nHEM == 0))

        ###
        # Adding weights and selections
        ###

        # Jet selections are every combination of a mass window ('baggy' means
        # no mass requirement) with a b-tag / tagger category.  The table is
        # built once and used both to define the regions and to drive the
        # filling loop, so the two can never get out of sync.
        mass_windows = ('baggy', 'mass0', 'mass1', 'mass2', 'mass3', 'mass4')
        categories = (
            ('', ('noextrab',)),
            ('_extrab', ('extrab',)),
            ('_ismonohs', ('noextrab', 'ismonohs')),
            ('_extrab_ismonohs', ('extrab', 'ismonohs')),
            ('_ismonojet', ('noextrab', 'ismonojet')),
            ('_extrab_ismonojet', ('extrab', 'ismonojet')),
        )
        jet_selections = []
        for suffix, category_cuts in categories:
            for window in mass_windows:
                cuts = {'noHEMj', *category_cuts}
                if window != 'baggy':
                    cuts.add(window)
                jet_selections.append((window + suffix, cuts))

        weights = {}
        regions = {}
        for k in dataset_regions:
            weights[k] = processor.Weights(df.size)
            weights[k].add('nlo', wnlo)
            weights[k].add('adhoc', adhocw)
            weights[k].add('genw', genw)
            weights[k].add('pileup', pu, puUp, puDown)
            weights[k].add('passMetFilters', pass_met_filters)
            weights[k].add('trig', trig[k])
            weights[k].add('pass_trig', pass_trig[k])

            # Region-specific baseline, since the recoil u[k] differs by region.
            selections.add(k + 'baggy',
                           (fj_nclean > 0)
                           & (fj_clean.pt.max() > 160)
                           & (abs(u[k].delta_phi(j_clean)).min() > 0.8)
                           & (u[k].pt > 250))

            for name, cuts in jet_selections:
                regions[k + '_' + name] = {k, k + 'baggy'} | cuts

        variables = {
            'j1pt': leading_j.pt,
            'j1eta': leading_j.eta,
            'j1phi': leading_j.phi,
            'fj1pt': leading_fj.pt,
            'fj1eta': leading_fj.eta,
            'fj1phi': leading_fj.phi,
            'e1pt': leading_e.pt,
            'e1phi': leading_e.phi,
            'e1eta': leading_e.eta,
            'dielemass': leading_diele.mass,
            'mu1pt': leading_mu.pt,
            'mu1phi': leading_mu.phi,
            'mu1eta': leading_mu.eta,
            'dimumass': leading_dimu.mass,
            'njets': j_nclean,
            'ndcsvL': j_ndcsvL,
            'ndflvL': j_ndflvL,
            'ndcsvM': j_ndcsvM,
            'ndflvM': j_ndflvM,
            'ndcsvT': j_ndcsvT,
            'ndflvT': j_ndflvT,
            'nfjtot': fj_ntot,
            'nfjgood': fj_ngood,
            'nfjclean': fj_nclean,
            'TvsQCD': leading_fj.TvsQCD,
            'ZHbbvsQCD': leading_fj.ZHbbvsQCD,
            'VvsQCD': leading_fj.VvsQCD,
            'probTbcq': leading_fj.probTbcq,
            'probTbqq': leading_fj.probTbqq,
            'probTbc': leading_fj.probTbc,
            'probTbq': leading_fj.probTbq,
            'probWcq': leading_fj.probWcq,
            'probWqq': leading_fj.probWqq,
            'probZbb': leading_fj.probZbb,
            'probZcc': leading_fj.probZcc,
            'probZqq': leading_fj.probZqq,
            'probHbb': leading_fj.probHbb,
            'probHcc': leading_fj.probHcc,
            'probHqqqq': leading_fj.probHqqqq,
            'probQCDbb': leading_fj.probQCDbb,
            'probQCDcc': leading_fj.probQCDcc,
            'probQCDb': leading_fj.probQCDb,
            'probQCDc': leading_fj.probQCDc,
            'probQCDothers': leading_fj.probQCDothers,
        }

        hout = self.accumulator.identity()
        hout['sumw'].fill(dataset=dataset, sumw=1, weight=sumw)
        for r in dataset_regions:
            weight = weights[r].weight()
            for s, _ in jet_selections:
                cut = selections.all(*regions[r + '_' + s])
                # Generic variables are filled from the selected events only;
                # entries that are NaN get weight zero.
                flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
                flat_weights = {k: (~np.isnan(v[cut]) * weight[cut]).flatten() for k, v in variables.items()}
                for histname, h in hout.items():
                    if not isinstance(h, hist.Hist):
                        continue
                    elif histname == 'sumw':
                        continue
                    # The histograms below are filled with every event and use
                    # the cut as a 0/1 factor in the weight instead.
                    elif histname == 'fjmass':
                        h.fill(dataset=dataset, region=r, jet_selection=s, fjmass=leading_fj_msd_corr, weight=weight*cut)
                    elif histname == 'recoil':
                        h.fill(dataset=dataset, region=r, jet_selection=s, recoil=u[r].pt, weight=weight*cut)
                    elif histname == 'CaloMinusPfOverRecoil':
                        h.fill(dataset=dataset, region=r, jet_selection=s, CaloMinusPfOverRecoil=abs(calomet.pt - met.pt) / u[r].pt, weight=weight*cut)
                    elif histname == 'mindphi':
                        h.fill(dataset=dataset, region=r, jet_selection=s, mindphi=abs(u[r].delta_phi(j_clean)).min(), weight=weight*cut)
                    elif histname == 'diledphi':
                        h.fill(dataset=dataset, region=r, jet_selection=s, diledphi=abs(lepSys[r].delta_phi(j_clean)).min(), weight=weight*cut)
                    elif histname == 'ledphi':
                        h.fill(dataset=dataset, region=r, jet_selection=s, ledphi=abs(leadlepton[r].delta_phi(j_clean)).min(), weight=weight*cut)
                    elif histname == 'recoilVSmindphi':
                        h.fill(dataset=dataset, region=r, jet_selection=s, recoil=u[r].pt, mindphi=abs(u[r].delta_phi(j_clean)).min(), weight=weight*cut)
                    else:
                        flat_variable = {histname: flat_variables[histname]}
                        h.fill(dataset=dataset, region=r, jet_selection=s, **flat_variable, weight=flat_weights[histname])
        return hout
Example #18
0
    def process(self, df):
        """Fill the ``ljpfiso`` histogram with lepton-jet isolation values.

        Events need at least two lepton-jets. ``lj0`` is the highest-pT
        lepton-jet and ``lj1`` the second entry of the pT argsort (assumed
        to be descending -- TODO confirm for the awkward version in use).
        Channel codes: 1 (``'2mu2e'``) when exactly one lepton-jet is
        mu-type and it is one of ``lj0``/``lj1``; 2 (``'4mu'``) when exactly
        two are mu-type and they are ``lj0`` and ``lj1``; 0 otherwise.
        With ``self.dphi_control`` set, only events with
        ``|dphi(lj0, lj1)| < pi/2`` are kept.

        Args:
            df: chunk of events exposing ``size`` and dict-style access to
                the branches read below.

        Returns:
            The accumulator for this chunk; the identity accumulator when
            no event survives a selection step.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        ## construct weights ##
        wgts = processor.Weights(df.size)
        # NOTE(review): 'genw' is applied only when the dataset name is not a
        # single character -- presumably single-letter names are data eras;
        # confirm against the sample naming convention.
        if len(dataset) != 1:
            wgts.add('genw', df['weight'])

        # Boolean masks are added as weights, so failing events get weight 0
        # instead of being dropped.
        triggermask = np.logical_or.reduce([df[t] for t in Triggers])
        wgts.add('trigger', triggermask)
        cosmicpairmask = df['cosmicveto_result']
        wgts.add('cosmicveto', cosmicpairmask)
        pvmask = df['metfilters_PrimaryVertexFilter']
        wgts.add('primaryvtx', pvmask)
        # ...bla bla, other weights goes here

        weight = wgts.weight()
        ########################

        ak4jets = JaggedCandidateArray.candidatesfromcounts(
            df['akjet_ak4PFJetsCHS_p4'],
            px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'],
            py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'],
            pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'],
            energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'],
            jetid=df['akjet_ak4PFJetsCHS_jetid'],
        )
        ak4jets = ak4jets[ak4jets.jetid & (ak4jets.pt > 20) &
                          (np.abs(ak4jets.eta) < 2.5)]

        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'],
            py=df['pfjet_p4.fCoordinates.fY'],
            pz=df['pfjet_p4.fCoordinates.fZ'],
            energy=df['pfjet_p4.fCoordinates.fT'],
            pfisoAll05=df['pfjet_pfIsolation05'],
            pfisoNopu05=df['pfjet_pfIsolationNoPU05'],
            pfisoDbeta=df['pfjet_pfiso'],
            ncands=df['pfjet_pfcands_n'],
        )
        # Count constituents per lepton-jet by candidate type code
        # (presumably 3 = PF muon, 8 = DSA muon -- confirm with the ntuple
        # definition).
        ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
        npfmu = (ljdautype == 3).sum()
        ndsa = (ljdautype == 8).sum()
        isegammajet = (npfmu == 0) & (ndsa == 0)
        ispfmujet = (npfmu >= 2) & (ndsa == 0)
        isdsajet = ndsa > 0
        label = isegammajet.astype(int) * 1 + ispfmujet.astype(
            int) * 2 + isdsajet.astype(int) * 3
        leptonjets.add_attributes(label=label)
        nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
        leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))

        ## __ twoleptonjets__
        twoleptonjets = leptonjets.counts >= 2
        dileptonjets = leptonjets[twoleptonjets]
        ak4jets = ak4jets[twoleptonjets]
        wgt = weight[twoleptonjets]

        if dileptonjets.size == 0:
            return output
        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        ## channel def ##
        singleMuljEvents = dileptonjets.ismutype.sum() == 1
        muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
        channel_2mu2e = (singleMuljEvents
                         & muljInLeading2Events).astype(int) * 1

        doubleMuljEvents = dileptonjets.ismutype.sum() == 2
        muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
        channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2

        channel_ = channel_2mu2e + channel_4mu
        ###########

        isControl = (np.abs(lj0.p4.delta_phi(lj1.p4)) < np.pi / 2).flatten()

        ## __isControl__
        if self.dphi_control:
            leptonjets_ = dileptonjets[isControl]
            wgt = wgt[isControl]
            lj0 = lj0[isControl]
            lj1 = lj1[isControl]
            channel_ = channel_[isControl]
            # Keep the AK4 jets aligned with the other per-event arrays;
            # otherwise ``ak4jets.counts[channel_ == N]`` below is indexed
            # with a mask of a different length.
            ak4jets = ak4jets[isControl]
        else:
            leptonjets_ = dileptonjets
        if leptonjets_.size == 0:
            return output

        # (isotype axis label, lepton-jet attribute holding that isolation)
        isotypes = (
            ('all05', 'pfisoAll05'),
            ('nopu05', 'pfisoNopu05'),
            ('dbeta', 'pfisoDbeta'),
        )

        ## 4mu: lj0/lj1 are the two mu-type lepton-jets
        is4mu = channel_ == 2
        for isotype, attr in isotypes:
            output['ljpfiso'].fill(dataset=dataset,
                                   lj0iso=getattr(lj0[is4mu], attr).flatten(),
                                   lj1iso=getattr(lj1[is4mu], attr).flatten(),
                                   weight=wgt[is4mu],
                                   channel='4mu',
                                   isotype=isotype,
                                   njet=ak4jets.counts[is4mu])

        ## 2mu2e: lj0iso takes the leading el-type lepton-jet, lj1iso the
        ## leading mu-type one
        is2mu2e = channel_ == 1
        leptonjets_2mu2e = leptonjets_[is2mu2e]
        egm_2mu2e = leptonjets_2mu2e[leptonjets_2mu2e.iseltype]
        egm_2mu2e = egm_2mu2e[egm_2mu2e.pt.argmax()]
        mu_2mu2e = leptonjets_2mu2e[leptonjets_2mu2e.ismutype]
        mu_2mu2e = mu_2mu2e[mu_2mu2e.pt.argmax()]
        for isotype, attr in isotypes:
            output['ljpfiso'].fill(dataset=dataset,
                                   lj0iso=getattr(egm_2mu2e, attr).flatten(),
                                   lj1iso=getattr(mu_2mu2e, attr).flatten(),
                                   weight=wgt[is2mu2e],
                                   channel='2mu2e',
                                   isotype=isotype,
                                   njet=ak4jets.counts[is2mu2e])

        return output
Example #19
0
    def process(self, df):
        """Fill lepton-jet kinematic histograms for one chunk of events.

        Selected events have at least two lepton-jets, at least one of them
        mu-type. Histograms filled: ``lj0pt``, ``lj1pt``, ``muljmass``,
        ``muljvxy``, ``muljqsum``, ``ljpairmass`` and ``ljpairdphi``, each
        split by a channel code (1: exactly one mu-type lepton-jet, found
        among ``lj0``/``lj1``; 2: exactly two mu-type lepton-jets, being
        ``lj0`` and ``lj1``; 0: anything else).

        Args:
            df: chunk of events exposing ``size`` and dict-style access to
                the branches read below.

        Returns:
            The accumulator for this chunk (identity when nothing is
            selected).
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        # ---- per-event weights -------------------------------------------
        event_weights = processor.Weights(df.size)
        if self.data_type != 'data':
            event_weights.add('genw', df['weight'])
            n_true_int = df['trueInteractionNum']
            pileup_args = (corr(n_true_int) for corr in self.pucorrs)
            event_weights.add('pileup', *pileup_args)

        # Masks enter as weights: events failing one of them weigh zero.
        fired_any = np.logical_or.reduce([df[name] for name in Triggers])
        event_weights.add('trigger', fired_any)
        event_weights.add('cosmicveto', df['cosmicveto_result'])
        event_weights.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])
        # ...bla bla, other weights goes here

        weight = event_weights.weight()

        # ---- lepton-jet collection ---------------------------------------
        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'],
            py=df['pfjet_p4.fCoordinates.fY'],
            pz=df['pfjet_p4.fCoordinates.fZ'],
            energy=df['pfjet_p4.fCoordinates.fT'],
            vx=df['pfjet_klmvtx.fCoordinates.fX'],
            vy=df['pfjet_klmvtx.fCoordinates.fY'],
            vz=df['pfjet_klmvtx.fCoordinates.fZ'],
        )
        # Transverse vertex displacement from the x/y vertex components.
        transverse_disp = np.hypot(leptonjets.vx, leptonjets.vy)
        leptonjets.add_attributes(vxy=transverse_disp)

        # Constituent type codes per lepton-jet (presumably 3 = PF muon and
        # 8 = DSA muon -- confirm with the ntuple definition).
        cand_types = awkward.fromiter(df['pfjet_pfcand_type'])
        n_type3 = (cand_types == 3).sum()
        n_type8 = (cand_types == 8).sum()
        no_type8 = n_type8 == 0
        egamma_like = (n_type3 == 0) & no_type8
        pfmu_like = (n_type3 >= 2) & no_type8
        dsa_like = n_type8 > 0
        label = egamma_like.astype(int) * 1 + pfmu_like.astype(
            int) * 2 + dsa_like.astype(int) * 3
        leptonjets.add_attributes(label=label)

        n_mu_cands = ((cand_types == 3) | (cand_types == 8)).sum()
        leptonjets.add_attributes(ismutype=(n_mu_cands >= 2),
                                  iseltype=(n_mu_cands == 0))

        charge_sum = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        leptonjets.add_attributes(qsum=charge_sum)
        # "Neutral": el-type, or mu-type with zero summed constituent charge.
        neutral_mu = leptonjets.ismutype & (leptonjets.qsum == 0)
        leptonjets.add_attributes(isneutral=(leptonjets.iseltype | neutral_mu))

        # ---- event selection: >= 2 lepton-jets, >= 1 mu-type -------------
        has_two = leptonjets.counts >= 2
        has_mulj = leptonjets.ismutype.sum() >= 1
        twoleptonjets = has_two & has_mulj
        dileptonjets = leptonjets[twoleptonjets]
        wgt = weight[twoleptonjets]

        if dileptonjets.size == 0:
            return output

        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        # ---- channel code per event --------------------------------------
        either_is_mu = (lj0.ismutype | lj1.ismutype).flatten()
        both_are_mu = (lj0.ismutype & lj1.ismutype).flatten()
        is_2mu2e = (dileptonjets.ismutype.sum() == 1) & either_is_mu
        is_4mu = (dileptonjets.ismutype.sum() == 2) & both_are_mu
        channel_ = is_2mu2e.astype(int) * 1 + is_4mu.astype(int) * 2

        # ---- leading / subleading pT -------------------------------------
        for histname, lj in (('lj0pt', lj0), ('lj1pt', lj1)):
            output[histname].fill(dataset=dataset,
                                  pt=lj.pt.flatten(),
                                  channel=channel_,
                                  weight=wgt)

        # ---- per mu-type lepton-jet quantities ---------------------------
        mulj = dileptonjets[dileptonjets.ismutype]
        # Multiplying by a jagged array of ones broadcasts the per-event
        # channel and weight onto every mu-type lepton-jet.
        ones = mulj.pt.ones_like()
        output['muljmass'].fill(dataset=dataset,
                                ljmass=mulj.mass.flatten(),
                                channel=(channel_ * ones).flatten(),
                                weight=(wgt * ones).flatten())
        output['muljvxy'].fill(dataset=dataset,
                               vxy=mulj.vxy.flatten(),
                               channel=(channel_ * ones).flatten(),
                               weight=(wgt * ones).flatten())
        output['muljqsum'].fill(dataset=dataset,
                                qsum=mulj.isneutral.flatten(),
                                channel=(channel_ * ones).flatten(),
                                weight=(wgt * ones).flatten())

        # ---- lepton-jet pair quantities ----------------------------------
        pair_p4 = lj0.p4 + lj1.p4
        output['ljpairmass'].fill(dataset=dataset,
                                  pairmass=pair_p4.mass.flatten(),
                                  channel=channel_,
                                  weight=wgt)
        pair_dphi = np.abs(lj0.p4.delta_phi(lj1.p4))
        output['ljpairdphi'].fill(dataset=dataset,
                                  dphi=pair_dphi.flatten(),
                                  channel=channel_,
                                  weight=wgt)

        return output
Example #20
0
    def process(self, df):
        """Fill the ``dummy`` and ``invmass`` histograms for one chunk.

        Builds electron, muon and jet collections from the flat branches in
        ``df``, defines the ``em``/``mm``/``ee`` channels and the
        ``dilepton``/``2jets`` levels from object counts, and fills
        ``invmass`` for every (channel, level) pair.

        NOTE(review): ``invmass`` is filled with the leading di-muon mass in
        every channel, including ``ee`` and ``em`` -- confirm this is
        intended.

        Args:
            df: chunk of events exposing ``size``, ``in`` membership tests
                and dict-style access to the branches read below.

        Returns:
            The filled accumulator for this chunk.
        """
        # Dataset parameters
        dataset = df['dataset']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']

        ### Recover objects, selection, functions and others...
        # Objects
        isTightMuon = self._objects['isTightMuon']
        isTightElectron = self._objects['isTightElectron']
        isGoodJet = self._objects['isGoodJet']

        # Corrections
        # NOTE(review): the corrections, selections and functions below are
        # looked up but not used further down in this method.
        GetMuonIsoSF = self._corrections['getMuonIso']
        GetMuonIDSF = self._corrections['getMuonID']

        # Selection
        passNJets = self._selection['passNJets']
        passMETcut = self._selection['passMETcut']

        # Functions
        pow2 = self._functions['pow2']

        # Initialize objects
        met = Initialize({
            'pt': df['MET_pt'],
            'eta': 0,
            'phi': df['MET_phi'],
            'mass': 0
        })
        e = Initialize({
            'pt': df['Electron_pt'],
            'eta': df['Electron_eta'],
            'phi': df['Electron_phi'],
            'mass': df['Electron_mass']
        })
        mu = Initialize({
            'pt': df['Muon_pt'],
            'eta': df['Muon_eta'],
            'phi': df['Muon_phi'],
            'mass': df['Muon_mass']
        })
        j = Initialize({
            'pt': df['Jet_pt'],
            'eta': df['Jet_eta'],
            'phi': df['Jet_phi'],
            'mass': df['Jet_mass']
        })

        # Electron selection
        # Every configured attribute defaults to zeros and is overwritten
        # only when the mapped branch exists in this chunk.
        for key in self._e:
            e[key] = e.pt.zeros_like()
            if self._e[key] in df:
                e[key] = df[self._e[key]]
        e['istight'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, year)
        leading_e = e[e.pt.argmax()]
        # Builtin ``bool`` instead of ``np.bool``: the alias was deprecated
        # in NumPy 1.20 and removed in 1.24.
        leading_e = leading_e[leading_e.istight.astype(bool)]
        nElec = e.counts

        # Muon selection
        for key in self._mu:
            mu[key] = mu.pt.zeros_like()
            if self._mu[key] in df:
                mu[key] = df[self._mu[key]]
        mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso,
                                    mu.tight_id, year)
        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.istight.astype(bool)]
        nMuon = mu.counts

        # Jet selection
        j['deepcsv'] = df['Jet_btagDeepB']
        j['deepflv'] = df['Jet_btagDeepFlavB']
        for key in self._jet:
            j[key] = j.pt.zeros_like()
            if self._jet[key] in df:
                j[key] = df[self._jet[key]]

        j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
        # Clean jets: good jets not matched to any electron or muon with the
        # 0.4 matching parameter.
        j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(
            bool)
        j0 = j[j.pt.argmax()]
        j0 = j0[j0.isclean.astype(bool)]
        nJets = j.counts

        # Dilepton pair
        # Fall back to the leading lepton when the chunk holds no pair.
        ele_pairs = e.distincts()
        diele = leading_e
        leading_diele = leading_e
        if ele_pairs.i0.content.size > 0:
            diele = ele_pairs.i0 + ele_pairs.i1
            leading_diele = diele[diele.pt.argmax()]

        mu_pairs = mu.distincts()
        dimu = leading_mu
        leading_dimu = leading_mu
        if mu_pairs.i0.content.size > 0:
            dimu = mu_pairs.i0 + mu_pairs.i1
            leading_dimu = dimu[dimu.pt.argmax()]
        mmumu = leading_dimu.mass

        # Triggers

        # MET filters

        # Weights
        genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
        weights = processor.Weights(df.size)
        weights.add('norm', xsec / sow * genw)

        # Selections and cuts
        selections = processor.PackedSelection()
        channels = ['em', 'mm', 'ee']
        selections.add('em', (nElec == 1) & (nMuon == 1))
        selections.add('ee', (nElec >= 2))
        selections.add('mm', (nMuon >= 2))

        levels = ['dilepton', '2jets']
        selections.add('dilepton',
                       (nElec >= 2) | (nMuon >= 2) | ((nElec + nMuon) >= 2))
        selections.add('2jets', (nJets >= 2))

        # Variables

        # Fill Histos
        hout = self.accumulator.identity()
        # One entry per chunk, weighted by the number of events in it.
        hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)

        # The total event weight does not depend on channel or level, so it
        # is computed once outside the loops.
        weight = weights.weight()
        for ch in channels:
            for lev in levels:
                cut = selections.all(ch, lev)
                mass_sel = mmumu[cut]
                # NaN masses are given zero weight.
                hout['invmass'].fill(
                    sample=dataset,
                    channel=ch,
                    level=lev,
                    invmass=mass_sel.flatten(),
                    weight=(~np.isnan(mass_sel) * weight[cut]).flatten())

        return hout
Example #21
0
    def process(self, events):

        # Initialize accumulator
        out = self.accumulator.identity()
        dataset = sample_name
        # events.metadata['dataset']

        # Data or MC
        isData = "genWeight" not in events.fields
        isFake = self._isFake

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        # Golden Json file
        if (self._year == "2018") and isData:
            injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD"

        if (self._year == "2017") and isData:
            injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

        # <----- Get Scale factors ------>#

        if not isData:

            # Egamma reco ID
            get_ele_reco_above20_sf = self._corrections[
                "get_ele_reco_above20_sf"][self._year]
            get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][
                self._year]
            get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][
                self._year]

            # DoubleEG trigger # 2016, 2017 are not applied yet
            if self._year == "2018":
                get_ele_trig_leg1_SF = self._corrections[
                    "get_ele_trig_leg1_SF"][self._year]
                get_ele_trig_leg1_data_Eff = self._corrections[
                    "get_ele_trig_leg1_data_Eff"][self._year]
                get_ele_trig_leg1_mc_Eff = self._corrections[
                    "get_ele_trig_leg1_mc_Eff"][self._year]
                get_ele_trig_leg2_SF = self._corrections[
                    "get_ele_trig_leg2_SF"][self._year]
                get_ele_trig_leg2_data_Eff = self._corrections[
                    "get_ele_trig_leg2_data_Eff"][self._year]
                get_ele_trig_leg2_mc_Eff = self._corrections[
                    "get_ele_trig_leg2_mc_Eff"][self._year]

            # Muon ID, Iso
            get_mu_tight_id_sf = self._corrections["get_mu_tight_id_sf"][
                self._year]
            get_mu_tight_iso_sf = self._corrections["get_mu_tight_iso_sf"][
                self._year]

            # PU weight with custom made npy and multi-indexing
            pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
            pu = self._puweight_arr[pu_weight_idx]

        # <----- Helper functions ------>#

        #  Sort by PT  helper function
        def sort_by_pt(ele, pho, jet):
            ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
            pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
            jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]

            return ele, pho, jet

        # Lorentz vectors
        from coffea.nanoevents.methods import vector

        ak.behavior.update(vector.behavior)

        def TLorentz_vector(vec):
            vec = ak.zip(
                {
                    "x": vec.x,
                    "y": vec.y,
                    "z": vec.z,
                    "t": vec.t
                },
                with_name="LorentzVector",
            )
            return vec

        def TLorentz_vector_cylinder(vec):

            vec = ak.zip(
                {
                    "pt": vec.pt,
                    "eta": vec.eta,
                    "phi": vec.phi,
                    "mass": vec.mass,
                },
                with_name="PtEtaPhiMLorentzVector",
            )

            return vec

        # <----- Selection ------>#

        Initial_events = events
        # Good Run ( Golden Json files )
        from coffea import lumi_tools

        if isData:
            lumi_mask_builder = lumi_tools.LumiMask(injson)
            lumimask = ak.Array(
                lumi_mask_builder.__call__(events.run, events.luminosityBlock))
            events = events[lumimask]
            # print("{0}%  of files pass good-run conditions".format(len(events)/ len(Initial_events)))

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        # Cut flow
        cut0 = np.zeros(len(events))

        ##----------- Cut flow1: Passing Triggers

        # double lepton trigger
        is_double_ele_trigger = True
        if not is_double_ele_trigger:
            double_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
        else:
            double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
            for path in self._doubleelectron_triggers[self._year]:
                if path not in events.HLT.fields:
                    continue
                double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                    path]

        # single lepton trigger
        is_single_ele_trigger = True
        if not is_single_ele_trigger:
            single_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
        else:
            single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
            for path in self._singleelectron_triggers[self._year]:
                if path not in events.HLT.fields:
                    continue
                single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                    path]

        events.Electron, events.Photon, events.Jet = sort_by_pt(
            events.Electron, events.Photon, events.Jet)

        # Good Primary vertex
        nPV = events.PV.npvsGood
        nPV_nw = events.PV.npvsGood
        if not isData:
            nPV = nPV * pu

            print(pu)

        # Apply cut1
        events = events[double_ele_triggers_arr]
        if not isData:
            pu = pu[double_ele_triggers_arr]

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        cut1 = np.ones(len(events))

        # Set Particles
        Electron = events.Electron
        Muon = events.Muon
        Photon = events.Photon
        MET = events.MET
        Jet = events.Jet

        ##----------- Cut flow2: Muon Selection
        MuSelmask = ((Muon.pt >= 10)
                     & (abs(Muon.eta) <= 2.5)
                     & (Muon.tightId)
                     & (Muon.pfRelIso04_all < 0.15))
        Muon = Muon[MuSelmask]

        # Exactly one muon
        Muon_sel_mask = ak.num(Muon) == 1
        Electron = Electron[Muon_sel_mask]
        Photon = Photon[Muon_sel_mask]
        Jet = Jet[Muon_sel_mask]
        MET = MET[Muon_sel_mask]
        Muon = Muon[Muon_sel_mask]
        events = events[Muon_sel_mask]
        if not isData:
            pu = pu[Muon_sel_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut2 = np.ones(len(Photon)) * 2

        ##----------- Cut flow3: Electron Selection

        EleSelmask = ((Electron.pt >= 10)
                      & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479)
                      & (Electron.cutBased > 2)
                      & (abs(Electron.dxy) < 0.05)
                      & (abs(Electron.dz) < 0.1)) | (
                          (Electron.pt >= 10)
                          &
                          (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479)
                          & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5)
                          & (Electron.cutBased > 2)
                          & (abs(Electron.dxy) < 0.1)
                          & (abs(Electron.dz) < 0.2))

        Electron = Electron[EleSelmask]

        # Exactly two electrons
        ee_mask = ak.num(Electron) == 2
        Electron = Electron[ee_mask]
        Photon = Photon[ee_mask]
        Jet = Jet[ee_mask]
        MET = MET[ee_mask]
        Muon = Muon[ee_mask]
        if not isData:
            pu = pu[ee_mask]
        events = events[ee_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut3 = np.ones(len(Photon)) * 3

        ##----------- Cut flow4: Photon Selection

        # Basic photon selection
        isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                                  (abs(Photon.eta) < 2.5))
        Pixel_seed_mask = ~Photon.pixelSeed

        if (dataset == "ZZ") and (self._year == "2017"):
            PT_ID_mask = (Photon.pt >= 20) & (
                Photon.cutBasedBitmap >= 3
            )  # 2^0(Loose) + 2^1(Medium) + 2^2(Tights)
        else:
            PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1)

        # dR cut with selected Muon and Electrons
        dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                                 axis=-1)  # default metric table: delta_r
        dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

        # genPartFlav cut
        """
		if dataset == "WZG":
			isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11)
			PhoSelmask = PT_ID_mask & isgap_mask &  Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask

		elif dataset == "WZ":
			isPrompt = (Photon.genPartFlav == 1) 
			PhoSelmask = PT_ID_mask & isgap_mask &  Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
				
		else:
			PhoSelmask = PT_ID_mask  & isgap_mask &  Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
		"""

        # Add genPartFlav to remove Fake Photon in MC samples ( They are already considered by data driven method )
        if not isData:
            genPartFlav_mask = (Photon.genPartFlav == 1)
            PhoSelmask = (genPartFlav_mask & PT_ID_mask & isgap_mask
                          & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask)
        else:
            PhoSelmask = (PT_ID_mask & isgap_mask & Pixel_seed_mask
                          & dr_pho_ele_mask & dr_pho_mu_mask)

        Photon = Photon[PhoSelmask]

        # Apply cut 4
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData:
            pu = pu[A_photon_mask]
        events = events[A_photon_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        def make_leading_pair(target, base):
            return target[ak.argmax(base.pt, axis=1, keepdims=True)]

        leading_pho = make_leading_pair(Photon, Photon)

        # -------------------- Make Fake Photon BKGs---------------------------#
        def make_bins(pt, eta, bin_range_str):

            bin_dict = {
                "PT_1_eta_1": (pt > 20) & (pt < 30) & (eta < 1),
                "PT_1_eta_2": (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5),
                "PT_1_eta_3": (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2),
                "PT_1_eta_4": (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5),
                "PT_2_eta_1": (pt > 30) & (pt < 40) & (eta < 1),
                "PT_2_eta_2": (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5),
                "PT_2_eta_3": (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2),
                "PT_2_eta_4": (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5),
                "PT_3_eta_1": (pt > 40) & (pt < 50) & (eta < 1),
                "PT_3_eta_2": (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5),
                "PT_3_eta_3": (pt > 40) & (pt < 50) & (eta > 1.5) & (eta < 2),
                "PT_3_eta_4": (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5),
                "PT_4_eta_1": (pt > 50) & (eta < 1),
                "PT_4_eta_2": (pt > 50) & (eta > 1) & (eta < 1.5),
                "PT_4_eta_3": (pt > 50) & (eta > 1.5) & (eta < 2),
                "PT_4_eta_4": (pt > 50) & (eta > 2) & (eta < 2.5),
            }

            binmask = bin_dict[bin_range_str]

            return binmask

        bin_name_list = [
            "PT_1_eta_1",
            "PT_1_eta_2",
            "PT_1_eta_3",
            "PT_1_eta_4",
            "PT_2_eta_1",
            "PT_2_eta_2",
            "PT_2_eta_3",
            "PT_2_eta_4",
            "PT_3_eta_1",
            "PT_3_eta_2",
            "PT_3_eta_3",
            "PT_3_eta_4",
            "PT_4_eta_1",
            "PT_4_eta_2",
            "PT_4_eta_3",
            "PT_4_eta_4",
        ]

        ## -- Fake-fraction Lookup table --##
        if isFake:
            # Make Bin-range mask
            binned_pteta_mask = {}
            for name in bin_name_list:
                binned_pteta_mask[name] = make_bins(
                    ak.flatten(leading_pho.pt),
                    ak.flatten(abs(leading_pho.eta)),
                    name,
                )
            # Read Fake fraction --> Mapping bin name to int()
            if self._year == "2018":
                in_dict = np.load("Fitting_2018/Fit_results.npy",
                                  allow_pickle="True")[()]

            if self._year == "2017":
                in_dict = np.load("Fitting_2017/Fit_results.npy",
                                  allow_pickle="True")[()]

            idx = 0
            fake_dict = {}
            for i, j in in_dict.items():
                fake_dict[idx] = j
                idx += 1

            # Reconstruct Fake_weight
            fw = 0
            for i, j in binned_pteta_mask.items():
                fw = fw + j * fake_dict[bin_name_list.index(i)]

            # Process 0 weight to 1
            @numba.njit
            def zero_one(x):
                if x == 0:
                    x = 1
                return x

            vec_zero_one = np.vectorize(zero_one)
            fw = vec_zero_one(fw)
        else:
            fw = np.ones(len(events))

        cut4 = np.ones(len(Photon)) * 4
        print("Fake fraction weight: ", len(fw), len(cut4), fw)

        ##----------- Cut flow5: OSSF
        ossf_mask = Electron.charge[:, 0] + Electron.charge[:, 1] == 0

        # Apply cut 5
        Electron = Electron[ossf_mask]
        Photon = Photon[ossf_mask]
        fw = fw[ossf_mask]
        Jet = Jet[ossf_mask]
        MET = MET[ossf_mask]
        Muon = Muon[ossf_mask]
        if not isData:
            pu = pu[ossf_mask]
        events = events[ossf_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5

        # Define Electron Triplet
        Diele = ak.zip({
            "lep1": Electron[:, 0],
            "lep2": Electron[:, 1],
            "p4": TLorentz_vector(Electron[:, 0] + Electron[:, 1]),
        })

        leading_ele = Diele.lep1
        subleading_ele = Diele.lep2

        def make_leading_pair(target, base):
            return target[ak.argmax(base.pt, axis=1, keepdims=True)]

        leading_pho = make_leading_pair(Photon, Photon)

        # -- Scale Factor for each electron

        # Trigger weight helper function
        def Trigger_Weight(eta1, pt1, eta2, pt2):
            per_ev_MC = (get_ele_trig_leg1_mc_Eff(eta1, pt1) *
                         get_ele_trig_leg2_mc_Eff(eta2, pt2) +
                         get_ele_trig_leg1_mc_Eff(eta2, pt2) *
                         get_ele_trig_leg2_mc_Eff(eta1, pt1) -
                         get_ele_trig_leg1_mc_Eff(eta1, pt1) *
                         get_ele_trig_leg1_mc_Eff(eta2, pt2))

            per_ev_data = (
                get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(
                    eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) *
                get_ele_trig_leg2_SF(eta2, pt2) +
                get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(
                    eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) *
                get_ele_trig_leg2_SF(eta1, pt1) -
                get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(
                    eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) *
                get_ele_trig_leg1_SF(eta2, pt2))

            return per_ev_data / per_ev_MC

        if not isData:

            ## -------------< Egamma ID and Reco Scale factor > -----------------##
            get_pho_medium_id_sf = get_pho_medium_id_sf(
                ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt))

            ele_reco_sf = get_ele_reco_above20_sf(
                leading_ele.deltaEtaSC + leading_ele.eta,
                leading_ele.pt,
            ) * get_ele_reco_above20_sf(
                subleading_ele.deltaEtaSC + subleading_ele.eta,
                subleading_ele.pt,
            )

            ele_medium_id_sf = get_ele_medium_id_sf(
                leading_ele.deltaEtaSC + leading_ele.eta,
                leading_ele.pt,
            ) * get_ele_medium_id_sf(
                subleading_ele.deltaEtaSC + subleading_ele.eta,
                subleading_ele.pt,
            )

            ## -------------< Muon ID and Iso Scale factor > -----------------##
            get_mu_tight_id_sf = get_mu_tight_id_sf(ak.flatten(abs(Muon.eta)),
                                                    ak.flatten(Muon.pt))
            get_mu_tight_iso_sf = get_mu_tight_iso_sf(
                ak.flatten(abs(Muon.eta)), ak.flatten(Muon.pt))

            ## -------------< Double Electron Trigger Scale factor > -----------------##
            eta1 = leading_ele.deltaEtaSC + leading_ele.eta
            eta2 = subleading_ele.deltaEtaSC + subleading_ele.eta
            pt1 = leading_ele.pt
            pt2 = subleading_ele.pt

            # -- 2017,2016 are not applied yet
            if self._year == "2018":
                ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

        ##----------- Cut flow6: Baseline selection

        # Mee cut
        Mee_cut_mask = Diele.p4.mass > 4

        # Lepton PT cuts
        Leppt_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20)
                               & (Muon.pt >= 25))

        # MET cuts
        MET_mask = MET.pt > 20  # Baseline

        # Assemble!!
        Baseline_mask = Leppt_mask & MET_mask & Mee_cut_mask  # SR,CR

        # Apply cut6
        Diele_base = Diele[Baseline_mask]
        leading_pho_base = leading_pho[Baseline_mask]
        Jet_base = Jet[Baseline_mask]
        MET_base = MET[Baseline_mask]
        Muon_base = Muon[Baseline_mask]
        events_base = events[Baseline_mask]

        # Photon  EE and EB
        isEE_mask = leading_pho.isScEtaEE
        isEB_mask = leading_pho.isScEtaEB
        Pho_EE_base = leading_pho[isEE_mask & Baseline_mask]
        Pho_EB_base = leading_pho[isEB_mask & Baseline_mask]

        # Stop processing if there is no event remain
        if len(leading_pho_base) == 0:
            return out

        cut6 = np.ones(ak.sum(ak.num(leading_pho_base) > 0)) * 6

        base_arr_dict = {
            "Diele_sel": Diele_base,
            "leading_pho_sel": leading_pho_base,
            "Jet_sel": Jet_base,
            "MET_sel": MET_base,
            "Muon_sel": Muon_base,
            "Pho_EE_sel": Pho_EE_base,
            "Pho_EB_sel": Pho_EB_base,
        }

        ##-----------  << SR >>
        Zmass_window_mask = abs(Diele.p4.mass - 91.1876) < 15
        MET_mask = MET.pt > 30
        bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
        Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
        SR_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask

        SR_mask = Baseline_mask & SR_mask
        Diele_SR = Diele[SR_mask]
        leading_pho_SR = leading_pho[SR_mask]
        Muon_SR = Muon[SR_mask]
        MET_SR = MET[SR_mask]
        Jet_SR = Jet[SR_mask]
        events_SR = events[SR_mask]
        Pho_EE_SR = leading_pho[isEE_mask & SR_mask]
        Pho_EB_SR = leading_pho[isEB_mask & SR_mask]

        SR_arr_dict = {
            "Diele_sel": Diele_SR,
            "leading_pho_sel": leading_pho_SR,
            "Jet_sel": Jet_SR,
            "MET_sel": MET_SR,
            "Muon_sel": Muon_SR,
            "Pho_EE_sel": Pho_EE_SR,
            "Pho_EB_sel": Pho_EB_SR,
        }

        ##-----------  << CR-Z+Jets >>
        Zmass_window_mask = abs(Diele.p4.mass - 91.1876) < 15
        MET_mask = MET.pt <= 30
        bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
        Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
        CR_ZJets_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask

        CR_ZJets_mask = Baseline_mask & CR_ZJets_mask
        Diele_CR_ZJets = Diele[CR_ZJets_mask]
        leading_pho_CR_ZJets = leading_pho[CR_ZJets_mask]
        Muon_CR_ZJets = Muon[CR_ZJets_mask]
        MET_CR_ZJets = MET[CR_ZJets_mask]
        Jet_CR_ZJets = Jet[CR_ZJets_mask]
        events_CR_ZJets = events[CR_ZJets_mask]
        Pho_EE_CR_ZJets = leading_pho[isEE_mask & CR_ZJets_mask]
        Pho_EB_CR_ZJets = leading_pho[isEB_mask & CR_ZJets_mask]

        CR_ZJets_arr_dict = {
            "Diele_sel": Diele_CR_ZJets,
            "leading_pho_sel": leading_pho_CR_ZJets,
            "Jet_sel": Jet_CR_ZJets,
            "MET_sel": MET_CR_ZJets,
            "Muon_sel": Muon_CR_ZJets,
            "Pho_EE_sel": Pho_EE_CR_ZJets,
            "Pho_EB_sel": Pho_EB_CR_ZJets,
        }

        ##-----------  << CR-T-enriched >>
        Zmass_window_mask = abs(Diele.p4.mass - 91.1876) > 5
        MET_mask = MET.pt > 30
        bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) > 0
        Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
        CR_Tenri_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask

        CR_Tenri_mask = Baseline_mask & CR_Tenri_mask
        Diele_CR_t = Diele[CR_Tenri_mask]
        leading_pho_CR_t = leading_pho[CR_Tenri_mask]
        Muon_CR_t = Muon[CR_Tenri_mask]
        MET_CR_t = MET[CR_Tenri_mask]
        Jet_CR_t = Jet[CR_Tenri_mask]
        events_CR_t = events[CR_Tenri_mask]
        Pho_EE_CR_t = leading_pho[isEE_mask & CR_Tenri_mask]
        Pho_EB_CR_t = leading_pho[isEB_mask & CR_Tenri_mask]

        CR_tEnriched_arr_dict = {
            "Diele_sel": Diele_CR_t,
            "leading_pho_sel": leading_pho_CR_t,
            "Jet_sel": Jet_CR_t,
            "MET_sel": MET_CR_t,
            "Muon_sel": Muon_CR_t,
            "Pho_EE_sel": Pho_EE_CR_t,
            "Pho_EB_sel": Pho_EB_CR_t,
        }

        ##-----------  << CR-Conversion >>
        Zmass_window_mask = abs(Diele.p4.mass - 91.1876) > 15
        MET_mask = MET.pt <= 30
        bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
        Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) <= 100
        CR_conv_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask

        CR_conv_mask = Baseline_mask & CR_conv_mask
        Diele_CR_conv = Diele[CR_conv_mask]
        leading_pho_CR_conv = leading_pho[CR_conv_mask]
        Muon_CR_conv = Muon[CR_conv_mask]
        MET_CR_conv = MET[CR_conv_mask]
        Jet_CR_conv = Jet[CR_conv_mask]
        events_CR_conv = events[CR_conv_mask]
        Pho_EE_CR_conv = leading_pho[isEE_mask & CR_conv_mask]
        Pho_EB_CR_conv = leading_pho[isEB_mask & CR_conv_mask]

        CR_Conversion_dict = {
            "Diele_sel": Diele_CR_conv,
            "leading_pho_sel": leading_pho_CR_conv,
            "Jet_sel": Jet_CR_conv,
            "MET_sel": MET_CR_conv,
            "Muon_sel": Muon_CR_conv,
            "Pho_EE_sel": Pho_EE_CR_conv,
            "Pho_EB_sel": Pho_EB_CR_conv,
        }

        ## -------------------- Prepare making hist --------------#

        regions = {
            "Baseline": base_arr_dict,
            "Signal": SR_arr_dict,
            "CR_ZJets": CR_ZJets_arr_dict,
            "CR_tEnriched": CR_tEnriched_arr_dict,
            "CR_conversion": CR_Conversion_dict,
        }

        mask_dict = {
            "Baseline": Baseline_mask,
            "Signal": SR_mask,
            "CR_ZJets": CR_ZJets_mask,
            "CR_tEnriched": CR_Tenri_mask,
            "CR_conversion": CR_conv_mask,
        }

        for region, arr_dict in regions.items():

            # Photon
            phoPT = ak.flatten(arr_dict["leading_pho_sel"].pt)
            phoEta = ak.flatten(arr_dict["leading_pho_sel"].eta)
            phoPhi = ak.flatten(arr_dict["leading_pho_sel"].phi)

            # Photon EE
            if len(arr_dict["Pho_EE_sel"].pt) != 0:
                Pho_EE_PT = ak.flatten(arr_dict["Pho_EE_sel"].pt)
                Pho_EE_Eta = ak.flatten(arr_dict["Pho_EE_sel"].eta)
                Pho_EE_Phi = ak.flatten(arr_dict["Pho_EE_sel"].phi)
                Pho_EE_sieie = ak.flatten(arr_dict["Pho_EE_sel"].sieie)
                Pho_EE_Iso_charge = ak.flatten(
                    arr_dict["Pho_EE_sel"].pfRelIso03_chg)

            # Photon EB
            if len(arr_dict["Pho_EB_sel"].pt) != 0:
                Pho_EB_PT = ak.flatten(arr_dict["Pho_EB_sel"].pt)
                Pho_EB_Eta = ak.flatten(arr_dict["Pho_EB_sel"].eta)
                Pho_EB_Phi = ak.flatten(arr_dict["Pho_EB_sel"].phi)
                Pho_EB_sieie = ak.flatten(arr_dict["Pho_EB_sel"].sieie)
                Pho_EB_Iso_charge = ak.flatten(
                    arr_dict["Pho_EB_sel"].pfRelIso03_chg)

            # Electrons
            ele1PT = arr_dict["Diele_sel"].lep1.pt
            ele1Eta = arr_dict["Diele_sel"].lep1.eta
            ele1Phi = arr_dict["Diele_sel"].lep1.phi

            ele2PT = arr_dict["Diele_sel"].lep2.pt
            ele2Eta = arr_dict["Diele_sel"].lep2.eta
            ele2Phi = arr_dict["Diele_sel"].lep2.phi

            # Muon
            muPT = ak.flatten(arr_dict["Muon_sel"].pt)
            muEta = ak.flatten(arr_dict["Muon_sel"].eta)
            muPhi = ak.flatten(arr_dict["Muon_sel"].phi)

            # MET
            met = ak.to_numpy(arr_dict["MET_sel"].pt)

            # M(eea) M(ee)
            diele = arr_dict["Diele_sel"].p4
            lll_vec = diele + arr_dict["Muon_sel"][:, 0]
            Mlll = lll_vec.mass
            Mee = diele.mass

            # W MT (--> beta)
            MT = np.sqrt(
                2 * arr_dict["Muon_sel"].pt * arr_dict["MET_sel"].pt *
                (1 - np.cos(
                    abs(arr_dict["MET_sel"].delta_phi(arr_dict["Muon_sel"])))))
            MT = np.array(ak.firsts(MT))

            # --- Apply weight and hist
            weights = processor.Weights(len(cut5))

            # --- skim cut-weight
            def skim_weight(arr):
                mask1 = ~ak.is_none(arr)
                subarr = arr[mask1]
                mask2 = subarr != 0
                return ak.to_numpy(subarr[mask2])

            cuts = mask_dict[region]
            cuts_pho_EE = ak.flatten(isEE_mask)
            cuts_pho_EB = ak.flatten(isEB_mask)

            if isFake:
                weights.add("fake_fraction", fw)

            # Weight and SF here
            if not (isData | isFake):
                weights.add("pileup", pu)
                weights.add("ele_id", ele_medium_id_sf)
                weights.add("ele_reco", ele_reco_sf)
                weights.add("pho_id", get_pho_medium_id_sf)
                weights.add("mu_id", get_mu_tight_id_sf)
                weights.add("mu_iso", get_mu_tight_id_sf)

                # 2016,2017 are not applied yet
                if self._year == "2018":
                    weights.add("ele_trigger", ele_trig_weight)

            # ---------------------------- Fill hist --------------------------------------#

            # Initial events
            out["sumw"][dataset] += len(Initial_events)

            print(
                "region: {0} ### cut0: {1},cut1: {2}, cut2: {3},cut3: {4},cut4: {5},cut5: {6},cut6: {7}, cut7: {8}"
                .format(region, len(cut0), len(cut1), len(cut2), len(cut3),
                        len(cut4), len(cut5), len(cut6), len(met)))

            # Fill hist

            # -- met -- #
            out["met"].fill(
                dataset=dataset,
                region=region,
                met=met,
                weight=skim_weight(weights.weight() * cuts),
            )

            # --mass -- #
            out["MT"].fill(
                dataset=dataset,
                region=region,
                MT=MT,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["mass"].fill(
                dataset=dataset,
                region=region,
                mass=Mee,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["mass_lll"].fill(
                dataset=dataset,
                region=region,
                mass_lll=Mlll,
                weight=skim_weight(weights.weight() * cuts),
            )

            # -- Muon -- #
            out["mupt"].fill(
                dataset=dataset,
                region=region,
                mupt=muPT,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["mueta"].fill(
                dataset=dataset,
                region=region,
                mueta=muEta,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["muphi"].fill(
                dataset=dataset,
                region=region,
                muphi=muPhi,
                weight=skim_weight(weights.weight() * cuts),
            )

            # -- Electron -- #
            out["ele1pt"].fill(
                dataset=dataset,
                region=region,
                ele1pt=ele1PT,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["ele1eta"].fill(
                dataset=dataset,
                region=region,
                ele1eta=ele1Eta,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["ele1phi"].fill(
                dataset=dataset,
                region=region,
                ele1phi=ele1Phi,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["ele2pt"].fill(
                dataset=dataset,
                region=region,
                ele2pt=ele2PT,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["ele2eta"].fill(
                dataset=dataset,
                region=region,
                ele2eta=ele2Eta,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["ele2phi"].fill(
                dataset=dataset,
                region=region,
                ele2phi=ele2Phi,
                weight=skim_weight(weights.weight() * cuts),
            )

            # -- Photon -- #

            out["phopt"].fill(
                dataset=dataset,
                region=region,
                phopt=phoPT,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["phoeta"].fill(
                dataset=dataset,
                region=region,
                phoeta=phoEta,
                weight=skim_weight(weights.weight() * cuts),
            )
            out["phophi"].fill(
                dataset=dataset,
                region=region,
                phophi=phoPhi,
                weight=skim_weight(weights.weight() * cuts),
            )

            if len(arr_dict["Pho_EE_sel"].pt) != 0:

                out["pho_EE_pt"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EE_pt=Pho_EE_PT,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
                )
                out["pho_EE_eta"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EE_eta=Pho_EE_Eta,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
                )
                out["pho_EE_phi"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EE_phi=Pho_EE_Phi,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
                )
                out["pho_EE_sieie"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EE_sieie=Pho_EE_sieie,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
                )
                out["pho_EE_Iso_chg"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EE_Iso_chg=Pho_EE_Iso_charge,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
                )

            if len(arr_dict["Pho_EB_sel"].pt) != 0:
                out["pho_EB_pt"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EB_pt=Pho_EB_PT,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
                )
                out["pho_EB_eta"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EB_eta=Pho_EB_Eta,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
                )
                out["pho_EB_phi"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EB_phi=Pho_EB_Phi,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
                )
                out["pho_EB_sieie"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EB_sieie=Pho_EB_sieie,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
                )
                out["pho_EB_Iso_chg"].fill(
                    dataset=dataset,
                    region=region,
                    pho_EB_Iso_chg=Pho_EB_Iso_charge,
                    weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
                )

        return out
    def process(self, events):
        """Apply the boosted-jet selections to one chunk and fill histograms.

        The method builds per-event boolean selections (triggers, candidate
        fat-jet kinematics and tagging, AK4 b-tag requirements, lepton
        counting), accumulates event weights for simulated samples, and fills
        the ``templates``, ``nminus1_n2ddt``, ``btagWeight`` and (for
        ``GluGluHToBB`` datasets) ``genresponse``/``genresponse_noweight``
        histograms of the accumulator.

        Args:
            events: Event chunk. The code reads ``events.metadata['dataset']``,
                ``events.columns``, ``events.size``, the ``HLT`` branches and
                the ``FatJet`` (or ``CustomAK8Puppi``), ``Jet``, ``Muon``,
                ``Electron``, ``Tau`` and ``MET`` collections; simulated
                chunks additionally need ``genWeight``, ``Pileup`` and, for
                ``GluGluHToBB`` datasets, ``LHEScaleWeight``/``LHEWeight``.

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) with the
            histograms and ``sumw`` filled for this chunk.
        """
        dataset = events.metadata['dataset']
        # A chunk without a 'genWeight' column is treated as real data.
        isRealData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        weights = processor.Weights(len(events))
        output = self.accumulator.identity()
        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        def trigger_mask(paths_by_year):
            """Return the per-event OR of this year's HLT paths.

            Simulated events pass unconditionally; the trigger table is only
            looked up for real data.
            """
            if not isRealData:
                return np.ones(events.size, dtype='bool')
            fired = np.zeros(events.size, dtype='bool')
            for path in paths_by_year[self._year]:
                fired = fired | events.HLT[path]
            return fired

        selection.add('trigger', trigger_mask(self._triggers))
        selection.add('muontrigger', trigger_mask(self._muontriggers))

        try:
            fatjets = events.FatJet
        except AttributeError:
            # early pancakes
            fatjets = events.CustomAK8Puppi
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        # Candidate jet: the first fat jet passing the loose kinematic cuts.
        # The 0:1 slice keeps the jagged structure (zero or one jet per event).
        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            # & fatjets.isLoose  # not always available
        ][:, 0:1]
        selection.add('minjetkin', ((candidatejet.pt >= 450)
                                    & (candidatejet.msdcorr >= 47.)
                                    & (abs(candidatejet.eta) < 2.5)).any())
        selection.add('jetacceptance', ((candidatejet.msdcorr >= 47.)
                                        & (candidatejet.pt < 1200)
                                        & (candidatejet.msdcorr < 201.)).any())
        selection.add('jetid', candidatejet.isTight.any())
        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
        selection.add('ddbpass', (candidatejet.btagDDBvL >= 0.89).any())

        jets = events.Jet[(events.Jet.pt > 30.)
                          & (abs(events.Jet.eta) < 2.5)
                          & events.Jet.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
        # AK4 jets separated from the candidate jet by more than pi/2 in phi.
        ak4_opposite = jets[(dphi > np.pi / 2).all()]
        selection.add(
            'antiak4btagMediumOppHem',
            ak4_opposite.btagDeepB.max() <
            BTagEfficiency.btagWPs[self._year]['medium'])
        # AK4 jets separated from the candidate jet by more than 0.8 in phi.
        ak4_away = jets[(dphi > 0.8).all()]
        selection.add(
            'ak4btagMedium08',
            ak4_away.btagDeepB.max() >
            BTagEfficiency.btagWPs[self._year]['medium'])

        selection.add('met', events.MET.pt < 140.)

        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & (events.Muon.looseId).astype(bool))
        nmuons = goodmuon.sum()
        leadingmuon = events.Muon[goodmuon][:, 0:1]
        muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)

        nelectrons = (
            (events.Electron.pt > 10)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.LOOSE)).sum()

        ntaus = ((events.Tau.pt > 20)
                 & (events.Tau.idDecayMode).astype(bool)
                 # bacon iso looser than Nano selection
                 ).sum()

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin', ((leadingmuon.pt > 55.)
                                  & (abs(leadingmuon.eta) < 2.1)).all())
        selection.add('muonDphiAK8',
                      (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) >
                       2 * np.pi / 3).all().all())

        if isRealData:
            genflavor = candidatejet.pt.zeros_like()
        else:
            # The order of these calls is kept as-is: each helper receives the
            # shared `weights` object.
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            bosons = getBosons(events)
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            # Events without a candidate jet get genflavor == -1.
            genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
                1, clip=True).fillna(-1).flatten()
            add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                 candidatejet.pt, self._year)
            output['btagWeight'].fill(dataset=dataset,
                                      val=self._btagSF.addBtagWeight(
                                          weights, ak4_away))
            # Lazy %-args: the statistics are only formatted if DEBUG is on.
            logger.debug("Weight statistics: %r", weights._weightStats)

        # Soft-drop mass with the year's scale factor applied where
        # genflavor > 0 and left unscaled where genflavor == 0; any other
        # genflavor value contributes zero.
        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        regions = {
            # Cuts currently switched off for 'signal': 'noleptons',
            # 'jetacceptance', 'jetid', 'n2ddt', 'antiak4btagMediumOppHem',
            # 'met'.
            'signal': [
                'trigger',
                'minjetkin',
            ],
            'muoncontrol': [
                'muontrigger',
                'minjetkin',
                'jetid',
                'muonDphiAK8',
                'muonkin',
                'ak4btagMedium08',
                'onemuon',
            ],
            'noselection': [],
        }

        # The cutflow histogram filling is currently disabled; only the debug
        # message that accompanied it is kept. Arguments are passed lazily so
        # the (potentially large) arrays are not rendered unless DEBUG is on.
        for region in regions:
            logger.debug("Filling cutflow with: %s, %s, %s, %s", dataset,
                         region, genflavor, weights.weight())

        # Only the nominal weight is filled. Systematic variations that were
        # previously looped over ('jet_triggerUp/Down', 'btagWeightUp/Down',
        # 'btagEffStatUp/Down') are disabled together with the 'systematic'
        # axis of the templates fill below.

        def normalize(val, cut):
            """Select events with `cut` and return exactly one value per event.

            Events with no entry are padded and filled with 0.
            """
            return val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region, systematic=None, wmod=None):
            """Fill the histograms for `region`.

            With ``wmod`` unset the weight is ``weights.weight`` evaluated with
            ``modifier=systematic``; otherwise the nominal weight is multiplied
            by the per-event ``wmod`` and the gen-response histograms are
            filled as well.
            """
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                weight = weights.weight(modifier=systematic)[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]

            # NOTE(review): with the 'systematic' axis commented out, every
            # call with a `wmod` adds to the same (dataset, region) template
            # as the nominal fill. This looks like multiple counting for the
            # GluGluHToBB datasets -- confirm whether it is intended.
            output['templates'].fill(
                dataset=dataset,
                region=region,
                #systematic=sname,
                #genflavor=genflavor[cut],
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                #ddb=normalize(candidatejet.btagDDBvL, cut),
                weight=weight,
            )
            if wmod is not None:
                # `genBosonPt` only exists for simulated samples; this branch
                # is reached only through the GluGluHToBB loop below.
                output['genresponse_noweight'].fill(
                    dataset=dataset,
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=events.genWeight[cut] * wmod[cut],
                )
                output['genresponse'].fill(
                    dataset=dataset,
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=weight,
                )

        for region in regions:
            # N-1 plot: all cuts of the region except 'n2ddt' itself.
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            output['nminus1_n2ddt'].fill(
                dataset=dataset,
                region=region,
                n2ddt=normalize(candidatejet.n2ddt, cut),
                weight=weights.weight()[cut],
            )
            fill(region)
            if 'GluGluHToBB' in dataset:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i,
                         events.LHEScaleWeight[:, i])
                for c in events.LHEWeight.columns[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        return output
Example #23
0
    def process(self, events):
        #assert(len(np.unique(events.event)) == len((events.event)))
        dataset = events.metadata['dataset']
        print('process dataset', dataset)
        isRealData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        weights = processor.Weights(len(events))
        output = self.accumulator.identity()
        if(len(events) == 0): return output
        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

       


        # trigger paths
        if isRealData:
            trigger_fatjet = np.zeros(events.size, dtype='bool')
            for t in self._triggers[self._year]:
                try:
                    trigger_fatjet = trigger_fatjet | events.HLT[t]
                except:
                    print('trigger %s not available'%t)
                    continue

            trigger_muon = np.zeros(events.size, dtype='bool')
            for t in self._muontriggers[self._year]:
                trigger_muon = trigger_muon | events.HLT[t]
 
        else:
            trigger_fatjet = np.ones(events.size, dtype='bool')
            trigger_muon = np.ones(events.size, dtype='bool')

        selection.add('fatjet_trigger', trigger_fatjet)
        selection.add('muon_trigger', trigger_muon) 

        # run model on PFCands associated to FatJet (FatJetPFCands)
        #events.FatJet.array.content["PFCands"] = type(events.FatJetPFCands.array).fromcounts(events.FatJet.nPFConstituents.flatten(), events.FatJetPFCands.flatten())
        #events.FatJet.array.content["twoProngGru"] = run_model(events.FatJet.flatten())
   
        #else:
        #  events.FatJet["genMatchFull"] = np.ones(len(events))
        fatjets = events.FatJet
        gru = events.GRU
        IN  = events.IN
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rhocorr'] = 2*np.log(fatjets.msdcorr/fatjets.pt)
        fatjets['gruddt'] = gru.v25 - shift(fatjets,algo='gruddt',year=self._year)
        fatjets['in_v3_ddt'] = IN.v3 - shift(fatjets,algo='inddt',year=self._year)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets,year=self._year)
        #fatjets['count'] = fatjets.count
        if 'WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset: fatjets["genMatchFull"] = genmatch(events)
        else: fatjets["genMatchFull"] = fatjets.pt.zeros_like() #np.zeros(events.size, dtype='bool') 

        candidatejet = fatjets[
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
        ][:, 0:1]

        # basic jet selection
        selection.add('minjetkin', ( 
            (candidatejet.pt >= 450)
            #& (candidatejet.msdcorr >= 40.)
            & (abs(candidatejet.eta) < 2.5)
            & (candidatejet.rhocorr >= -5.5)
            & (candidatejet.rhocorr <= -2)
        ).any())
        selection.add('signal_pt', (
            (candidatejet.pt >= 525)
        ).any())

        selection.add('mass', (candidatejet.msdcorr >= 40.).any())
        selection.add('v_selection_jetkin', ( 
            (candidatejet.pt >= 200)
            & (candidatejet.rhocorr >= -5.5)
            & (candidatejet.rhocorr <= -2)
        ).any())
        selection.add('genmatch', candidatejet.genMatchFull.pad(1).fillna(0).flatten() if ('WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset) else candidatejet.pt.pad(1).fillna(0).flatten().astype(bool))
        #if isRealData:
        #   selection.add('blinding', (
        #      (events.event %10 == 0)
        #   ))
        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
        selection.add('jetid', candidatejet.isTight.any())
        selection.add('met', events.MET.pt > 40.) 

        goodmuon = (
            (events.Muon.pt > 10)
            & (abs(events.Muon.eta) < 2.1)
            #& (events.Muon.pfRelIso04_all < 0.4)
            #& (events.Muon.looseId).astype(bool)
        )
        nmuons=goodmuon.sum()
        leadingmuon = events.Muon[goodmuon 
        #& (events.Muon.pt > 55)
        ][:, 0:1]
        muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)
 
        ngoodmuons = goodmuon[events.Muon.pt > 55].sum()

        selection.add('muonDphiAK8', (
            abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2*np.pi/3
        ).all().all())

        

        selection.add('muonkin', (
            (leadingmuon.pt > 55.)
            & (abs(leadingmuon.eta) < 2.1)
            #& (leadingmuon.looseId).astype(bool)
        ).all())

        #ak4 puppi jet for CR
        jets = events.Jet[
            (events.Jet.pt > 50.)
            & (abs(events.Jet.eta) < 3)
            & (events.Jet.isTight).astype(bool)
        ]

        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
        ak4_away = jets[(dr > 0.8).all()]
        #selection.add('ak4btagMedium08', ak4_away.btagDeepB.max() > 0.4941)
        selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)

        #generic lep veto

        nelectrons = (
            (events.Electron.pt > 10.)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.LOOSE)
        ).sum()

        ntaus = (
            (events.Tau.pt > 20.)
            & (events.Tau.idDecayMode).astype(bool)
            # bacon iso looser than Nano selection
        ).sum()
        selection.add('onemuon', (ngoodmuons==1)& (nelectrons == 0) & (ntaus == 0))
        selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))
     
        if not isRealData: 
            weights.add('genweight', events.genWeight)
            #add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            #add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) signal region only
            bosons = getBosons(events)
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)  
            #b-tag weights
        regions = {
           'signal'                 : ['fatjet_trigger','minjetkin','signal_pt','mass','noleptons','jetid','genmatch'],
           'ttbar_muoncontrol'      : ['muon_trigger', 'minjetkin','jetid', 'mass', 'muonDphiAK8','muonkin','ak4btagMedium08','onemuon',],
           'noselection' : [],#'vselection_muoncontrol' : ['muon_trigger', 'v_selection_jetkin', 'genmatch', 'jetid', 'ak4btagMedium08', 'muonkin','met'],
        }
        #if isRealData and 'SingleMuon' not in dataset:
        #    regions['signal'].append('blinding')
        '''for region, cuts in regions.items():
            allcuts = set() 
            print ('weights', weights.weight().shape)
            print( len(events)) 
            output['cutflow'].fill(dataset=dataset, region=region, cut=0)#,weight=weights.weight())
            
            for i, cut in enumerate(cuts):
                 
                allcuts.add(cut)
                cut = selection.all(*allcuts)
                output['cutflow'].fill(dataset=dataset, region=region, cut=i + 1)# weight=weights.weight()[cut])
        '''
        allcuts_signal = set()
        output['cutflow_signal'][dataset]['none']+= float(weights.weight().sum())
        allcuts_ttbar_muoncontrol = set()
        output['cutflow_ttbar_muoncontrol'][dataset]['none']+= float(weights.weight().sum())
  
        for cut in regions['signal']:
            allcuts_signal.add(cut)
            output['cutflow_signal'][dataset][cut] += float(weights.weight()[selection.all(*allcuts_signal)].sum())

        for cut in regions['ttbar_muoncontrol']:
            allcuts_ttbar_muoncontrol.add(cut)
            output['cutflow_ttbar_muoncontrol'][dataset][cut] += float(weights.weight()[selection.all(*allcuts_ttbar_muoncontrol)].sum())

        def normalize(val, cut):
            """Return a flat array of ``val`` for the events selected by ``cut``.

            After the selection, ``pad(1, clip=True)`` is applied, missing
            entries are replaced by 0 via ``fillna(0)``, and the result is
            flattened.
            """
            selected = val[cut]
            padded = selected.pad(1, clip=True)
            filled = padded.fillna(0)
            return filled.flatten()

        def fill(region, systematic=None, wmod=None):
            """Fill ``output['templates']`` with the events passing ``region``.

            ``region`` must be a key of ``regions``; all cuts listed for it are
            combined with ``selection.all``.

            ``systematic`` and ``wmod`` are accepted so existing call sites keep
            working, but they are not used: only the nominal weights from
            ``weights.weight()`` are filled.
            """
            cut = selection.all(*regions[region])
            weight = weights.weight()[cut]
            output['templates'].fill(
                dataset=dataset,
                region=region,
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                #gruddt=normalize(candidatejet.gruddt, cut),
                #n2=normalize(candidatejet.n2b1, cut),
                #gru=normalize(candidatejet.twoProngGru, cut),
                #rho=normalize(candidatejet.rhocorr, cut),
                in_v3_ddt=normalize(candidatejet.in_v3_ddt, cut),
                #nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
                #nJet=candidatejet.counts[cut],
                #Vmatch=normalize(candidatejet.genMatchFull, cut),
                mu_pt=normalize(leadingmuon.pt, cut),
                mu_pfRelIso04_all=normalize(leadingmuon.pfRelIso04_all, cut),
                weight=weight,
            )

        for region in regions:
            fill(region)

        return output
Example #24
0
    def process(self, events):
        """Run the dilepton Z selection on one chunk and fill the histograms.

        Steps, in order: lumi mask (data only), trigger, good primary vertex,
        Rochester correction of the muon pt, lepton ID, pairing of same-flavour
        leptons into Z candidates, mass-window and pt-threshold cuts, event
        weights (simulation only) and histogram filling per selection and
        channel.

        Side effects: sets ``self._isData`` and writes ``passLumiMask`` and a
        corrected ``Muon.pt`` back onto ``events``.

        Parameters
        ----------
        events
            Chunk of events; ``events.metadata['dataset']`` names the dataset.

        Returns
        -------
        The accumulator created by ``self.accumulator.identity()`` with the
        cutflow counters, ``sumw`` and the per-selection histograms updated.
        If no event passes the trigger the accumulator is returned early.

        Raises
        ------
        ValueError
            If ``self._selections`` contains a selection for which no list of
            cuts is defined.
        """
        logging.debug('starting process')
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        # Data is recognised purely by the primary-dataset name.
        self._isData = dataset in [
            'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma',
            'MuonEG'
        ]

        selection = processor.PackedSelection()

        # TODO: instead of cutflow, use processor.PackedSelection
        output['cutflow']['all events'] += events.size

        logging.debug('applying lumi mask')
        if self._isData:
            lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
            events['passLumiMask'] = lumiMask(np.array(events.run),
                                              np.array(events.luminosityBlock))
        else:
            # Simulation has no certified-lumi list: every event passes.
            events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
        passLumiMask = events.passLumiMask
        selection.add('lumiMask', passLumiMask)

        logging.debug('adding trigger')
        self._add_trigger(events)

        passHLT = events.passHLT
        selection.add('trigger', passHLT)
        output['cutflow']['pass trigger'] += passHLT.sum()
        # if no trigger: fast return
        if passHLT.sum() == 0:
            return output

        # require one good vertex
        logging.debug('checking vertices')
        passGoodVertex = (events.PV.npvsGood > 0)
        output['cutflow']['good vertex'] += passGoodVertex.sum()
        selection.add('goodVertex', passGoodVertex)

        # run rochester
        rochester = self._rochester
        _muon_offsets = events.Muon.pt.offsets
        _charge = events.Muon.charge
        _pt = events.Muon.pt
        _eta = events.Muon.eta
        _phi = events.Muon.phi
        if self._isData:
            _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
            # _kErr = rochester.kScaleDTerror(_charge, _pt, _eta, _phi)
        else:
            # for default if gen present
            _gpt = events.Muon.matched_gen.pt
            # for backup w/o gen
            _nl = events.Muon.nTrackerLayers
            # one uniform random number per muon, used by the smearing
            _u = JaggedArray.fromoffsets(_muon_offsets,
                                         np.random.rand(*_pt.flatten().shape))
            _hasgen = (_gpt.fillna(-1) > 0)
            _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                           _eta[_hasgen], _phi[_hasgen],
                                           _gpt[_hasgen])
            _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                         _eta[~_hasgen], _phi[~_hasgen],
                                         _nl[~_hasgen], _u[~_hasgen])
            # Merge both corrections into one flat array, then restore the
            # per-event structure.
            _k = np.ones_like(_pt.flatten())
            _k[_hasgen.flatten()] = _kspread.flatten()
            _k[~_hasgen.flatten()] = _ksmear.flatten()
            _k = JaggedArray.fromoffsets(_muon_offsets, _k)
            # _kErrspread = rochester.kSpreadMCerror(_charge[_hasgen], _pt[_hasgen], _eta[_hasgen], _phi[_hasgen],
            #                                        _gpt[_hasgen])
            # _kErrsmear  = rochester.kSmearMCerror(_charge[~_hasgen], _pt[~_hasgen], _eta[~_hasgen], _phi[~_hasgen],
            #                                       _nl[~_hasgen], _u[~_hasgen])
            # _kErr = np.ones_like(_pt.flatten())
            # _kErr[_hasgen.flatten()] = _kErrspread.flatten()
            # _kErr[~_hasgen.flatten()] = _kErrsmear.flatten()
            # _kErr = JaggedArray.fromoffsets(_muon_offsets, _kErr)

        # The correction is only applied to muons with pt below 200.
        mask = _pt.flatten() < 200
        rochester_pt = _pt.flatten()
        rochester_pt[mask] = (_k * _pt).flatten()[mask]
        events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets,
                                                    rochester_pt)

        logging.debug('adding muon id')
        self._add_muon_id(events.Muon)
        logging.debug('adding electron id')
        self._add_electron_id(events.Electron)

        logging.debug('selecting muons')
        muonId = (events.Muon.passId > 0)
        muons = events.Muon[muonId]

        logging.debug('selecting electrons')
        electronId = (events.Electron.passId > 0)
        electrons = events.Electron[electronId]

        passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
        output['cutflow']['two leptons'] += passTwoLeptons.sum()
        selection.add('twoLeptons', passTwoLeptons)

        # build cands
        # remake z to have same columns
        # pt eta phi mass charge pdgId
        logging.debug('rebuilding leptons')

        def rebuild(leptons):
            """Copy ``leptons`` into a candidate array with a common column set."""
            return JaggedCandidateArray.candidatesfromoffsets(
                leptons.offsets,
                pt=leptons.pt.flatten(),
                eta=leptons.eta.flatten(),
                phi=leptons.phi.flatten(),
                mass=leptons.mass.flatten(),
                charge=leptons.charge.flatten(),
                pdgId=leptons.pdgId.flatten(),
                # needed for electron SF
                etaSC=leptons.etaSC.flatten()
                if hasattr(leptons, 'etaSC') else leptons.eta.flatten(),
            )

        newMuons = rebuild(muons)
        newElectrons = rebuild(electrons)

        logging.debug('building 2 leptons')
        ee_cands = newElectrons.choose(2)
        mm_cands = newMuons.choose(2)

        # combine them
        z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

        def bestcombination(zcands):
            """Keep at most one zero-total-charge pair per event (the first)."""
            good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
            # this keeps the first z cand in each event
            # should instead sort the best first
            # TODO: select best
            zcands = zcands[good_charge][:, :1]
            return zcands

        logging.debug('selecting best combinations')
        z_cands = bestcombination(z_cands)

        # z1 / z2 hold, per candidate, the index (0 or 1) of the higher-pt and
        # lower-pt lepton of the pair respectively.
        z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
        z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
        z1[(z_cands['0']['p4'].pt.flatten() <
            z_cands['1']['p4'].pt.flatten())] = 1
        z2[(z_cands['0']['p4'].pt.flatten() <
            z_cands['1']['p4'].pt.flatten())] = 0
        z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
        z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)

        passZCand = (z_cands.counts > 0)
        output['cutflow']['z cand'] += passZCand.sum()
        selection.add('zCand', passZCand)

        # `&` binds tighter than `>`, so the count comparison must be
        # parenthesised; without it the expression is
        # `(passZCand & counts) > 0`, which is only correct while counts <= 1.
        inMassWindow = (z_cands.p4.mass > 60) & (z_cands.p4.mass < 120)
        passMassWindow = passZCand & (z_cands[inMassWindow].counts > 0)
        output['cutflow']['mass window'] += passMassWindow.sum()
        selection.add('massWindow', passMassWindow)

        # im sure there is a better way, but for now just do this
        def get_lepton_values(zl, key):
            """Pick ``key`` from the lepton whose index (0/1) is given by ``zl``.

            ``key`` may be one of the four-vector attributes ``pt``, ``eta``,
            ``phi``, ``mass`` (read from ``p4``) or any other column of the
            candidate. The result has the same jagged structure as ``zl``.
            """
            val = np.zeros_like(zl.flatten(), dtype=float)
            if len(val) == 0:
                return JaggedArray.fromoffsets(zl.offsets, val)
            selected = z_cands[passZCand]
            for i in range(2):
                mask = (i == zl.flatten())
                leptons = selected[str(i)].flatten()[mask]
                if key in ('pt', 'eta', 'phi', 'mass'):
                    val[mask] = getattr(leptons['p4'], key)
                else:
                    val[mask] = leptons[key]
            return JaggedArray.fromoffsets(zl.offsets, val)

        z1pt = get_lepton_values(z1, 'pt')
        z2pt = get_lepton_values(z2, 'pt')
        passPt = ((z1pt > 30) & (z2pt > 20)).counts > 0
        output['cutflow']['pt threshold'] += passPt.sum()
        selection.add('ptThreshold', passPt)

        # Per-candidate channel flags, from the |pdgId| of both leptons.
        chanSels = {}
        z1pdg = get_lepton_values(z1, 'pdgId')
        z2pdg = get_lepton_values(z2, 'pdgId')
        for chan, pdgIds in (('ee', (11, 11)), ('mm', (13, 13))):
            chanSels[chan] = ((abs(z1pdg) == pdgIds[0])
                              & (abs(z2pdg) == pdgIds[1]))

        weights = processor.Weights(events.run.size)
        if self._isData:
            output['sumw'][dataset] = 0  # always set to 0 for data
        else:
            output['sumw'][dataset] += events.genWeight.sum()
            weights.add('genWeight', events.genWeight)
            weights.add(
                'pileupWeight',
                self._corrections['pileupWeight'](events.Pileup.nPU),
                self._corrections['pileupWeightUp'](events.Pileup.nPU),
                self._corrections['pileupWeightDown'](events.Pileup.nPU),
            )
            zls = [z1, z2]
            # electron sf
            for ei, zl in enumerate(zls):
                ei = str(ei)
                eta = get_lepton_values(zl, 'etaSC')
                pt = get_lepton_values(zl, 'pt')
                electronRecoSF = self._corrections['electron_reco'](eta, pt)
                electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
                electronSF = np.ones_like(electronRecoSF.prod())
                if ei in ['0', '1']:
                    chans = ['ee']
                else:
                    chans = []
                for chan in chans:
                    # turns empty arrays into 0's, nonempty int 1's
                    # NOTE(review): ones_like() discards the channel flag, so
                    # this is true for every event with a Z candidate,
                    # whatever its flavour - confirm this is intended.
                    chanSel = (chanSels[chan].ones_like().sum() > 0)
                    electronSF[chanSel] *= electronRecoSF[chanSel].prod()
                    electronSF[chanSel] *= electronIdSF[chanSel].prod()
                weights.add('electronSF' + ei, electronSF)

            # muon SF
            for mi, zl in enumerate(zls):
                mi = str(mi)
                eta = get_lepton_values(zl, 'eta')
                pt = get_lepton_values(zl, 'pt')
                # The 2016 corrections are looked up as (eta, pt), the others
                # as (pt, |eta|).
                if self._year == '2016':
                    idSF = self._corrections['muon_id_MediumID'](eta, pt)
                    isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                        eta, pt)
                else:
                    idSF = self._corrections['muon_id_MediumPromptID'](
                        pt, abs(eta))
                    isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                        pt, abs(eta))

                muonSF = np.ones_like(idSF.prod())
                if mi in ['0', '1']:
                    chans = ['mm']
                else:
                    chans = []
                for chan in chans:
                    # turns empty arrays into 0's, nonempty int 1's
                    # NOTE(review): same flavour-blind flag as for electrons.
                    chanSel = (chanSels[chan].ones_like().sum() > 0)
                    muonSF[chanSel] *= idSF[chanSel].prod()
                    muonSF[chanSel] *= isoSF[chanSel].prod()
                weights.add('muonSF' + mi, muonSF)

        logging.debug('filling')
        # Cuts that make up each supported selection.
        selectionCuts = {
            'massWindow': ('lumiMask', 'trigger', 'goodVertex', 'twoLeptons',
                           'zCand', 'massWindow', 'ptThreshold'),
        }
        for sel in self._selections:
            if sel not in selectionCuts:
                # Fail loudly: without this guard `cut` would be undefined or
                # silently carried over from the previous selection.
                raise ValueError(
                    'no cut list defined for selection %r' % (sel, ))
            cut = selection.all(*selectionCuts[sel])
            for chan in ['ee', 'mm']:
                chanSel = chanSels[chan]
                # The channel flag zeroes the weight of other-flavour
                # candidates.
                weight = chanSel.astype(float) * weights.weight()

                output[sel + '_zmass'].fill(
                    dataset=dataset,
                    channel=chan,
                    mass=z_cands[cut].p4.mass.flatten(),
                    weight=weight[cut].flatten(),
                )
                output[sel + '_met'].fill(
                    dataset=dataset,
                    channel=chan,
                    met=events.MET.pt[cut],
                    weight=weight[cut].flatten(),
                )
                output[sel + '_pileup'].fill(
                    dataset=dataset,
                    channel=chan,
                    npvs=events.PV.npvs[cut],
                    weight=weight[cut].flatten(),
                )

        return output
Example #25
0
    def process(self, df):
        """Fill AK4-jet and tight-b-jet multiplicities for lepton-jet pair events.

        Builds the event weight, the AK4 jet and lepton-jet collections,
        keeps events with at least two selected lepton-jets of which at least
        one is muon-type, assigns a channel code (1 or 2, else 0) and fills
        ``output['njets']`` and ``output['ntightb']`` with the number of AK4
        jets whose pt exceeds that of the highest-pt lepton-jet.

        Returns the accumulator from ``self.accumulator.identity()``; it is
        returned unfilled when the chunk is empty or no event survives the
        lepton-jet pair requirement.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        # ---- event weights ----
        wgts = processor.Weights(df.size)
        if self.data_type != 'data':
            wgts.add('genw', df['weight'])
            true_npv = df['trueInteractionNum']
            wgts.add('pileup', *(corr(true_npv) for corr in self.pucorrs))

        # Event-level masks are registered as weight terms as well.
        any_trigger = np.logical_or.reduce([df[name] for name in Triggers])
        wgts.add('trigger', any_trigger)
        wgts.add('cosmicveto', df['cosmicveto_result'])
        wgts.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])
        # ...bla bla, other weights goes here

        event_weight = wgts.weight()

        # ---- AK4 jets ----
        jets = JaggedCandidateArray.candidatesfromcounts(
            df['akjet_ak4PFJetsCHS_p4'],
            px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
            py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
            pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
            energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
            jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
            muefrac=df['akjet_ak4PFJetsCHS_muonEnergyFraction'].content,
            chaemefrac=df['akjet_ak4PFJetsCHS_chaEmEnergyFraction'].content,
            emefrac=df['akjet_ak4PFJetsCHS_emEnergyFraction'].content,
            hadfrac=df['akjet_ak4PFJetsCHS_hadronEnergyFraction'].content,
            chahadfrac=df['akjet_ak4PFJetsCHS_chaHadEnergyFraction'].content,
            deepcsv=df['hftagscore_DeepCSV_b'].content,
        )
        # The tight flag is bit 2 of the stored DeepCSV value.
        tight_bit = 1 << 2
        jets.add_attributes(
            deepcsvTight=(np.bitwise_and(jets.deepcsv, tight_bit) == tight_bit))
        jet_presel = jets.jetid & (jets.pt > 30) & (np.abs(jets.eta) < 2.4)
        jets = jets[jet_presel]

        # ---- lepton-jets ----
        ljets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
            vx=df['pfjet_klmvtx.fCoordinates.fX'].content,
            vy=df['pfjet_klmvtx.fCoordinates.fY'].content,
            vz=df['pfjet_klmvtx.fCoordinates.fZ'].content,
            mintkdist=df['pfjet_pfcands_minTwoTkDist'].content,
        )
        ljets.add_attributes(vxy=np.hypot(ljets.vx, ljets.vy))

        # Constituent type codes: 3 and 8 are counted separately below
        # (original variable names suggest PF muons and DSA muons).
        dau_type = awkward.fromiter(df['pfjet_pfcand_type'])
        is_type3 = (dau_type == 3)
        is_type8 = (dau_type == 8)
        n_type3 = is_type3.sum()
        n_type8 = is_type8.sum()
        none_type8 = (n_type8 == 0)
        egamma_like = (n_type3 == 0) & none_type8
        pfmu_like = (n_type3 >= 2) & none_type8
        dsa_like = n_type8 > 0
        ljets.add_attributes(
            label=(egamma_like.astype(int) * 1 + pfmu_like.astype(int) * 2
                   + dsa_like.astype(int) * 3))

        n_mu = (is_type3 | is_type8).sum()
        ljets.add_attributes(ismutype=(n_mu >= 2), iseltype=(n_mu == 0))

        charge_sum = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        ljets.add_attributes(qsum=charge_sum)

        neutral = ljets.iseltype | (ljets.ismutype & (ljets.qsum == 0))
        ljets.add_attributes(isneutral=neutral)

        # non-vertex (NaN vxy) muon-type jets are treated as displaced too
        far_vertex = ljets.vxy >= 5
        unvertexed_mu = np.isnan(ljets.vxy) & ljets.ismutype
        ljets.add_attributes(displaced=(far_vertex | unvertexed_mu))

        cosmic_subset = fromNestNestIndexArray(
            df['dsamuon_isSubsetFilteredCosmic1Leg'],
            awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
        ljets.add_attributes(nocosmic=(cosmic_subset.sum() == 0))

        lj_presel = ((ljets.isneutral) & (ljets.nocosmic) & (ljets.pt > 30)
                     & (ljets.mintkdist < 50))
        ljets = ljets[lj_presel]

        # Overlap removal between AK4 jets and lepton-jets is disabled:
        # mask_ = ak4jets.match(leptonjets, deltaRCut=0.4)
        # ak4jets = ak4jets[~mask_]

        # ---- events with >= 2 lepton-jets, >= 1 of them muon-type ----
        has_pair = (ljets.counts >= 2) & (ljets.ismutype.sum() >= 1)
        lj_pairs = ljets[has_pair]
        jets = jets[has_pair]
        wgt = event_weight[has_pair]

        if lj_pairs.size == 0:
            return output

        lj_first = lj_pairs[lj_pairs.pt.argmax()]
        # second entry in argsort order (presumably the sub-leading one;
        # depends on the default ordering of argsort - TODO confirm)
        lj_second = lj_pairs[lj_pairs.pt.argsort()[:, 1:2]]

        # ---- channel code: 1 = one muon-type jet, 2 = two, 0 otherwise ----
        n_mu_lj = lj_pairs.ismutype.sum()
        mu_in_either = (lj_first.ismutype | lj_second.ismutype).flatten()
        mu_in_both = (lj_first.ismutype & lj_second.ismutype).flatten()
        code_2mu2e = ((n_mu_lj == 1) & mu_in_either).astype(int) * 1
        code_4mu = ((n_mu_lj == 2) & mu_in_both).astype(int) * 2
        channel_ = code_2mu2e + code_4mu

        # Only AK4 jets harder than the highest-pt lepton-jet are counted.
        jets = jets[jets.pt > (lj_first.pt.flatten())]

        output['njets'].fill(dataset=dataset,
                             cnt=jets.counts,
                             weight=wgt,
                             channel=channel_)

        if jets.flatten().size != 0:
            tight_b = ((jets.pt > 30) & (np.abs(jets.eta) < 2.4)
                       & (jets.deepcsvTight))
            jets = jets[tight_b]
        output['ntightb'].fill(dataset=dataset,
                               cnt=jets.counts,
                               weight=wgt,
                               channel=channel_)

        return output
    def process(self, events):

        # Initialize accumulator
        out = self.accumulator.identity()
        dataset = sample_name
        #events.metadata['dataset']

        # Data or MC
        isData = 'genWeight' not in events.fields

        #Stop processing if there is no event remain
        if len(events) == 0:
            return out

        # Golden Json file
        if (self._year == "2018") and isData:
            injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD"

        if (self._year == "2017") and isData:
            injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

        # <----- Get Scale factors ------>#

        if not isData:

            # Egamma reco ID
            get_ele_reco_above20_sf = self._corrections[
                'get_ele_reco_above20_sf'][self._year]
            get_ele_medium_id_sf = self._corrections['get_ele_medium_id_sf'][
                self._year]
            get_pho_medium_id_sf = self._corrections['get_pho_medium_id_sf'][
                self._year]

            # DoubleEG trigger # 2016, 2017 are not applied yet
            if self._year == "2018":
                get_ele_trig_leg1_SF = self._corrections[
                    'get_ele_trig_leg1_SF'][self._year]
                get_ele_trig_leg1_data_Eff = self._corrections[
                    'get_ele_trig_leg1_data_Eff'][self._year]
                get_ele_trig_leg1_mc_Eff = self._corrections[
                    'get_ele_trig_leg1_mc_Eff'][self._year]
                get_ele_trig_leg2_SF = self._corrections[
                    'get_ele_trig_leg2_SF'][self._year]
                get_ele_trig_leg2_data_Eff = self._corrections[
                    'get_ele_trig_leg2_data_Eff'][self._year]
                get_ele_trig_leg2_mc_Eff = self._corrections[
                    'get_ele_trig_leg2_mc_Eff'][self._year]

            # PU weight with custom made npy and multi-indexing
            pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
            pu = self._puweight_arr[pu_weight_idx]

        selection = processor.PackedSelection()

        # Cut flow
        cut0 = np.zeros(len(events))

        # <----- Helper functions ------>#

        #  Sort by PT  helper function
        def sort_by_pt(ele, pho, jet):
            """Return the three collections, each reordered by descending pt.

            The ordering is computed per event (``axis=1``) with
            ``ak.argsort(..., ascending=False)``.
            """
            ele_order = ak.argsort(ele.pt, ascending=False, axis=1)
            ele = ele[ele_order]

            pho_order = ak.argsort(pho.pt, ascending=False, axis=1)
            pho = pho[pho_order]

            jet_order = ak.argsort(jet.pt, ascending=False, axis=1)
            jet = jet[jet_order]

            return ele, pho, jet

        # Lorentz vectors
        from coffea.nanoevents.methods import vector
        ak.behavior.update(vector.behavior)

        def TLorentz_vector(vec):
            """Re-zip the ``x``, ``y``, ``z``, ``t`` fields of ``vec`` as a "LorentzVector"."""
            px, py, pz, energy = vec.x, vec.y, vec.z, vec.t
            components = {"x": px, "y": py, "z": pz, "t": energy}
            return ak.zip(components, with_name="LorentzVector")

        def TLorentz_vector_cylinder(vec):
            """Re-zip ``pt``, ``eta``, ``phi``, ``mass`` of ``vec`` as a "PtEtaPhiMLorentzVector"."""
            fields = {
                name: getattr(vec, name)
                for name in ("pt", "eta", "phi", "mass")
            }
            return ak.zip(fields, with_name="PtEtaPhiMLorentzVector")

        # Cut-based ID modification
        @numba.njit
        def PhotonVID(vid, idBit):
            """Collapse a packed 2-bit-per-cut bitmap into one pass/fail bit per cut.

            ``vid`` is read as seven consecutive 2-bit fields, lowest bits
            first. Bit ``k`` of the result is set when field ``k`` is greater
            than or equal to ``idBit``.
            """
            passed = 0
            for cut in range(7):
                level = (vid >> (2 * cut)) & 0b11
                if level >= idBit:
                    passed |= 1 << cut
            return passed

        # Inverse Sieie and upper limit
        @numba.njit
        def make_fake_obj_mask(Pho, builder):
            """Append one list of booleans per event to ``builder`` and return it.

            A photon is flagged True when every cut of the medium working
            point (``PhotonVID(vid, 2)``) passes, with the Sieie bit ignored.

            Every ``begin_list`` is paired with an ``end_list``, so an event
            without photons contributes an empty list.
            """
            for eventIdx, pho in enumerate(Pho):  # --Event Loop
                builder.begin_list()

                for phoIdx, _ in enumerate(pho):  # --Photon Loop

                    vid = Pho[eventIdx][phoIdx].vidNestedWPBitmap
                    vid_cuts2 = PhotonVID(vid, 2)  # Medium photon

                    # Field name
                    # |0|0|0|0|0|0|0|
                    # |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT|

                    # 1. Turn off cut (ex turn off Sieie)
                    # mask |1|1|1|0|1|1|1| == |1|1|1|0|1|1|1|

                    # 2. Inverse cut (ex inverse Sieie)
                    # mask |1|1|1|1|1|1|1| == |1|1|1|0|1|1|1|

                    #if (vid_cuts2 & 0b1111111 == 0b1111111): # Cut applied
                    #if (vid_cuts2 & 0b1111111 == 0b1110111): # Inverse Sieie
                    if (vid_cuts2 & 0b1110111 == 0b1110111):  # Without Sieie

                        builder.boolean(True)

                    else:

                        builder.boolean(False)

                builder.end_list()

            return builder

        # <----- Selection ------>#

        Initial_events = events
        # Good Run ( Golden Json files )
        from coffea import lumi_tools

        if isData:
            lumi_mask_builder = lumi_tools.LumiMask(injson)
            lumimask = ak.Array(
                lumi_mask_builder.__call__(events.run, events.luminosityBlock))
            events = events[lumimask]
            #print("{0}%  of files pass good-run conditions".format(len(events)/ len(Initial_events)))

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        ##----------- Cut flow1: Passing Triggers

        # double lepton trigger
        is_double_ele_trigger = True
        if not is_double_ele_trigger:
            double_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
        else:
            double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
            for path in self._doubleelectron_triggers[self._year]:
                if path not in events.HLT.fields: continue
                double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                    path]

        # single lepton trigger
        is_single_ele_trigger = True
        if not is_single_ele_trigger:
            single_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
        else:
            single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
            for path in self._singleelectron_triggers[self._year]:
                if path not in events.HLT.fields: continue
                single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                    path]

        events.Electron, events.Photon, events.Jet = sort_by_pt(
            events.Electron, events.Photon, events.Jet)

        # Good Primary vertex
        nPV = events.PV.npvsGood
        if not isData: nPV = nPV * pu
        nPV_nw = nPV

        # Apply cut1
        events = events[double_ele_triggers_arr]
        if not isData: pu = pu[double_ele_triggers_arr]

        cut1 = np.ones(len(events))

        # Set Particles
        Electron = events.Electron
        Muon = events.Muon
        Photon = events.Photon
        MET = events.MET
        Jet = events.Jet

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        # --Gen Photon for dR
        genparts = events.GenPart
        pdgID_mask = (genparts.pdgId == 22)
        # mask2: isPrompt | fromHardProcess | isLastCopy
        mask2 = (1 << 0) | (1 << 8) | (1 << 13)
        # https://github.com/PKUHEPEWK/WGamma/blob/master/2018/wgRealPhotonTemplateModule.py

        status_mask = ((genparts.statusFlags & mask2) == mask2)
        gen_photons = genparts[pdgID_mask & status_mask]

        assert (ak.all(ak.num(gen_photons) == 1)
                )  # Raise error if len(gen_photon) != 1

        #  --Muon ( only used to calculate dR )
        MuSelmask = (Muon.pt >= 10) & (abs(
            Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15)
        Muon = Muon[MuSelmask]

        ##----------- Cut flow2: Electron Selection

        EleSelmask = ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479)  &  (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) & (abs(Electron.dz) < 0.1)) | \
           ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) & (abs(Electron.dz) < 0.2))

        Electron = Electron[EleSelmask]

        # apply cut 2
        Tri_electron_mask = ak.num(Electron) >= 2
        Electron = Electron[Tri_electron_mask]
        Photon = Photon[Tri_electron_mask]
        Jet = Jet[Tri_electron_mask]
        MET = MET[Tri_electron_mask]
        Muon = Muon[Tri_electron_mask]
        if not isData: pu = pu[Tri_electron_mask]
        events = events[Tri_electron_mask]
        gen_photons = gen_photons[Tri_electron_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut2 = np.ones(len(Photon)) * 2

        ##----------- Cut flow3: Photon Selection

        # Basic photon selection
        isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                                  (abs(Photon.eta) < 2.5))
        Pixel_seed_mask = ~Photon.pixelSeed
        PT_mask = Photon.pt >= 20

        # dR cut with selected Muon and Electrons
        dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                                 axis=-1)  # default metric table: delta_r
        dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

        PhoSelmask = PT_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
        Photon = Photon[PhoSelmask]

        # Apply cut 3
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData: pu = pu[A_photon_mask]
        events = events[A_photon_mask]
        gen_photons = gen_photons[A_photon_mask]

        Photon_template_mask = make_fake_obj_mask(
            Photon, ak.ArrayBuilder()).snapshot()
        Photon = Photon[Photon_template_mask]

        # Apply cut 3
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData: pu = pu[A_photon_mask]
        events = events[A_photon_mask]
        gen_photons = gen_photons[A_photon_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut3 = np.ones(len(Photon)) * 3

        ## --  Additional photon selection: Photon gen-matching

        # Choose Photons that dR(genPhoton,Photon) <= 0.1
        gen_match_photon_mask = ak.all(Photon.metric_table(gen_photons) <= 0.1,
                                       axis=-1)

        # Apply cut
        Photon = Photon[gen_match_photon_mask]
        gen_match_photon_evt_mask = ak.num(Photon) >= 1

        Electron = Electron[gen_match_photon_evt_mask]
        Photon = Photon[gen_match_photon_evt_mask]
        Jet = Jet[gen_match_photon_evt_mask]
        MET = MET[gen_match_photon_evt_mask]
        gen_photons = gen_photons[gen_match_photon_evt_mask]
        if not isData: pu = pu[gen_match_photon_evt_mask]
        events = events[gen_match_photon_evt_mask]

        ##-----------  Cut flow4:  Select 2 OSSF electrons from Z
        @numba.njit
        def find_2lep(events_leptons, builder):
            """Record index tuples of opposite-charge lepton pairs per event.

            For every event in ``events_leptons`` one list is opened on
            ``builder``.  Each pair ``(i0, i1)`` with ``i0 < i1`` whose
            charges sum to zero contributes:

            * a 2-tuple ``(i0, i1)`` when the event has exactly two leptons;
            * otherwise one 3-tuple ``(i0, i1, i2)`` for every third index
              ``i2`` different from both ``i0`` and ``i1``.

            Events without such a pair get an empty list.  The same
            ``builder`` object is returned so the caller can snapshot it.
            """
            for leptons in events_leptons:

                # One (possibly empty) list of index tuples per event.
                builder.begin_list()
                nlep = len(leptons)
                for i0 in range(nlep):
                    for i1 in range(i0 + 1, nlep):
                        # Keep only pairs whose charges cancel.
                        if leptons[i0].charge + leptons[i1].charge != 0:
                            continue

                        if nlep == 2:
                            # Exactly two leptons: store just the pair.
                            builder.begin_tuple(2)
                            builder.index(0).integer(i0)
                            builder.index(1).integer(i1)
                            builder.end_tuple()

                        else:
                            # More than two leptons: pair plus each remaining
                            # lepton, so a pair is repeated once per i2.
                            # NOTE(review): this mixes 2-tuples and 3-tuples
                            # in one builder across events -- confirm the
                            # downstream field access copes with that.
                            for i2 in range(nlep):
                                # Skip i2 if it coincides with i0 or i1.
                                if len({i0, i1, i2}) < 3: continue
                                builder.begin_tuple(3)
                                builder.index(0).integer(i0)
                                builder.index(1).integer(i1)
                                builder.index(2).integer(i2)
                                builder.end_tuple()
                builder.end_list()
            return builder

        ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot()

        # OSSF cut
        ossf_mask = ak.num(ossf_idx) >= 1
        ossf_idx = ossf_idx[ossf_mask]
        Electron = Electron[ossf_mask]
        Photon = Photon[ossf_mask]
        Jet = Jet[ossf_mask]
        MET = MET[ossf_mask]
        events = events[ossf_mask]
        if not isData: pu = pu[ossf_mask]

        Double_electron = [Electron[ossf_idx[idx]] for idx in "01"]
        from coffea.nanoevents.methods import vector
        ak.behavior.update(vector.behavior)

        Diele = ak.zip({
            "lep1":
            Double_electron[0],
            "lep2":
            Double_electron[1],
            "p4":
            TLorentz_vector(Double_electron[0] + Double_electron[1])
        })

        bestZ_idx = ak.singletons(
            ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1))
        Diele = Diele[bestZ_idx]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out
        cut4 = np.ones(len(Electron)) * 4

        leading_ele = Diele.lep1
        subleading_ele = Diele.lep2

        def make_leading_pair(target, base):
            """Pick, per event, the entry of ``target`` at the max-pt position of ``base``.

            The arg-max is taken along axis 1 of ``base.pt`` with
            ``keepdims=True``, and the resulting index array is used to
            index ``target``.
            """
            leading_idx = ak.argmax(base.pt, axis=1, keepdims=True)
            return target[leading_idx]

        leading_pho = make_leading_pair(Photon, Photon)

        # -- Scale Factor for each electron

        # Trigger weight helper function
        def Trigger_Weight(eta1, pt1, eta2, pt2):
            """Return the per-event data/MC ratio for the two-leg electron trigger.

            Both numerator and denominator have the form
            ``leg1(e1)*leg2(e2) + leg1(e2)*leg2(e1) - leg1(e1)*leg1(e2)``.
            For the MC term each factor is the MC efficiency lookup; for the
            data term each factor is the data efficiency lookup multiplied by
            the matching scale-factor lookup.

            Parameters are the (eta, pt) of the first and second electron,
            passed straight to the lookup callables.
            """
            # MC efficiencies, one lookup per (leg, electron).
            mc_leg1_e1 = get_ele_trig_leg1_mc_Eff(eta1, pt1)
            mc_leg1_e2 = get_ele_trig_leg1_mc_Eff(eta2, pt2)
            mc_leg2_e1 = get_ele_trig_leg2_mc_Eff(eta1, pt1)
            mc_leg2_e2 = get_ele_trig_leg2_mc_Eff(eta2, pt2)

            per_ev_MC = (mc_leg1_e1 * mc_leg2_e2
                         + mc_leg1_e2 * mc_leg2_e1
                         - mc_leg1_e1 * mc_leg1_e2)

            # Data efficiencies and scale factors, one lookup per (leg, electron).
            data_leg1_e1 = get_ele_trig_leg1_data_Eff(eta1, pt1)
            data_leg1_e2 = get_ele_trig_leg1_data_Eff(eta2, pt2)
            data_leg2_e1 = get_ele_trig_leg2_data_Eff(eta1, pt1)
            data_leg2_e2 = get_ele_trig_leg2_data_Eff(eta2, pt2)
            sf_leg1_e1 = get_ele_trig_leg1_SF(eta1, pt1)
            sf_leg1_e2 = get_ele_trig_leg1_SF(eta2, pt2)
            sf_leg2_e1 = get_ele_trig_leg2_SF(eta1, pt1)
            sf_leg2_e2 = get_ele_trig_leg2_SF(eta2, pt2)

            # Factor order inside each product is kept as eff*SF*eff*SF so the
            # floating-point result is unchanged.
            per_ev_data = (data_leg1_e1 * sf_leg1_e1 * data_leg2_e2 * sf_leg2_e2
                           + data_leg1_e2 * sf_leg1_e2 * data_leg2_e1 * sf_leg2_e1
                           - data_leg1_e1 * sf_leg1_e1 * data_leg1_e2 * sf_leg1_e2)

            return per_ev_data / per_ev_MC

        if not isData:

            ## -------------< Egamma ID and Reco Scale factor > -----------------##
            get_pho_medium_id_sf = get_pho_medium_id_sf(
                ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt))

            ele_reco_sf = get_ele_reco_above20_sf(
                ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
                ak.flatten(leading_ele.pt)) * get_ele_reco_above20_sf(
                    ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
                    ak.flatten(subleading_ele.pt))

            ele_medium_id_sf = get_ele_medium_id_sf(
                ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
                ak.flatten(leading_ele.pt)) * get_ele_medium_id_sf(
                    ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
                    ak.flatten(subleading_ele.pt))

            ## -------------< Double Electron Trigger Scale factor > -----------------##
            eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
            eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
            pt1 = ak.flatten(leading_ele.pt)
            pt2 = ak.flatten(subleading_ele.pt)

            # -- 2017,2016 are not applied yet
            if self._year == '2018':
                ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

        ##----------- Cut flow5: Event selection

        # Mee cut
        Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4

        # Electron PT cuts
        Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20))

        # MET cuts
        MET_mask = MET.pt > 20

        # --------Mask -------#
        Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask
        Diele_sel = Diele[Event_sel_mask]
        leading_pho_sel = leading_pho[Event_sel_mask]
        Jet_sel = Jet[Event_sel_mask]
        MET_sel = MET[Event_sel_mask]

        # Photon  EE and EB
        isEE_mask = leading_pho.isScEtaEE
        isEB_mask = leading_pho.isScEtaEB
        Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
        Pho_EB = leading_pho[isEB_mask & Event_sel_mask]

        #Stop processing if there is no event remain
        if len(leading_pho_sel) == 0:
            return out

        cut5 = np.ones(len(Diele)) * 5

        # -------------------- Flatten variables ---------------------------#

        # -- Ele1 --#
        Ele1_PT = ak.flatten(Diele_sel.lep1.pt)
        Ele1_Eta = ak.flatten(Diele_sel.lep1.eta)
        Ele1_Phi = ak.flatten(Diele_sel.lep1.phi)

        # -- Ele2 --#
        Ele2_PT = ak.flatten(Diele_sel.lep2.pt)
        Ele2_Eta = ak.flatten(Diele_sel.lep2.eta)
        Ele2_Phi = ak.flatten(Diele_sel.lep2.phi)

        # -- Pho -- #
        Pho_PT = ak.flatten(leading_pho_sel.pt)
        Pho_Eta = ak.flatten(leading_pho_sel.eta)
        Pho_Phi = ak.flatten(leading_pho_sel.phi)

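        # -- Pho EB --#
        # NOTE(review): Pho_EB_Isochg and Pho_EB_Sieie below are read from
        # Pho_EE rather than Pho_EB -- confirm whether that is intended.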
        Pho_EB_PT = ak.flatten(Pho_EB.pt)
        Pho_EB_Eta = ak.flatten(Pho_EB.eta)
        Pho_EB_Phi = ak.flatten(Pho_EB.phi)
        Pho_EB_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg)
        Pho_EB_Sieie = ak.flatten(Pho_EE.sieie)

        # -- Pho EE --#
        Pho_EE_PT = ak.flatten(Pho_EE.pt)
        Pho_EE_Eta = ak.flatten(Pho_EE.eta)
        Pho_EE_Phi = ak.flatten(Pho_EE.phi)
        Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg)
        Pho_EE_Sieie = ak.flatten(Pho_EE.sieie)

        # --Kinematics --#
        Diele_mass = ak.flatten(Diele_sel.p4.mass)

        leading_ele, subleading_ele = ak.flatten(
            TLorentz_vector_cylinder(Diele_sel.lep1)), ak.flatten(
                TLorentz_vector_cylinder(Diele_sel.lep2))
        dR_e1pho = ak.flatten(
            leading_ele.delta_r(leading_pho_sel))  # dR pho,ele1
        dR_e2pho = ak.flatten(
            subleading_ele.delta_r(leading_pho_sel))  # dR pho,ele2
        dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel))

        MET_PT = ak.to_numpy(MET_sel.pt)

        # -------------------- Sieie bins---------------------------#
        def make_bins(pt, eta, sieie, bin_range_str):
            """Select the ``sieie`` entries that fall in one named (pT, eta) bin.

            Bin names have the form ``'PT_<i>_eta_<j>'`` with ``i, j`` in 1..4.
            All edges are strict inequalities, so a value sitting exactly on
            an edge (e.g. ``pt == 30`` or ``eta == 1``) belongs to no bin.

            Returns a tuple ``(ak.to_numpy(sieie[mask]), mask)`` for the
            requested bin.  An unknown ``bin_range_str`` raises ``KeyError``.
            """
            # pT slices; the last one is open-ended above 50.
            in_pt = {
                'PT_1': (pt > 20) & (pt < 30),
                'PT_2': (pt > 30) & (pt < 40),
                'PT_3': (pt > 40) & (pt < 50),
                'PT_4': (pt > 50),
            }
            # eta slices; the first one has no lower edge.
            in_eta = {
                'eta_1': (eta < 1),
                'eta_2': (eta > 1) & (eta < 1.5),
                'eta_3': (eta > 1.5) & (eta < 2),
                'eta_4': (eta > 2) & (eta < 2.5),
            }

            # Every pT slice combined with every eta slice.
            bin_dict = {
                pt_key + '_' + eta_key: pt_sel & eta_sel
                for pt_key, pt_sel in in_pt.items()
                for eta_key, eta_sel in in_eta.items()
            }

            binmask = bin_dict[bin_range_str]
            selected = sieie[binmask]
            return ak.to_numpy(selected), binmask

        bin_name_list = [
            'PT_1_eta_1', 'PT_1_eta_2', 'PT_1_eta_3', 'PT_1_eta_4',
            'PT_2_eta_1', 'PT_2_eta_2', 'PT_2_eta_3', 'PT_2_eta_4',
            'PT_3_eta_1', 'PT_3_eta_2', 'PT_3_eta_3', 'PT_3_eta_4',
            'PT_4_eta_1', 'PT_4_eta_2', 'PT_4_eta_3', 'PT_4_eta_4'
        ]

        binned_sieie_hist = {}
        binmask_dict = {}
        for name in bin_name_list:
            binned_sieie_hist[name], _ = make_bins(
                ak.flatten(leading_pho_sel.pt),
                ak.flatten(abs(leading_pho_sel.eta)),
                ak.flatten(leading_pho_sel.sieie), name)
            _, binmask_dict[name] = make_bins(ak.flatten(leading_pho.pt),
                                              ak.flatten(abs(leading_pho.eta)),
                                              ak.flatten(leading_pho.sieie),
                                              name)

        print("Show me the last bin: ", binned_sieie_hist['PT_4_eta_4'])

        # --- Apply weight and hist
        weights = processor.Weights(len(cut4))

        # --- skim cut-weight
        def skim_weight(arr):
            """Drop ``None`` and zero entries from ``arr`` and return a NumPy array."""
            present = arr[~ak.is_none(arr)]
            nonzero = present[present != 0]
            return ak.to_numpy(nonzero)

        cuts = Event_sel_mask
        cuts_pho_EE = ak.flatten(isEE_mask)
        cuts_pho_EB = ak.flatten(isEB_mask)

        print(
            "cut0: {0}, cut1: {1}, cut2: {2}, cut3: {3}, cut4: {4} ,cut5 {5} ".
            format(len(Initial_events), len(cut1), len(cut2), len(cut3),
                   len(cut4), len(cut5)))

        # Weight and SF here
        if not isData:
            weights.add('pileup', pu)
            weights.add('ele_id', ele_medium_id_sf)
            weights.add('pho_id', get_pho_medium_id_sf)
            weights.add('ele_reco', ele_reco_sf)

            # 2016,2017 are not applied yet
            if self._year == "2018":
                weights.add('ele_trigger', ele_trig_weight)

        # ---------------------------- Fill hist --------------------------------------#

        # Initial events
        out["sumw"][dataset] += len(Initial_events)

        # Cut flow loop
        for cut in [cut0, cut1, cut2, cut3, cut4, cut5]:
            out["cutflow"].fill(dataset=dataset, cutflow=cut)

        # Primary vertex
        out['nPV'].fill(
            dataset=dataset,
            nPV=nPV,
        )
        out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV_nw)

        # Fill hist

        # -- met -- #
        out["met"].fill(dataset=dataset,
                        met=MET_PT,
                        weight=skim_weight(weights.weight() * cuts))

        # --mass -- #
        out["mass"].fill(dataset=dataset,
                         mass=Diele_mass,
                         weight=skim_weight(weights.weight() * cuts))
        # -- Ele1 -- #
        out["ele1pt"].fill(dataset=dataset,
                           ele1pt=Ele1_PT,
                           weight=skim_weight(weights.weight() * cuts))
        out["ele1eta"].fill(dataset=dataset,
                            ele1eta=Ele1_Eta,
                            weight=skim_weight(weights.weight() * cuts))
        out["ele1phi"].fill(dataset=dataset,
                            ele1phi=Ele1_Phi,
                            weight=skim_weight(weights.weight() * cuts))

        # --Ele2 --#
        out["ele2pt"].fill(dataset=dataset,
                           ele2pt=Ele2_PT,
                           weight=skim_weight(weights.weight() * cuts))
        out["ele2eta"].fill(dataset=dataset,
                            ele2eta=Ele2_Eta,
                            weight=skim_weight(weights.weight() * cuts))
        out["ele2phi"].fill(dataset=dataset,
                            ele2phi=Ele2_Phi,
                            weight=skim_weight(weights.weight() * cuts))

        # -- Photon -- #

        out["phopt"].fill(dataset=dataset,
                          phopt=Pho_PT,
                          weight=skim_weight(weights.weight() * cuts))
        out["phoeta"].fill(dataset=dataset,
                           phoeta=Pho_Eta,
                           weight=skim_weight(weights.weight() * cuts))
        out["phophi"].fill(dataset=dataset,
                           phophi=Pho_Phi,
                           weight=skim_weight(weights.weight() * cuts))

        # -- Binned sieie hist -- #
        if len(binned_sieie_hist['PT_1_eta_1'] > 0):
            out['PT_1_eta_1'].fill(dataset=dataset,
                                   PT_1_eta_1=binned_sieie_hist['PT_1_eta_1'])
        if len(binned_sieie_hist['PT_1_eta_2'] > 0):
            out['PT_1_eta_2'].fill(dataset=dataset,
                                   PT_1_eta_2=binned_sieie_hist['PT_1_eta_2'])
        if len(binned_sieie_hist['PT_1_eta_3'] > 0):
            out['PT_1_eta_3'].fill(dataset=dataset,
                                   PT_1_eta_3=binned_sieie_hist['PT_1_eta_3'])
        if len(binned_sieie_hist['PT_1_eta_4'] > 0):
            out['PT_1_eta_4'].fill(dataset=dataset,
                                   PT_1_eta_4=binned_sieie_hist['PT_1_eta_4'])
        if len(binned_sieie_hist['PT_2_eta_1'] > 0):
            out['PT_2_eta_1'].fill(dataset=dataset,
                                   PT_2_eta_1=binned_sieie_hist['PT_2_eta_1'])
        if len(binned_sieie_hist['PT_2_eta_2'] > 0):
            out['PT_2_eta_2'].fill(dataset=dataset,
                                   PT_2_eta_2=binned_sieie_hist['PT_2_eta_2'])
        if len(binned_sieie_hist['PT_2_eta_3'] > 0):
            out['PT_2_eta_3'].fill(dataset=dataset,
                                   PT_2_eta_3=binned_sieie_hist['PT_2_eta_3'])
        if len(binned_sieie_hist['PT_2_eta_4'] > 0):
            out['PT_2_eta_4'].fill(dataset=dataset,
                                   PT_2_eta_4=binned_sieie_hist['PT_2_eta_4'])
        if len(binned_sieie_hist['PT_3_eta_1'] > 0):
            out['PT_3_eta_1'].fill(dataset=dataset,
                                   PT_3_eta_1=binned_sieie_hist['PT_3_eta_1'])
        if len(binned_sieie_hist['PT_3_eta_2'] > 0):
            out['PT_3_eta_2'].fill(dataset=dataset,
                                   PT_3_eta_2=binned_sieie_hist['PT_3_eta_2'])
        if len(binned_sieie_hist['PT_3_eta_3'] > 0):
            out['PT_3_eta_3'].fill(dataset=dataset,
                                   PT_3_eta_3=binned_sieie_hist['PT_3_eta_3'])
        if len(binned_sieie_hist['PT_3_eta_4'] > 0):
            out['PT_3_eta_4'].fill(dataset=dataset,
                                   PT_3_eta_4=binned_sieie_hist['PT_3_eta_4'])
        if len(binned_sieie_hist['PT_4_eta_1'] > 0):
            out['PT_4_eta_1'].fill(dataset=dataset,
                                   PT_4_eta_1=binned_sieie_hist['PT_4_eta_1'])
        if len(binned_sieie_hist['PT_4_eta_2'] > 0):
            out['PT_4_eta_2'].fill(dataset=dataset,
                                   PT_4_eta_2=binned_sieie_hist['PT_4_eta_2'])
        if len(binned_sieie_hist['PT_4_eta_3'] > 0):
            out['PT_4_eta_3'].fill(dataset=dataset,
                                   PT_4_eta_3=binned_sieie_hist['PT_4_eta_3'])
        if len(binned_sieie_hist['PT_4_eta_4'] > 0):
            out['PT_4_eta_4'].fill(dataset=dataset,
                                   PT_4_eta_4=binned_sieie_hist['PT_4_eta_4'])

        return out
    def process(self, df):
        """Accumulate lepton-jet isolation columns for one chunk of events.

        Builds per-event weights, constructs the ``pfjet`` candidates with
        type labels, keeps events with at least two lepton-jets (optionally
        only those passing the delta-phi control requirement), and appends
        flattened per-jet columns to the accumulator.

        Returns the accumulator obtained from ``self.accumulator.identity()``;
        it is returned unfilled when the chunk is empty or no event survives.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        ## per-event weights ##
        event_weights = processor.Weights(df.size)
        if self.data_type != 'data':
            event_weights.add('genw', df['weight'])
            true_npv = df['trueInteractionNum']
            event_weights.add('pileup',
                              *(corr(true_npv) for corr in self.pucorrs))

        # Logical OR of all trigger columns listed in ``Triggers``.
        event_weights.add(
            'trigger', np.logical_or.reduce([df[path] for path in Triggers]))
        event_weights.add('cosmicveto', df['cosmicveto_result'])
        event_weights.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])

        weight = event_weights.weight()
        ########################

        jet_columns = dict(
            px=df['pfjet_p4.fCoordinates.fX'],
            py=df['pfjet_p4.fCoordinates.fY'],
            pz=df['pfjet_p4.fCoordinates.fZ'],
            energy=df['pfjet_p4.fCoordinates.fT'],
            pfisoAll05=df['pfjet_pfIsolation05'],
            pfisoNopu05=df['pfjet_pfIsolationNoPU05'],
            pfisoDbeta=df['pfjet_pfiso'],
            ncands=df['pfjet_pfcands_n'],
        )
        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'], **jet_columns)

        # Count daughters per jet by candidate type code; codes 3 and 8 are
        # the ones tallied as "pfmu" and "dsa" respectively.
        dau_type = awkward.fromiter(df['pfjet_pfcand_type'])
        n_pfmu = (dau_type == 3).sum()
        n_dsa = (dau_type == 8).sum()
        is_egm_jet = (n_pfmu == 0) & (n_dsa == 0)
        is_pfmu_jet = (n_pfmu >= 2) & (n_dsa == 0)
        is_dsa_jet = n_dsa > 0
        label = (is_egm_jet.astype(int) * 1
                 + is_pfmu_jet.astype(int) * 2
                 + is_dsa_jet.astype(int) * 3)
        leptonjets.add_attributes(label=label)
        n_mu = ((dau_type == 3) | (dau_type == 8)).sum()
        leptonjets.add_attributes(ismutype=(n_mu >= 2), iseltype=(n_mu == 0))

        ## __twoleptonjets__
        has_two_lj = leptonjets.counts >= 2
        dileptonjets = leptonjets[has_two_lj]
        evt_wgt = weight[has_two_lj]
        if dileptonjets.size == 0:
            return output

        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        # Second entry of the pt argsort per event (presumably the
        # sub-leading jet, assuming argsort is descending -- confirm).
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        ## channel def ##
        n_mutype = dileptonjets.ismutype.sum()
        # 2mu2e -> 1: exactly one mu-type jet, and it is among the leading two.
        mu_in_leading2 = (lj0.ismutype | lj1.ismutype).flatten()
        channel_2mu2e = ((n_mutype == 1) & mu_in_leading2).astype(int) * 1
        # 4mu -> 2: exactly two mu-type jets, and they are the leading two.
        mu_is_leading2 = (lj0.ismutype & lj1.ismutype).flatten()
        channel_4mu = ((n_mutype == 2) & mu_is_leading2).astype(int) * 2
        channel_ = channel_2mu2e + channel_4mu
        ###########

        in_control = (np.abs(lj0.p4.delta_phi(lj1.p4)) < np.pi / 2).flatten()

        ## __isControl__
        if self.dphi_control:
            dileptonjets = dileptonjets[in_control]
            evt_wgt = evt_wgt[in_control]
            lj0 = lj0[in_control]
            lj1 = lj1[in_control]
            channel_ = channel_[in_control]
        if dileptonjets.size == 0:
            return output

        if self.data_type == 'bkg':
            evt_wgt *= bkgSCALE[dataset]

        def _store(key, values):
            # Append one flat column to the named accumulator entry.
            output[key] += processor.column_accumulator(values)

        _store('all05', dileptonjets.pfisoAll05.flatten())
        _store('nopu05', dileptonjets.pfisoNopu05.flatten())
        _store('dbeta', dileptonjets.pfisoDbeta.flatten())
        # The "w" variants are the same isolations divided by ncands.
        _store('all05w',
               (dileptonjets.pfisoAll05 / dileptonjets.ncands).flatten())
        _store('nopu05w',
               (dileptonjets.pfisoNopu05 / dileptonjets.ncands).flatten())
        _store('dbetaw',
               (dileptonjets.pfisoDbeta / dileptonjets.ncands).flatten())
        _store('pt', dileptonjets.pt.flatten())
        _store('eta', dileptonjets.eta.flatten())

        # Broadcast per-event quantities to one entry per jet.
        per_jet_ones = dileptonjets.pt.ones_like()
        _store('wgt', (per_jet_ones * evt_wgt).flatten())
        _store('ljtype',
               (dileptonjets.ismutype.astype(int) * 1 +
                dileptonjets.iseltype.astype(int) * 2).flatten())
        _store('channel', (per_jet_ones * channel_).flatten())

        return output
Example #28
0
    def process(self, events):
        # Dataset parameters
        dataset = events.metadata['dataset']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']
        datasets = [
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        ]
        for d in datasets:
            if d in dataset: dataset = dataset.split('_')[0]

        ### Recover objects, selection, functions and others...
        # Objects
        isTightMuon = self._objects['isTightMuonPOG']
        isTightElectron = self._objects['isTightElectronPOG']
        isGoodJet = self._objects['isGoodJet']
        isClean = self._objects['isClean']
        isMuonMVA = self._objects[
            'isMuonMVA']  #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15)
        isElecMVA = self._objects[
            'isElecMVA']  #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)

        # Corrections
        GetMuonIsoSF = self._corrections['getMuonIso']
        GetMuonIDSF = self._corrections['getMuonID']

        # Selection
        passNJets = self._selection['passNJets']
        passMETcut = self._selection['passMETcut']
        passTrigger = self._selection['passTrigger']

        # Functions
        pow2 = self._functions['pow2']
        IsClosestToZ = self._functions['IsClosestToZ']
        GetGoodTriplets = self._functions['GetGoodTriplets']

        # Initialize objects
        met = events.MET
        e = events.Electron
        mu = events.Muon
        j = events.Jet

        # Electron selection
        #e['isGood'] = e.pt.zeros_like()
        e['isGood'] = isElecMVA(e.pt,
                                e.eta,
                                e.dxy,
                                e.dz,
                                e.miniPFRelIso_all,
                                e.sip3d,
                                e.mvaTTH,
                                e.mvaFall17V2Iso,
                                e.lostHits,
                                e.convVeto,
                                e.tightCharge,
                                minpt=10)
        leading_e = e[e.pt.argmax()]
        leading_e = leading_e[leading_e.isGood.astype(np.bool)]

        # Muon selection
        mu['isGood'] = isMuonMVA(mu.pt,
                                 mu.eta,
                                 mu.dxy,
                                 mu.dz,
                                 mu.miniPFRelIso_all,
                                 mu.sip3d,
                                 mu.mvaTTH,
                                 mu.mediumPromptId,
                                 mu.tightCharge,
                                 minpt=10)
        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)]

        e = e[e.isGood.astype(np.bool)]
        mu = mu[mu.isGood.astype(np.bool)]
        nElec = e.counts
        nMuon = mu.counts

        twoLeps = (nElec + nMuon) == 2
        threeLeps = (nElec + nMuon) == 3
        twoElec = (nElec == 2)
        twoMuon = (nMuon == 2)
        e0 = e[e.pt.argmax()]
        m0 = mu[mu.pt.argmax()]

        # Jet selection
        j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId)
        j['isclean'] = isClean(j, e, mu)
        goodJets = j[(j.isclean) & (j.isgood)]
        njets = goodJets.counts
        ht = goodJets.pt.sum()
        j0 = goodJets[goodJets.pt.argmax()]
        nbtags = goodJets[goodJets.btagDeepFlavB > 0.2770].counts

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
        singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
        em = singe.cross(singm)
        emSSmask = (em.i0.charge * em.i1.charge > 0)
        emSS = em[emSSmask]
        nemSS = len(emSS.flatten())

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
        mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

        eepairs = ee.distincts()
        eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
        eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
        eeoffZmask = (eeonZmask == 0)

        mmpairs = mm.distincts()
        mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
        mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
        mmoffZmask = (mmonZmask == 0)

        eeSSonZ = eepairs[eeSSmask & eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ = mmpairs[mmSSmask & mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
        neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten())
        nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten())

        #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

        # Cuts
        eeSSmask = (eeSSmask[eeSSmask].counts > 0)
        mmSSmask = (mmSSmask[mmSSmask].counts > 0)
        eeonZmask = (eeonZmask[eeonZmask].counts > 0)
        eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
        mmonZmask = (mmonZmask[mmonZmask].counts > 0)
        mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
        emSSmask = (emSSmask[emSSmask].counts > 0)

        # njets

        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
        elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
        ee_eem = elec_eem.distincts()
        ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
        ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
        ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
        ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)

        eepair_eem = (ee_eem.i0 + ee_eem.i1)
        trilep_eem = eepair_eem.cross(muon_eem)
        trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

        # mme
        muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
        elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
        mm_mme = muon_mme.distincts()
        mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
        mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
        mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
        mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)

        mmpair_mme = (mm_mme.i0 + mm_mme.i1)
        trilep_mme = mmpair_mme.cross(elec_mme)
        trilep_mme = (trilep_mme.i0 + trilep_mme.i1)
        mZ_mme = mmpair_mme.mass
        mZ_eem = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        ### eee and mmm
        eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
        mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]
        # Create pairs
        ee_pairs = eee.argchoose(2)
        mm_pairs = mmm.argchoose(2)

        # Select pairs that are SFOS.
        eeSFOS_pairs = ee_pairs[
            (np.abs(eee[ee_pairs.i0].pdgId) == np.abs(eee[ee_pairs.i1].pdgId))
            & (eee[ee_pairs.i0].charge != eee[ee_pairs.i1].charge)]
        mmSFOS_pairs = mm_pairs[
            (np.abs(mmm[mm_pairs.i0].pdgId) == np.abs(mmm[mm_pairs.i1].pdgId))
            & (mmm[mm_pairs.i0].charge != mmm[mm_pairs.i1].charge)]
        # Find the pair with mass closest to Z.
        eeOSSFmask = eeSFOS_pairs[np.abs((eee[eeSFOS_pairs.i0] +
                                          eee[eeSFOS_pairs.i1]).mass -
                                         91.2).argmin()]
        onZmask_ee = np.abs((eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]).mass -
                            91.2) < 15
        mmOSSFmask = mmSFOS_pairs[np.abs((mmm[mmSFOS_pairs.i0] +
                                          mmm[mmSFOS_pairs.i1]).mass -
                                         91.2).argmin()]
        onZmask_mm = np.abs((mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]).mass -
                            91.2) < 15
        offZmask_ee = np.abs((eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]).mass -
                             91.2) > 15
        offZmask_mm = np.abs((mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]).mass -
                             91.2) > 15

        # Create masks
        eeeOnZmask = onZmask_ee[onZmask_ee].counts > 0
        eeeOffZmask = offZmask_ee[offZmask_ee].counts > 0
        mmmOnZmask = onZmask_mm[onZmask_mm].counts > 0
        mmmOffZmask = offZmask_mm[offZmask_mm].counts > 0

        # Leptons from Z
        eZ0 = eee[eeOSSFmask.i0]
        eZ1 = eee[eeOSSFmask.i1]
        mZ0 = mmm[mmOSSFmask.i0]
        mZ1 = mmm[mmOSSFmask.i1]

        # Leptons from W
        eW = eee[~eeOSSFmask.i0 | ~eeOSSFmask.i1]
        mW = mmm[~mmOSSFmask.i0 | ~mmOSSFmask.i1]

        eZ = eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]
        triElec = eZ + eW
        mZ = mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]
        triMuon = mZ + mW

        mZ_eee = eZ.mass
        m3l_eee = triElec.mass
        mZ_mmm = mZ.mass
        m3l_mmm = triMuon.mass

        # Triggers
        #passTrigger = lambda events, n, m, o : np.ones_like(events['MET_pt'], dtype=np.bool) # XXX
        trig_eeSS = passTrigger(events, 'ee', isData, dataset)
        trig_mmSS = passTrigger(events, 'mm', isData, dataset)
        trig_emSS = passTrigger(events, 'em', isData, dataset)
        trig_eee = passTrigger(events, 'eee', isData, dataset)
        trig_mmm = passTrigger(events, 'mmm', isData, dataset)
        trig_eem = passTrigger(events, 'eem', isData, dataset)
        trig_mme = passTrigger(events, 'mme', isData, dataset)

        # MET filters

        # Weights
        genw = np.ones_like(
            events['MET_pt']) if isData else events['genWeight']
        weights = processor.Weights(events.size)
        weights.add('norm', genw if isData else (xsec / sow) * genw)

        # Selections and cuts
        selections = processor.PackedSelection()
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('emSS', (emSSmask) & (trig_emSS))

        channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
        selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
        selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
        selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
        selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

        channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
        selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
        selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
        selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
        selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

        levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
        selections.add('base', (nElec + nMuon >= 2))
        selections.add('2jets', (njets >= 2))
        selections.add('4jets', (njets >= 4))
        selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
        selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

        # Variables
        invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
        invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
        invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
        invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
        invMass_emSS = (emSS.i0 + emSS.i1).mass

        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['nbtags'] = nbtags
        varnames['invmass'] = {
            'eeSSonZ': invMass_eeSSonZ,
            'eeSSoffZ': invMass_eeSSoffZ,
            'mmSSonZ': invMass_mmSSonZ,
            'mmSSoffZ': invMass_mmSSoffZ,
            'emSS': invMass_emSS,
            'eemSSonZ': mZ_eem,
            'eemSSoffZ': mZ_eem,
            'mmeSSonZ': mZ_mme,
            'mmeSSoffZ': mZ_mme,
            'eeeSSonZ': mZ_eee,
            'eeeSSoffZ': mZ_eee,
            'mmmSSonZ': mZ_mmm,
            'mmmSSoffZ': mZ_mmm,
        }
        varnames['m3l'] = {
            'eemSSonZ': m3l_eem,
            'eemSSoffZ': m3l_eem,
            'mmeSSonZ': m3l_mme,
            'mmeSSoffZ': m3l_mme,
            'eeeSSonZ': m3l_eee,
            'eeeSSoffZ': m3l_eee,
            'mmmSSonZ': m3l_mmm,
            'mmmSSoffZ': m3l_mmm,
        }
        varnames['e0pt'] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt'] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['j0pt'] = j0.pt
        varnames['j0eta'] = j0.eta
        varnames['counts'] = np.ones_like(events.MET.pt, dtype=np.int)

        # Fill Histos
        hout = self.accumulator.identity()
        hout['dummy'].fill(sample=dataset, dummy=1, weight=events.size)

        for var, v in varnames.items():
            for ch in channels2LSS + channels3L:
                for lev in levels:
                    weight = weights.weight()
                    cuts = [ch] + [lev]
                    cut = selections.all(*cuts)
                    weights_flat = weight[cut].flatten()
                    weights_ones = np.ones_like(weights_flat, dtype=np.int)
                    if var == 'invmass':
                        if ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue
                        elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                            continue  #values = v[ch]
                        else:
                            values = v[ch][cut].flatten()
                        hout['invmass'].fill(sample=dataset,
                                             channel=ch,
                                             cut=lev,
                                             invmass=values,
                                             weight=weights_flat)
                    elif var == 'm3l':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                                'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        values = v[ch][cut].flatten()
                        hout['m3l'].fill(sample=dataset,
                                         channel=ch,
                                         cut=lev,
                                         m3l=values,
                                         weight=weights_flat)
                    else:
                        values = v[cut].flatten()
                        if var == 'ht':
                            hout[var].fill(ht=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'met':
                            hout[var].fill(met=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'njets':
                            hout[var].fill(njets=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'nbtags':
                            hout[var].fill(nbtags=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'counts':
                            hout[var].fill(counts=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_ones)
                        elif var == 'e0pt':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            hout[var].fill(e0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0pt':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            hout[var].fill(m0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'e0eta':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            hout[var].fill(e0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0eta':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            hout[var].fill(m0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0pt':
                            if lev == 'base': continue
                            hout[var].fill(j0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0eta':
                            if lev == 'base': continue
                            hout[var].fill(j0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)

        return hout
Example #29
0
def get_veto_weights(df, evaluator, electrons, muons, taus, do_variations=False):
    """
    Calculate veto weights for SR W

    The weights are effectively:

        w = product(1-SF)

    where the product runs over veto-able e, mu, tau.

    :param df: Event container; this function reads ``df.size`` and
               ``df['dataset']`` from it.
    :param evaluator: Mapping from scale factor name to a callable that
                      returns the scale factor for the given arguments.
                      For electron and muon SFs, an entry named
                      ``"<name>_error"`` is looked up for the up/down
                      variations.
    :param electrons: Electron candidates (``pt`` and ``etasc`` are used).
    :param muons: Muon candidates (``pt`` and ``abseta`` are used).
    :param taus: Tau candidates (``pt`` is used).
    :param do_variations: If True, weights for the up/down variations of
                          the lepton scale factors are stored in addition
                          to the nominal weight.
    :return: ``processor.Weights`` object holding one weight per variation,
             keyed by the variation name ("nominal", "ele_reco_up", ...).
    """
    veto_weights = processor.Weights(size=df.size, storeIndividual=True)

    variations = ["nominal"]
    if do_variations:
        variations.extend([
                      'ele_reco_up','ele_reco_dn',
                      'ele_id_up','ele_id_dn',
                      'muon_id_up','muon_id_dn',
                      'muon_iso_up','muon_iso_dn',
                      'tau_id_up','tau_id_dn'
                      ])

    # The year depends only on the dataset, not on the variation,
    # so it is determined once instead of once per loop iteration.
    is_2017 = extract_year(df['dataset']) == 2017

    def varied_weight(variation, sfname, *args):
        '''
        Helper function to easily get the correct weights for a given variation.

        The variation is passed explicitly rather than captured from the
        enclosing loop, so the helper never depends on loop state.
        '''
        nominal = evaluator[sfname](*args)

        # For the nominal variation, just pass through
        if 'nominal' in variation:
            return nominal

        # If this variation is unrelated to the SF at hand,
        # pass through as well
        if re.sub(r'_(up|dn)', '', variation) not in sfname:
            return nominal

        # Direction of variation
        sgn = 1 if variation.endswith("up") else -1
        return nominal + sgn * evaluator[f"{sfname}_error"](*args)

    for variation in variations:
        ### Electrons
        if is_2017:
            high_et = electrons.pt>20

            # Low pt SFs
            low_pt_args = (electrons.etasc[~high_et], electrons.pt[~high_et])
            ele_reco_sf_low = varied_weight(variation, 'ele_reco_pt_lt_20', *low_pt_args)
            ele_id_sf_low = varied_weight(variation, "ele_id_loose", *low_pt_args)

            # High pt SFs
            high_pt_args = (electrons.etasc[high_et], electrons.pt[high_et])

            ele_reco_sf_high = varied_weight(variation, "ele_reco", *high_pt_args)
            ele_id_sf_high = varied_weight(variation, "ele_id_loose", *high_pt_args)

            # Combine
            veto_weight_ele = (1 - ele_reco_sf_low*ele_id_sf_low).prod() * (1-ele_reco_sf_high*ele_id_sf_high).prod()
        else:
            # No pt split for any year other than 2017 (i.e. 2018)
            args = (electrons.etasc, electrons.pt)
            ele_reco_sf = varied_weight(variation, "ele_reco", *args)
            ele_id_sf = varied_weight(variation, "ele_id_loose", *args)

            # Combine
            veto_weight_ele = (1 - ele_id_sf*ele_reco_sf).prod()

        ### Muons
        args = (muons.pt, muons.abseta)
        muon_id_sf = varied_weight(variation, "muon_id_loose", *args)
        muon_iso_sf = varied_weight(variation, "muon_iso_loose", *args)
        veto_weight_muo = (1 - muon_id_sf*muon_iso_sf).prod()

        ### Taus
        # Taus have their variations saved as separate histograms,
        # so our cool trick from above is replaced by the pedestrian way
        if "tau_id" in variation:
            direction = variation.split("_")[-1]
            tau_sf_name = f"tau_id_{direction}"
        else:
            tau_sf_name = "tau_id"
        veto_weight_tau = (1 - evaluator[tau_sf_name](taus.pt)).prod()

        ### Combine
        total = veto_weight_ele * veto_weight_muo * veto_weight_tau

        # Cap weights just in case:
        # any weight with magnitude above 5 is reset to unity
        total[np.abs(total)>5] = 1
        veto_weights.add(variation, total)

    return veto_weights
Example #30
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_v_jet'] = has_v_jet(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if not df['is_data']:
            gen = setup_gen_candidates(df)
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']:
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max()

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg)

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index=ak4.pt.argmax()

        elejet_pairs = ak4[:,:1].cross(electrons)
        df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:,:1].cross(muons)
        df['dRMuonJet'] = np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min()

        # Photons
        # Angular distance leading photon - leading jet
        phojet_pairs = ak4[:,:1].cross(photons[:,:1])
        df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons)
        df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4)
        selection = processor.PackedSelection()



        # Triggers
        pass_all = np.ones(df.size)==1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)
        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts==0)
        selection.add('veto_muo', muons.counts==0)
        selection.add('veto_photon', photons.counts==0)
        selection.add('veto_tau', taus.counts==0)
        selection.add('veto_b', bjets.counts==0)
        selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL)

        if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC):
            selection.add('hemveto', df['hemveto'])
        else:
            selection.add('hemveto', np.ones(df.size)==1)

        # AK4 Jet
        leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \
                         & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any()
        selection.add('leadak4_pt_eta', leadak4_pt_eta)

        selection.add('leadak4_id',(ak4.tightId[leadak4_index] \
                                                    & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \
                                                    & (ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any())

        # AK8 Jet
        leadak8_index=ak8.pt.argmax()
        leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \
                         & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any()
        selection.add('leadak8_pt_eta', leadak8_pt_eta)

        selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any())

        # Mono-V selection
        selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any())
        selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \
                                    & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any())
        selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD)
                                    & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any())
        selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any())
        selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE)
                                    & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any())
        selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any())

        selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass"))
        selection.add('only_one_ak8', ak8.counts==1)

        # Dimuon CR
        leadmuon_index=muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge==0).any())
        selection.add('two_muons', muons.counts==2)

        # Single muon CR
        selection.add('one_muon', muons.counts==1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index=electrons.pt.argmax()


        selection.add('one_electron', electrons.counts==1)
        selection.add('two_electrons', electrons.counts==2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge==0).any())
        selection.add('two_electrons', electrons.counts==2)

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index=photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId \
                         & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts==1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if gen_v_pt is not None:
            output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight'])

        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons, electrons, photons)
            weights = pileup_weights(weights, df, evaluator, cfg)
            if not (gen_v_pt is None):
                weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt)

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask].flatten()]
                output['kinematics']['met_phi'] += [met_phi[mask].flatten()]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()]
                output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()]
                output['kinematics']['leadbtag'] += [ak4.pt.max()<0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()]
                output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()]
                output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()]
                output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()]
                output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()]
                output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()]
                output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()]
                output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()]


        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] +=  df['genEventSumw']
            output['sumw2'][dataset] +=  df['genEventSumw2']
            output['sumw_pileup'][dataset] +=  weights.partial_weight(include=['pileup']).sum()

        regions = monojet_regions(cfg)

        for region, cuts in regions.items():
            region_weights = copy.deepcopy(weights)
            if not df['is_data']:
                if re.match(r'cr_(\d+)e.*', region):
                    region_weights.add('trigger', np.ones(df.size))
                elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region):
                    region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt']))
                elif re.match(r'cr_g.*', region):
                    region_weights.add('trigger', np.ones(df.size))

            if not df['is_data']:
                genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)]
                leadak8 = ak8[ak8.pt.argmax()]
                leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8)
                matched_leadak8 = leadak8[leadak8_matched_mask]
                unmatched_leadak8 = leadak8[~leadak8_matched_mask]
                for wp in ['loose','loosemd','tight','tightmd']:
                    if re.match(r'.*_{wp}_v.*', region):

                        if (wp == 'tight') or ('nomistag' in region): # no mistag SF available for tight cut
                            matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod()
                        else:
                            matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \
                                    * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod()

                        region_weights.add('wtag_{wp}', matched_weights)



            # Blinding
            if(self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all']+=df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum()

            mask = selection.all(*cuts)


            if cfg.RUN.SAVE.TREE:
                def fill_tree(variable, values):
                    treeacc = processor.column_accumulator(values)
                    name = f'tree_{region}_{variable}'
                    if dataset in output[name].keys():
                        output[name][dataset] += treeacc
                    else:
                        output[name][dataset] = treeacc
                if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']:
                    fill_tree('recoil',df['recoil_pt'][mask].flatten())
                    fill_tree('weight',region_weights.weight()[mask].flatten())
                    if gen_v_pt is not None:
                        fill_tree('gen_v_pt',gen_v_pt[mask].flatten())
                    else:
                        fill_tree('gen_v_pt', -1 * np.ones(sum(mask)))
            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])


            # Multiplicities
            def fill_mult(name, candidates):
                output[name].fill(
                                  dataset=dataset,
                                  region=region,
                                  multiplicity=candidates[mask].counts,
                                  weight=region_weights.weight()[mask]
                                  )

            fill_mult('ak8_mult', ak8)
            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult',bjets)
            fill_mult('loose_ele_mult',electrons)
            fill_mult('tight_ele_mult',electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult',muons)
            fill_mult('tight_muo_mult',muons[df['is_tight_muon']])
            fill_mult('tau_mult',taus)
            fill_mult('photon_mult',photons)

            def ezfill(name, **kwargs):
                """Helper function to make filling easier."""
                output[name].fill(
                                  dataset=dataset,
                                  region=region,
                                  **kwargs
                                  )
            # Monitor weights
            for wname, wvalue in region_weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask])

            ezfill('ak4_eta',    jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi',    jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_pt',     jetpt=ak4[mask].pt.flatten(),   weight=w_alljets)

            # Leading ak4
            w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask])
            ezfill('ak4_eta0',   jeteta=ak4[leadak4_index].eta[mask].flatten(),    weight=w_leadak4)
            ezfill('ak4_phi0',   jetphi=ak4[leadak4_index].phi[mask].flatten(),    weight=w_leadak4)
            ezfill('ak4_pt0',    jetpt=ak4[leadak4_index].pt[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_ptraw0',    jetpt=ak4[leadak4_index].ptraw[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_chf0',    frac=ak4[leadak4_index].chf[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_nhf0',    frac=ak4[leadak4_index].nhf[mask].flatten(),      weight=w_leadak4)

            ezfill('drelejet',    dr=df['dREleJet'][mask],      weight=region_weights.weight()[mask])
            ezfill('drmuonjet',    dr=df['dRMuonJet'][mask],      weight=region_weights.weight()[mask])
            ezfill('drphotonjet',    dr=df['dRPhotonJet'][mask],  weight=region_weights.weight()[mask])

            # AK8 jets
            if region=='inclusive' or region.endswith('v'):
                # All
                w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask])

                ezfill('ak8_eta',    jeteta=ak8[mask].eta.flatten(), weight=w_allak8)
                ezfill('ak8_phi',    jetphi=ak8[mask].phi.flatten(), weight=w_allak8)
                ezfill('ak8_pt',     jetpt=ak8[mask].pt.flatten(),   weight=w_allak8)
                ezfill('ak8_mass',   mass=ak8[mask].mass.flatten(),  weight=w_allak8)

                # Leading
                w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask])

                ezfill('ak8_eta0',       jeteta=ak8[leadak8_index].eta[mask].flatten(),    weight=w_leadak8)
                ezfill('ak8_phi0',       jetphi=ak8[leadak8_index].phi[mask].flatten(),    weight=w_leadak8)
                ezfill('ak8_pt0',        jetpt=ak8[leadak8_index].pt[mask].flatten(),      weight=w_leadak8 )
                ezfill('ak8_mass0',      mass=ak8[leadak8_index].mass[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_tau210',     tau21=ak8[leadak8_index].tau21[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_wvsqcd0',    tagger=ak8[leadak8_index].wvsqcd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_wvsqcdmd0',  tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_zvsqcd0',    tagger=ak8[leadak8_index].zvsqcd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_zvsqcdmd0',  tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(),     weight=w_leadak8)

                # histogram with only gen-matched lead ak8 pt
                if not df['is_data']:
                    w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask])
                    ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(),      weight=w_matchedleadak8 )


                # Dimuon specifically for deepak8 mistag rate measurement
                if 'inclusive_v' in region:
                    ezfill('ak8_passloose_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtight_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloosemd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtightmd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloose_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtight_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )

            # MET
            ezfill('dpfcalo',            dpfcalo=df["dPFCalo"][mask],       weight=region_weights.weight()[mask] )
            ezfill('met',                met=met_pt[mask],            weight=region_weights.weight()[mask] )
            ezfill('met_phi',            phi=met_phi[mask],            weight=region_weights.weight()[mask] )
            ezfill('recoil',             recoil=df["recoil_pt"][mask],      weight=region_weights.weight()[mask] )
            ezfill('recoil_phi',         phi=df["recoil_phi"][mask],      weight=region_weights.weight()[mask] )
            ezfill('recoil_nopog',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask])
            ezfill('recoil_nopref',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['prefire'])[mask])
            ezfill('recoil_nopu',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('recoil_notrg',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['trigger'])[mask])
            ezfill('ak4_pt0_over_recoil',    ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask],      weight=region_weights.weight()[mask])
            ezfill('dphijm',             dphi=df["minDPhiJetMet"][mask],    weight=region_weights.weight()[mask] )
            ezfill('dphijr',             dphi=df["minDPhiJetRecoil"][mask],    weight=region_weights.weight()[mask] )

            if 'noveto' in region:
                continue

            # Muons
            if '_1m_' in region or '_2m_' in region:
                w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask])
                ezfill('muon_pt',   pt=muons.pt[mask].flatten(),    weight=w_allmu )
                ezfill('muon_mt',   mt=df['MT_mu'][mask],           weight=region_weights.weight()[mask])
                ezfill('muon_eta',  eta=muons.eta[mask].flatten(),  weight=w_allmu)
                ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu)
                ezfill('muon_phi',  phi=muons.phi[mask].flatten(),  weight=w_allmu)
                ezfill('muon_dxy',  dxy=muons.dxy[mask].flatten(),  weight=w_allmu)
                ezfill('muon_dz',  dz=muons.dz[mask].flatten(),  weight=w_allmu)

                # Leading muon
                w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask])
                ezfill('muon_pt0',   pt=muons[leadmuon_index].pt[mask].flatten(),    weight=w_leadmu )
                ezfill('muon_eta0',  eta=muons[leadmuon_index].eta[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_phi0',  phi=muons[leadmuon_index].phi[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_dxy0',  dxy=muons[leadmuon_index].dxy[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_dz0',  dz=muons[leadmuon_index].dz[mask].flatten(),  weight=w_leadmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask])

                ezfill('dimuon_pt',     pt=dimuons.pt[mask].flatten(),              weight=w_dimu)
                ezfill('dimuon_eta',    eta=dimuons.eta[mask].flatten(),            weight=w_dimu)
                ezfill('dimuon_mass',   dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu )
                ezfill('dimuon_dr',   dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu )

                ezfill('muon_pt1',   pt=muons[~leadmuon_index].pt[mask].flatten(),    weight=w_leadmu )
                ezfill('muon_eta1',  eta=muons[~leadmuon_index].eta[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_phi1',  phi=muons[~leadmuon_index].phi[mask].flatten(),  weight=w_leadmu)

            # Electrons
            if '_1e_' in region or '_2e_' in region:
                w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask])
                ezfill('electron_pt',   pt=electrons.pt[mask].flatten(),    weight=w_allel)
                ezfill('electron_mt',   mt=df['MT_el'][mask],               weight=region_weights.weight()[mask])
                ezfill('electron_eta',  eta=electrons.eta[mask].flatten(),  weight=w_allel)
                ezfill('electron_phi',  phi=electrons.phi[mask].flatten(),  weight=w_allel)
                ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel)
                ezfill('electron_dz',  dz=electrons.dz[mask].flatten(),  weight=w_allel)
                ezfill('electron_dxy',  dxy=electrons.dxy[mask].flatten(),  weight=w_allel)

                w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask])
                ezfill('electron_pt0',   pt=electrons[leadelectron_index].pt[mask].flatten(),    weight=w_leadel)
                ezfill('electron_eta0',  eta=electrons[leadelectron_index].eta[mask].flatten(),  weight=w_leadel)
                ezfill('electron_phi0',  phi=electrons[leadelectron_index].phi[mask].flatten(),  weight=w_leadel)

                w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask])
                ezfill('electron_tightid1',  id=electrons[~leadelectron_index].tightId[mask].flatten(),  weight=w_trailel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask])
                ezfill('dielectron_pt',     pt=dielectrons.pt[mask].flatten(),                  weight=w_diel)
                ezfill('dielectron_eta',    eta=dielectrons.eta[mask].flatten(),                weight=w_diel)
                ezfill('dielectron_mass',   dilepton_mass=dielectrons.mass[mask].flatten(),     weight=w_diel)
                ezfill('dielectron_dr',   dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel )

                ezfill('electron_pt1',   pt=electrons[~leadelectron_index].pt[mask].flatten(),    weight=w_leadel)
                ezfill('electron_eta1',  eta=electrons[~leadelectron_index].eta[mask].flatten(),  weight=w_leadel)
                ezfill('electron_phi1',  phi=electrons[~leadelectron_index].phi[mask].flatten(),  weight=w_leadel)
            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask]);
                ezfill('photon_pt0',              pt=photons[leadphoton_index].pt[mask].flatten(),    weight=w_leading_photon)
                ezfill('photon_eta0',             eta=photons[leadphoton_index].eta[mask].flatten(),  weight=w_leading_photon)
                ezfill('photon_phi0',             phi=photons[leadphoton_index].phi[mask].flatten(),  weight=w_leading_photon)
                ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], region_weights.weight()[mask])

            # PV
            ezfill('npv', nvtx=df['PV_npvs'][mask], weight=region_weights.weight()[mask])
            ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask])

            ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])

            ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.weight()[mask])
            ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask])
            ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
        return output