コード例 #1
0
ファイル: lheVProcessor.py プロジェクト: snwebb/bucoffea
def monojet_selection(vphi, genjets):
    """Build the packed event selection for the monojet category.

    Three named cuts are registered, in this order:
    ``at_least_one_jet``, ``leadak4_pt_eta`` and ``mindphijr``.

    Args:
        vphi: Forwarded unchanged as the second argument of
            ``min_dphi_jet_met``.
        genjets: Jet collection; its ``counts``, ``pt`` and ``eta``
            attributes are read.

    Returns:
        The populated ``processor.PackedSelection``.
    """
    cuts = processor.PackedSelection()

    has_jet = genjets.counts > 0
    cuts.add('at_least_one_jet', has_jet)

    # Per event: the largest jet pt, and the eta of the jet carrying it.
    lead_pt = genjets.pt.max()
    lead_eta = genjets[genjets.pt.argmax()].eta.max()
    cuts.add('leadak4_pt_eta', (lead_pt > 100) & (np.abs(lead_eta) < 2.4))

    min_dphi = min_dphi_jet_met(genjets, vphi, njet=4, ptmin=30)
    cuts.add('mindphijr', min_dphi > 0.5)

    return cuts
コード例 #2
0
    def process(self, df):
        '''Fill and save histograms.

        Forms the pair made of the first two jets returned by
        ``self._setup_candidates``, stores ``mjj``, ``dphijj`` and ``detajj``
        on ``df``, registers the event selections, and fills the jet, ``mjj``,
        ``ht`` and ``htmiss`` histograms once for every entry of
        ``self.regions``.

        Returns the filled accumulator.
        '''
        dataset = df['dataset']

        # Physics objects
        ak4, htmiss, ht = self._setup_candidates(df)

        # Pair built from the first two jets of each event
        diak4 = ak4[:, :2].distincts()
        lead, trail = diak4.i0, diak4.i1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(lead.phi.min(), trail.phi.max())
        df['detajj'] = np.abs(lead.eta - trail.eta).max()

        selection = processor.PackedSelection()
        selection.add('inclusive', np.ones(df.size, dtype=bool))

        lead_kinematics = (lead.pt > 80) & (np.abs(lead.eta) < 4.7)
        trail_kinematics = (trail.pt > 40) & (np.abs(trail.eta) < 4.7)
        opposite_hemispheres = (lead.eta * trail.eta < 0).any()

        selection.add('mjj', df['mjj'] > 200)
        selection.add('detajj', df['detajj'] > 1.)
        selection.add('dphijj', df['dphijj'] < 1.5)
        selection.add('hemisphere', opposite_hemispheres)
        selection.add('leadak4_pt_eta', lead_kinematics.any())
        selection.add('trailak4_pt_eta', trail_kinematics.any())

        output = self.accumulator.identity()
        if not df['is_data']:
            output['sumw'][dataset] += df['sumw']
            output['sumw2'][dataset] += df['sumw2']

        def fill_hist(name, region, **axes):
            """Fill histogram ``name`` for this dataset and ``region``."""
            output[name].fill(dataset=dataset, region=region, **axes)

        for region, cuts in self.regions.items():
            passing = selection.all(*cuts)

            # Leading jet
            fill_hist('ak4_pt0', region, jetpt=lead.pt[passing].flatten())
            fill_hist('ak4_eta0', region, jeteta=lead.eta[passing].flatten())
            fill_hist('ak4_phi0', region, jetphi=lead.phi[passing].flatten())

            # Trailing jet
            fill_hist('ak4_pt1', region, jetpt=trail.pt[passing].flatten())
            fill_hist('ak4_eta1', region, jeteta=trail.eta[passing].flatten())
            fill_hist('ak4_phi1', region, jetphi=trail.phi[passing].flatten())

            # Event-level quantities
            fill_hist('mjj', region, mjj=df['mjj'][passing])
            fill_hist('ht', region, ht=ht[passing])
            fill_hist('htmiss', region, ht=htmiss[passing])

        return output
コード例 #3
0
def vbf_selection(vphi, dijet, genjets):
    """Build the packed event selection for the VBF category.

    Registers, in order: ``two_jets``, ``leadak4_pt_eta``,
    ``trailak4_pt_eta``, ``hemisphere`` and ``mindphijr``.

    Args:
        vphi: Its per-event ``max()`` is forwarded to ``min_dphi_jet_met``.
        dijet: Jet-pair collection; ``counts``, ``i0`` and ``i1`` are read.
        genjets: Jet collection forwarded to ``min_dphi_jet_met``.

    Returns:
        The populated ``processor.PackedSelection``.
    """
    selection = processor.PackedSelection()
    lead, trail = dijet.i0, dijet.i1

    named_cuts = [
        ('two_jets', dijet.counts > 0),
        ('leadak4_pt_eta',
         (lead.pt.max() > 80) & (np.abs(lead.eta.max()) < 4.7)),
        ('trailak4_pt_eta',
         (trail.pt.max() > 40) & (np.abs(trail.eta.max()) < 4.7)),
        # The eta product is negative when the two jets have opposite-sign eta.
        ('hemisphere', lead.eta.max() * trail.eta.max() < 0),
        ('mindphijr',
         min_dphi_jet_met(genjets, vphi.max(), njet=4, ptmin=30) > 0.5),
    ]
    for cut_name, cut_mask in named_cuts:
        selection.add(cut_name, cut_mask)

    return selection
コード例 #4
0
ファイル: lheVProcessor.py プロジェクト: snwebb/bucoffea
def vbf_selection(vphi, dijet, genjets):
    """Build the packed event selection for the VBF category.

    Registers, in order: ``two_jets``, ``leadak4_pt_eta``,
    ``trailak4_pt_eta``, ``hemisphere``, ``mindphijr``, ``detajj`` and
    ``dphijj``.

    Args:
        vphi: Forwarded unchanged to ``min_dphi_jet_met``.
        dijet: Jet-pair collection; ``counts``, ``i0`` and ``i1`` are read.
        genjets: Jet collection forwarded to ``min_dphi_jet_met``.

    Returns:
        The populated ``processor.PackedSelection``.
    """
    cuts = processor.PackedSelection()
    jet0, jet1 = dijet.i0, dijet.i1

    cuts.add('two_jets', dijet.counts > 0)

    jet0_eta = jet0.eta.max()
    jet1_eta = jet1.eta.max()

    jet0_ok = (jet0.pt.max() > 80) & (np.abs(jet0_eta) < 5.0)
    cuts.add('leadak4_pt_eta', jet0_ok)

    jet1_ok = (jet1.pt.max() > 40) & (np.abs(jet1_eta) < 5.0)
    cuts.add('trailak4_pt_eta', jet1_ok)

    # The eta product is negative when the two jets have opposite-sign eta.
    cuts.add('hemisphere', (jet0_eta * jet1_eta < 0))

    min_dphi = min_dphi_jet_met(genjets, vphi, njet=4, ptmin=30, etamax=5.0)
    cuts.add('mindphijr', min_dphi > 0.5)

    delta_eta = np.abs(jet0.eta - jet1.eta).max()
    cuts.add('detajj', delta_eta > 1)

    delta_phi = dphi(jet0.phi, jet1.phi).min()
    cuts.add('dphijj', delta_phi < 1.5)

    return cuts
コード例 #5
0
    def process(self, df):
        """Select lepton+jets events and fill histograms for every region.

        Regions are keyed as ``regions[lepton][lepcat][btagregion][jmult]``
        and each holds the set of selection names an event must pass. For
        data, the lepton flavour not matching the dataset is dropped and a
        luminosity mask is required everywhere. For MC, lepton and b-tag
        scale factors are written into the event weights when the
        corresponding corrections are enabled.

        Besides ``self`` and ``df`` this method reads names that are defined
        outside of it: ``args``, ``btaggers``, ``wps_to_use``,
        ``btag_values``, ``proj_dir``, ``corrections``, ``lepSF_correction``,
        ``Nominal_ttJets``, ``MTcut``, ``MCWeights``, ``objsel``,
        ``make_vars`` and ``lumi_tools``.

        Args:
            df: Event dataframe for one chunk of one dataset.

        Returns:
            The filled accumulator.

        Raises:
            ValueError: If a region with selected events contains none of the
                recognised lepton selection names.
        """
        # Fixed seed so values from random distributions are reproducible
        # (JER corrections).
        np.random.seed(10)
        output = self.accumulator.identity()

        self.sample_name = df.dataset

        ## make event weights
        # data or MC distinction made internally
        evt_weights = MCWeights.get_event_weights(df,
                                                  year=args.year,
                                                  corrections=self.corrections,
                                                  BTagSFs=btaggers)

        ## initialize selections and regions
        selection = processor.PackedSelection()
        # regions[lepton][lepcat][btagregion][jmult] -> set of selection names
        regions = {
            lepton: {
                lepcat: {
                    btagregion: {
                        jmult: {
                            'objselection', jet_cut,
                            '%s_%s' % (lepcat.lower(), ltype), btag_cut
                        }
                        for jmult, jet_cut in (('3Jets', 'jets_3'),
                                               ('4PJets', 'jets_4p'))
                    }
                    for btagregion, btag_cut in (('btagPass', 'DeepCSV_pass'),
                                                 ('btagFail', 'DeepCSV_fail'))
                }
                for lepcat in ('Loose', 'Tight')
            }
            for lepton, ltype in (('Muon', 'MU'), ('Electron', 'EL'))
        }

        def require_in_all_regions(name):
            """Add selection ``name`` to the cut set of every remaining region."""
            for lepcats in regions.values():
                for btagregions in lepcats.values():
                    for jmults in btagregions.values():
                        for region_cuts in jmults.values():
                            region_cuts.add(name)

        def add_lepton_selections(lepton, ltype):
            """Register the exactly-one-tight / exactly-one-loose lepton cuts."""
            selection.add('tight_%s' % ltype,
                          df[lepton]['TIGHT%s' % ltype].sum() == 1)
            selection.add('loose_%s' % ltype,
                          df[lepton]['LOOSE%s' % ltype].sum() == 1)

        ## object selection
        objsel_evts = objsel.select(df,
                                    year=args.year,
                                    corrections=self.corrections,
                                    accumulator=output)
        output['cutflow'][
            'nEvts passing jet and lepton obj selection'] += objsel_evts.sum()
        selection.add('jets_3', df['Jet'].counts == 3)
        selection.add('jets_4p', df['Jet'].counts > 3)
        selection.add('objselection', objsel_evts)
        selection.add('DeepCSV_pass',
                      df['Jet']['DeepCSV' + wps_to_use[0]].sum() >= 2)

        # sort jets by btag value (descending) of the configured tagger
        btag_var = 'btagDeepB' if btaggers[0] == 'DeepCSV' else 'btagDeepFlavB'
        df['Jet'] = df['Jet'][df['Jet'][btag_var].argsort(ascending=False)]

        # btag fail sideband: in events with at least two jets, the two
        # highest DeepCSV discriminants are both below the working point.
        deepcsv_sorted = df['Jet'][df['Jet']['btagDeepB'].argsort(
            ascending=False)]['btagDeepB']
        valid_counts_inds = np.where(df['Jet'].counts > 1)[0]
        wp_threshold = btag_values[args.year]['btagDeepB']['DeepCSV' +
                                                           wps_to_use[0]]
        leading_discr = deepcsv_sorted[valid_counts_inds]
        deepcsv_fail = np.zeros(df.size).astype(bool)
        deepcsv_fail[valid_counts_inds] = (
            (leading_discr[:, 0] < wp_threshold) &
            (leading_discr[:, 1] < wp_threshold))
        selection.add('DeepCSV_fail', deepcsv_fail)

        self.isData = self.sample_name.startswith('data_Single')
        if self.isData:
            isSE_Data = self.sample_name.startswith('data_SingleElectron')
            isSM_Data = self.sample_name.startswith('data_SingleMuon')
            golden_json_mask = lumi_tools.LumiMask(
                '%s/inputs/data/LumiMasks/%s_GoldenJson.txt' %
                (proj_dir, args.year))
            # Calling the mask returns the per-event array of valid events.
            selection.add('lumimask',
                          golden_json_mask(df.run, df.luminosityBlock))

            ## keep only the lepton flavour matching the data stream
            if isSM_Data:
                del regions['Electron']
                add_lepton_selections('Muon', 'MU')
            if isSE_Data:
                del regions['Muon']
                add_lepton_selections('Electron', 'EL')

            require_in_all_regions('lumimask')
        else:
            ## add different selections
            add_lepton_selections('Muon', 'MU')
            add_lepton_selections('Electron', 'EL')

            ### apply lepton SFs to MC (only applicable to tight leptons)
            if 'LeptonSF' in corrections:
                # events passing object selection with one tight muon
                tight_mu_cut = selection.require(objselection=True,
                                                 tight_MU=True)
                tight_muons = df['Muon'][tight_mu_cut][(
                    df['Muon'][tight_mu_cut]['TIGHTMU'] == True)]
                evt_weights._weights['Muon_SF'][
                    tight_mu_cut] = MCWeights.get_lepton_sf(
                        year=args.year,
                        lepton='Muons',
                        corrections=lepSF_correction,
                        pt=tight_muons.pt.flatten(),
                        eta=tight_muons.eta.flatten())
                # events passing object selection with one tight electron
                tight_el_cut = selection.require(objselection=True,
                                                 tight_EL=True)
                tight_electrons = df['Electron'][tight_el_cut][(
                    df['Electron'][tight_el_cut]['TIGHTEL'] == True)]
                evt_weights._weights['Electron_SF'][
                    tight_el_cut] = MCWeights.get_lepton_sf(
                        year=args.year,
                        lepton='Electrons',
                        corrections=lepSF_correction,
                        pt=tight_electrons.pt.flatten(),
                        eta=tight_electrons.etaSC.flatten())

            ## apply btagging SFs to MC (DeepCSV only), per jet multiplicity
            if corrections['BTagSF'] == True:
                for jmult, jet_cut in (('3Jets', 'jets_3'),
                                       ('4PJets', 'jets_4p')):
                    jmult_evts = selection.require(objselection=True,
                                                   **{jet_cut: True})
                    deepcsv_wts = self.corrections['BTag_Constructors'][
                        'DeepCSV'][jmult].get_scale_factor(
                            jets=df['Jet'][jmult_evts],
                            passing_cut='DeepCSV' + wps_to_use[0])
                    evt_weights._weights['DeepCSV'][
                        jmult_evts] = deepcsv_wts['central'].prod()

            # don't use ttbar events with indices % 10 == 0, 1, 2
            if self.sample_name in Nominal_ttJets:
                events = df.event
                selection.add(
                    'keep_ttbar',
                    ~np.stack([((events % 10) == idx) for idx in [0, 1, 2]],
                              axis=1).any(axis=1))
                require_in_all_regions('keep_ttbar')

        ## fill hists for each region
        for lepton, lepcats in regions.items():
            ltype = 'MU' if lepton == 'Muon' else 'EL'
            lepSF_to_exclude = 'Electron_SF' if lepton == 'Muon' else 'Muon_SF'
            btagSF_to_exclude = 'DeepJet'
            for lepcat, btagregions in lepcats.items():
                for btagregion, jmults in btagregions.items():
                    for jmult, region_cuts in jmults.items():
                        cut = selection.all(*region_cuts)
                        if not cut.any():
                            continue

                        # Pick the leptons matching the ID the region asks for.
                        if 'loose_or_tight_%s' % ltype in region_cuts:
                            lep_mask = (
                                (df[lepton][cut]['TIGHT%s' % ltype] == True) |
                                (df[lepton][cut]['LOOSE%s' % ltype] == True))
                        elif 'tight_%s' % ltype in region_cuts:
                            lep_mask = (df[lepton][cut]['TIGHT%s' %
                                                        ltype] == True)
                        elif 'loose_%s' % ltype in region_cuts:
                            lep_mask = (df[lepton][cut]['LOOSE%s' %
                                                        ltype] == True)
                        else:
                            raise ValueError(
                                "Not sure what lepton type to choose for event"
                            )

                        ## calculate MT
                        MT = make_vars.MT(df[lepton][cut][lep_mask],
                                          df['MET'][cut])
                        MTHigh = (MT >= MTcut).flatten()

                        evt_weights_to_use = evt_weights.weight()
                        if not self.isData:
                            evt_weights_to_use = evt_weights.partial_weight(
                                exclude=[lepSF_to_exclude, btagSF_to_exclude])

                        jets = df['Jet'][cut][MTHigh]
                        leptons = df[lepton][cut][lep_mask][MTHigh]

                        if self.isData:
                            # One unit scale factor per event passing the MT
                            # cut, so data gets the same number of entries as
                            # the weights below (MTHigh.size would also count
                            # events failing the MT cut).
                            n_high_mt = int(MTHigh.sum())
                            btagSF = np.ones(n_high_mt)
                            lepSF = np.ones(n_high_mt)
                        else:
                            btagSF = evt_weights._weights[
                                btaggers[0]][cut][MTHigh].flatten()
                            lepSF = evt_weights._weights[
                                '%s_SF' % lepton][cut][MTHigh].flatten()
                        tot_weight = evt_weights_to_use[cut][MTHigh].flatten()

                        output['BTagSF'].fill(dataset=self.sample_name,
                                              jmult=jmult,
                                              leptype=lepton,
                                              lepcat=lepcat,
                                              btag=btagregion,
                                              sf=btagSF)
                        output['LepSF'].fill(dataset=self.sample_name,
                                             jmult=jmult,
                                             leptype=lepton,
                                             lepcat=lepcat,
                                             btag=btagregion,
                                             sf=lepSF)
                        output['EvtWeight'].fill(dataset=self.sample_name,
                                                 jmult=jmult,
                                                 leptype=lepton,
                                                 lepcat=lepcat,
                                                 btag=btagregion,
                                                 sf=tot_weight)

                        output = self.fill_hists(accumulator=output,
                                                 jetmult=jmult,
                                                 leptype=lepton,
                                                 lepcat=lepcat,
                                                 btag=btagregion,
                                                 jets=jets,
                                                 leptons=leptons,
                                                 MT=MT[MTHigh].flatten(),
                                                 evt_weights=tot_weight)

                        # check iso values for cut based wps
                        if lepton == 'Electron':
                            in_barrel = np.abs(leptons.etaSC) <= 1.479
                            in_endcap = np.abs(leptons.etaSC) > 1.479
                            barrel_els = leptons[in_barrel]
                            endcap_els = leptons[in_endcap]
                            tight_iso_cut_barrel = 0.0287 + 0.506 / barrel_els.pt
                            tight_iso_cut_endcap = 0.0445 + 0.963 / endcap_els.pt
                            tight_iso_passFail_barrel = barrel_els.pfRelIso < tight_iso_cut_barrel
                            tight_iso_passFail_endcap = endcap_els.pfRelIso < tight_iso_cut_endcap
                            output['El_iso_barrel'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                iso_passfail=tight_iso_passFail_barrel.
                                flatten().astype(int),
                                weight=tot_weight[in_barrel.flatten()])
                            output['El_iso_endcap'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                iso_passfail=tight_iso_passFail_endcap.
                                flatten().astype(int),
                                weight=tot_weight[in_endcap.flatten()])

        return output
コード例 #6
0
    def process(self, events):
        """Select fat-jet events and fill cutflows and templates.

        Registers trigger, candidate-jet, muon, AK4 b-tag and lepton-veto
        selections, accumulates weighted cutflows for the ``signal`` and
        ``ttbar_muoncontrol`` regions, and fills ``output['templates']`` for
        every region (including the cut-free ``noselection`` region).

        Besides ``self`` and ``events`` this method calls helpers defined
        outside of it: ``corrected_msoftdrop``, ``shift``, ``n2ddt_shift``,
        ``genmatch``, ``getBosons`` and ``add_VJets_NLOkFactor``.

        Args:
            events: Event collection for one chunk; ``events.metadata`` must
                carry the ``'dataset'`` name.

        Returns:
            The filled accumulator. An empty chunk returns the identity
            accumulator untouched.
        """
        dataset = events.metadata['dataset']
        print('process dataset', dataset)
        # Samples without a genWeight column are treated as real data.
        isRealData = 'genWeight' not in events.columns
        is_vqq = 'WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset
        selection = processor.PackedSelection()
        weights = processor.Weights(len(events))
        output = self.accumulator.identity()
        if len(events) == 0:
            return output
        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        # trigger paths
        if isRealData:
            trigger_fatjet = np.zeros(events.size, dtype='bool')
            for t in self._triggers[self._year]:
                # Best effort: a path missing from this sample is skipped.
                # `except Exception` rather than a bare `except` so that
                # KeyboardInterrupt / SystemExit still propagate.
                # NOTE(review): the exact exception raised for a missing
                # trigger depends on the events backend -- narrow further
                # once confirmed.
                try:
                    trigger_fatjet = trigger_fatjet | events.HLT[t]
                except Exception:
                    print('trigger %s not available'%t)

            trigger_muon = np.zeros(events.size, dtype='bool')
            for t in self._muontriggers[self._year]:
                trigger_muon = trigger_muon | events.HLT[t]
        else:
            # No trigger requirement is applied outside real data.
            trigger_fatjet = np.ones(events.size, dtype='bool')
            trigger_muon = np.ones(events.size, dtype='bool')

        selection.add('fatjet_trigger', trigger_fatjet)
        selection.add('muon_trigger', trigger_muon)

        # derived fat-jet quantities
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rhocorr'] = 2*np.log(fatjets.msdcorr/fatjets.pt)
        fatjets['gruddt'] = events.GRU.v25 - shift(fatjets,algo='gruddt',year=self._year)
        fatjets['in_v3_ddt'] = events.IN.v3 - shift(fatjets,algo='inddt',year=self._year)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets,year=self._year)
        if is_vqq:
            fatjets["genMatchFull"] = genmatch(events)
        else:
            fatjets["genMatchFull"] = fatjets.pt.zeros_like()

        # first fat jet passing the loose kinematic preselection
        candidatejet = fatjets[
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
        ][:, 0:1]

        # basic jet selection
        selection.add('minjetkin', (
            (candidatejet.pt >= 450)
            & (abs(candidatejet.eta) < 2.5)
            & (candidatejet.rhocorr >= -5.5)
            & (candidatejet.rhocorr <= -2)
        ).any())
        selection.add('signal_pt', (
            (candidatejet.pt >= 525)
        ).any())

        selection.add('mass', (candidatejet.msdcorr >= 40.).any())
        selection.add('v_selection_jetkin', (
            (candidatejet.pt >= 200)
            & (candidatejet.rhocorr >= -5.5)
            & (candidatejet.rhocorr <= -2)
        ).any())
        if is_vqq:
            genmatch_mask = candidatejet.genMatchFull.pad(1).fillna(0).flatten()
        else:
            # Other samples: pass whenever a candidate jet with non-zero pt exists.
            genmatch_mask = candidatejet.pt.pad(1).fillna(0).flatten().astype(bool)
        selection.add('genmatch', genmatch_mask)
        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
        selection.add('jetid', candidatejet.isTight.any())
        selection.add('met', events.MET.pt > 40.)

        # muons
        goodmuon = (
            (events.Muon.pt > 10)
            & (abs(events.Muon.eta) < 2.1)
        )
        nmuons = goodmuon.sum()
        leadingmuon = events.Muon[goodmuon][:, 0:1]
        muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)

        ngoodmuons = goodmuon[events.Muon.pt > 55].sum()

        selection.add('muonDphiAK8', (
            abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2*np.pi/3
        ).all().all())

        selection.add('muonkin', (
            (leadingmuon.pt > 55.)
            & (abs(leadingmuon.eta) < 2.1)
        ).all())

        # ak4 puppi jet for CR
        jets = events.Jet[
            (events.Jet.pt > 50.)
            & (abs(events.Jet.eta) < 3)
            & (events.Jet.isTight).astype(bool)
        ]

        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
        ak4_away = jets[(dr > 0.8).all()]
        selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)

        # generic lep veto
        nelectrons = (
            (events.Electron.pt > 10.)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.LOOSE)
        ).sum()

        ntaus = (
            (events.Tau.pt > 20.)
            & (events.Tau.idDecayMode).astype(bool)
            # bacon iso looser than Nano selection
        ).sum()
        selection.add('onemuon', (ngoodmuons==1)& (nelectrons == 0) & (ntaus == 0))
        selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))

        if not isRealData:
            weights.add('genweight', events.genWeight)
            bosons = getBosons(events)
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)

        regions = {
           'signal'                 : ['fatjet_trigger','minjetkin','signal_pt','mass','noleptons','jetid','genmatch'],
           'ttbar_muoncontrol'      : ['muon_trigger', 'minjetkin','jetid', 'mass', 'muonDphiAK8','muonkin','ak4btagMedium08','onemuon',],
           'noselection' : [],
        }

        # All weights are registered by now; evaluate the total weight once.
        event_weight = weights.weight()
        total_weight = float(event_weight.sum())

        # Cumulative cutflow: entry `cut` holds the weighted yield after
        # applying every cut up to and including `cut`.
        for region in ('signal', 'ttbar_muoncontrol'):
            output['cutflow_%s' % region][dataset]['none'] += total_weight
        for region in ('signal', 'ttbar_muoncontrol'):
            applied_cuts = set()
            for cut in regions[region]:
                applied_cuts.add(cut)
                passing = selection.all(*applied_cuts)
                output['cutflow_%s' % region][dataset][cut] += float(event_weight[passing].sum())

        def normalize(val, cut):
            """Return one value per selected event, 0 where ``val`` is empty."""
            return val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region):
            """Fill the template histogram with events passing ``region``."""
            cut = selection.all(*regions[region])
            output['templates'].fill(
                dataset=dataset,
                region=region,
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                in_v3_ddt=normalize(candidatejet.in_v3_ddt, cut),
                mu_pt=normalize(leadingmuon.pt, cut),
                mu_pfRelIso04_all=normalize(leadingmuon.pfRelIso04_all, cut),
                weight=event_weight[cut],
            )

        for region in regions:
            fill(region)

        return output
コード例 #7
0
    def process(self, df):
        """Build selections and weights for one dataframe chunk and fill histograms.

        Args:
            df: Dataframe-like chunk, accessed through ``df[key]``,
                ``key in df``, ``df.keys()`` and ``df.size``. Derived columns
                are written back into it (``muon_dphi`` and, for simulation,
                ``AK8Puppijet0_msd_matchedUp``, in addition to whatever the
                ``build_*`` helpers add).

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) in which
            every fillable ``hist.Hist`` has been filled for the region
            encoded in its name, and whose ``sumw`` entry has been updated
            for simulated datasets.

        Raises:
            ValueError: If a fillable, non-``nminus1`` histogram name matches
                no region or more than one region.
        """
        dataset = df['dataset']
        if self._debug:
            print("Processing dataframe from", dataset)
        isRealData = dataset in ["JetHT", "SingleMuon", "data_obs_mu", "data_obs_jet"]

        self.build_leading_ak8_variables(df)
        self.build_subleading_ak8_variables(df)
        self.build_ak4_variables(df)
        self.build_met_systematics(df)
        df['muon_dphi'] = np.abs(deltaphi(df['vmuoLoose0_phi'], df['AK8Puppijet0_phi']))

        selection = processor.PackedSelection()
        if isRealData:
            # Only take jet triggers from JetHT, single muon triggers from SingleMuon dataset
            # necessary but not sufficient condition to prevent double-counting
            # (this plus mutually exclusive offline selections are sufficient)
            selection.add('trigger', (df['triggerBits'] & self._corrections[f'{self._year}_triggerMask']).astype('bool') & (dataset=="JetHT"))
            selection.add('mutrigger', ((df['triggerBits']&1) & df['passJson']).astype('bool') & (dataset=="SingleMuon"))
            if self._debug:
                print("Trigger pass/all", selection.all('trigger').sum(), df.size)
                print("Muon trigger pass/all", selection.all('mutrigger').sum(), df.size)
        else:
            # Simulation: triggers always pass here; efficiencies enter as weights below.
            selection.add('trigger', np.ones(df.size, dtype='bool'))
            selection.add('mutrigger', np.ones(df.size, dtype='bool'))

        # NOTE(review): the dict is called "Loose" but it feeds the selections
        # named "...Medium..." below -- confirm which working point these are.
        btagLooseWPs = {
            '2016': 0.6321,
            '2017': 0.4941,
            '2018': 0.4184,
        }

        selection.add('noLeptons', (df['neleLoose']==0) & (df['nmuLoose']==0) & (df['ntau']==0))
        selection.add('oneMuon', (df['neleLoose']==0) & (df['nmuLoose']==1) & (df['ntau']==0))
        selection.add('muonAcceptance', (df['vmuoLoose0_pt'] > 55.) & (np.abs(df['vmuoLoose0_eta']) < 2.1))
        selection.add('muonDphiAK8', df['muon_dphi'] > 2*np.pi/3)
        selection.add('ak4btagMediumDR08', df['ak4_leadingDeepCSV_dR08'] > btagLooseWPs[self._year])  # at least one passes medium cut
        selection.add('antiak4btagMediumOppHem', df['opposite_ak4_leadingDeepCSV'] < btagLooseWPs[self._year])  # none pass
        selection.add('tightVjet', df['AK8Puppijet0_isTightVJet'] != 0)
        selection.add('n2ddtPass', df['ak8jet_n2ddt'] < 0)
        selection.add('jetMass', df['AK8Puppijet0_msd'] > 40.)
        selection.add('deepcvb', df['AK8Puppijet0_deepdoublecvb'] > 0.2)

        selection.add('jetKinematics', df['AK8Puppijet0_pt'] > 450.)
        selection.add('jetKinematicsMuonCR', df['AK8Puppijet0_pt'] > 400.)
        selection.add('pfmet', df['pfmet'] < 140.)

        # Every region is a *set* of selection names so that the n-1 branch
        # below can remove a single cut with set difference.
        regions = {}
        regions['noselection'] = set()
        regions['preselection'] = {'trigger', 'noLeptons'}
        regions['signalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem'}
        regions['muoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8'}
        regions['hCCsignalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem', 'deepcvb'}
        regions['hCCmuoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8', 'deepcvb'}

        # Systematics that shift a quantity (and therefore the cuts on it)
        # rather than the event weight.
        shiftSystematics = ['JESUp', 'JESDown', 'JERUp', 'JERDown']
        shiftedQuantities = {'AK8Puppijet0_pt', 'pfmet'}
        shiftedSelections = {'jetKinematics', 'jetKinematicsMuonCR', 'pfmet'}
        for syst in shiftSystematics:
            selection.add('jetKinematics'+syst, df['AK8Puppijet0_pt_'+syst] > 450)
            selection.add('jetKinematicsMuonCR'+syst, df['AK8Puppijet0_pt_'+syst] > 400.)
            selection.add('pfmet'+syst, df['pfmet_'+syst] < 140.)

        # mass shift applied only to V-matched data
        # https://github.com/kakwok/ZPrimePlusJet/blob/PerBinEff/fitting/PbbJet/buildRhalphabetHbb.py#L30
        # (the shifted column is built for every non-data dataset; the
        # 'matched' weight added further down carries the V-matching)
        if not isRealData:
            shiftSystematics.append('matchedUp')
            shiftedQuantities.add('AK8Puppijet0_msd')
            msdshifts = {'2016': 1.001, '2017': 0.979, '2018': 0.970}
            df['AK8Puppijet0_msd_matchedUp'] = msdshifts[self._year] * df['AK8Puppijet0_msd']

        weights = processor.Weights(df.size)

        if not isRealData:
            # SumWeights is sum(scale1fb), so we need to use full value here
            weights.add('genweight', df['scale1fb'])

        if not self._skipPileup:
            if self._year == '2017' and dataset in self._corrections['2017_pileupweight_dataset']:
                weights.add('pileupweight',
                            self._corrections['2017_pileupweight_dataset'][dataset](df['npu']),
                            self._corrections['2017_pileupweight_dataset_puUp'][dataset](df['npu']),
                            self._corrections['2017_pileupweight_dataset_puDown'][dataset](df['npu']),
                            )
            elif self._year != '2017':
                weights.add('pileupweight',
                            self._corrections[f'{self._year}_pileupweight'](df['npu']),
                            self._corrections[f'{self._year}_pileupweight_puUp'](df['npu']),
                            self._corrections[f'{self._year}_pileupweight_puDown'](df['npu']),
                            )

        # TODO unc.
        if self._year == '2017' and 'ZJetsToQQ_HT' in dataset:
            nlo_over_lo_qcd = self._corrections['2017_Z_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2017' and 'WJetsToQQ_HT' in dataset:
            nlo_over_lo_qcd = self._corrections['2017_W_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2016' and 'DYJetsToQQ' in dataset:
            nlo_over_lo_qcd = self._corrections['2016_Z_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2016' and 'WJetsToQQ' in dataset:
            nlo_over_lo_qcd = self._corrections['2016_W_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)

        if not isRealData:
            # handle weight systematics for signal region
            # (the weight is applied only to events passing 'noLeptons')
            def regionMask(w):
                if self._skipTrigger:
                    return np.ones(df.size)
                return np.where(selection.all('noLeptons'), w, 1.)
            weights.add('trigweight',
                        regionMask(self._corrections[f'{self._year}_trigweight_msd_pt'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                        regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightUp'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                        regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightDown'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                        )
            vmatch = (np.abs(deltaphi(df['AK8Puppijet0_phi'], df['genVPhi'])) < 0.8) & (np.abs(df['AK8Puppijet0_pt']-df['genVPt'])/df['genVPt'] < 0.5) & (np.abs(df['AK8Puppijet0_msd']-df['genVMass'])/df['genVMass'] < 0.3)
            weights.add('matched', np.ones(df.size, dtype='f'), vmatch.astype('f'), 1.-vmatch)

            # handle weight systematics for muon CR
            # (deliberately redefines regionMask: from here on the weight is
            # applied only to events passing 'oneMuon')
            def regionMask(w):
                if self._skipTrigger:
                    return np.ones(df.size)
                return np.where(selection.all('oneMuon'), w, 1.)
            mu_abseta = np.abs(df['vmuoLoose0_eta'])
            weights.add('mutrigweight',
                        regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta'](df['vmuoLoose0_pt'], mu_abseta)),
                        regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta_mutrigweightShift'](df['vmuoLoose0_pt'], mu_abseta)),
                        shift=True
                        )
            weights.add('muidweight',
                        regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])),
                        regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt_muidweightShift'](mu_abseta, df['vmuoLoose0_pt'])),
                        shift=True
                        )
            weights.add('muisoweight',
                        regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])),
                        regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt_muisoweightShift'](mu_abseta, df['vmuoLoose0_pt'])),
                        shift=True
                        )

        if self._debug:
            print("Weight statistics:")
            pprint.pprint(weights._weightStats, indent=4)

        hout = self.accumulator.identity()
        for histname, h in hout.items():
            if not isinstance(h, hist.Hist):
                continue
            if not all(k in df or k == 'systematic' for k in h.fields):
                # Cannot fill this histogram due to missing fields
                # is this an error, warning, or ignorable?
                if self._debug:
                    print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
                continue
            fields = {k: df[k] for k in h.fields if k in df}
            # The region is encoded as one of the '_'-separated tokens of the name.
            name_parts = histname.split('_')
            region = [r for r in regions if r in name_parts]

            if 'nminus1' in histname:
                # Name layout: <prefix>_<removed cut>_<region>
                _, sel, region = histname.split('_')
                cut = regions[region] - {sel}
                weight = weights.weight() * selection.all(*cut)
                h.fill(**fields, weight=weight)
            elif len(region) == 1:
                region = region[0]
                weight = weights.weight()
                cut = selection.all(*regions[region])
                h.fill(systematic="", **fields, weight=weight*cut)
                if 'systematic' in h.fields:
                    if self._debug:
                        print("Filling systematics for %s" % histname)
                    systs = set(weights.variations)
                    systs.update(shiftSystematics)
                    for syst in systs:
                        if self._debug:
                            print("  Filling systematic %s" % syst)
                        # Work on a copy: replacing a column in the shared
                        # nominal dict would leak this systematic's shifted
                        # quantity into every systematic filled after it.
                        fields_syst = dict(fields)
                        for val in shiftedQuantities:
                            if val+'_'+syst in df:
                                fields_syst[val] = df[val+'_'+syst]
                                if self._debug:
                                    print("    Replacing field %s with %s" % (val, val+'_'+syst))
                        if syst in weights.variations:
                            weight_syst = weights.weight(syst)
                            if self._debug:
                                print("    Using modified weight")
                        else:
                            weight_syst = weight
                        if syst in shiftSystematics:
                            # Swap each shifted cut for its systematic-specific version.
                            cut_syst = set()
                            for sel in regions[region]:
                                if sel in shiftedSelections and sel+syst in selection.names:
                                    cut_syst.add(sel+syst)
                                    if self._debug:
                                        print("    Replacing cut %s with systematic-shifted %s" % (sel, sel+syst))
                                else:
                                    cut_syst.add(sel)
                            cut_syst = selection.all(*cut_syst)
                        else:
                            cut_syst = cut
                        h.fill(systematic=syst, **fields_syst, weight=weight_syst*cut_syst)
            elif len(region) > 1:
                raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
            else:
                raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))

        if not isRealData:
            if 'skim_sumw' in df:
                # hacky way to only accumulate file-level information once
                if df['skim_sumw'] is not None:
                    hout['sumw'][dataset] += df['skim_sumw']
            else:
                hout['sumw'][dataset] += np.sum(df['scale1fb'])
        return hout
コード例 #8
0
    def process(self, df):
        """Run the event selection on one dataframe chunk and fill histograms.

        Args:
            df: Dataframe-like chunk, accessed through ``df[key]`` and
                ``df.size``. Derived columns are written back into it
                (``is_lo_w``, ``is_lo_z``, ``is_data``, ``MT_mu``, ``MT_el``,
                ``recoil_pt``, ``recoil_phi``, ``dPFCalo``,
                ``minDPhiJetRecoil``, ``minDPhiJetMet``).

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) filled with
            cutflows, optional per-event synchronisation values and the
            per-region histograms. An untouched identity accumulator is
            returned for an empty chunk.

        Note:
            ``cfg`` is not defined in this method.
            NOTE(review): presumably it is a module-level configuration set up
            by ``self._configure(df)`` -- confirm.
        """
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_data'] = is_data(dataset)

        if not df['is_data']:
            # Only read for simulation; every later use is guarded the same way.
            gen_v_pt = df['LHE_Vpt']

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        ak4, ak8, muons, electrons, taus, photons, hlt = setup_candidates(
            df, cfg)

        # Muons
        is_tight_muon = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (np.abs(muons.eta)<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = (
            (muons.counts == 1) *
            mt(muons.pt, muons.phi, df['MET_pt'], df['MET_phi'])).max()

        # Electrons
        is_tight_electron = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (np.abs(electrons.eta) < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) * mt(
            electrons.pt, electrons.phi, df['MET_pt'], df['MET_phi'])).max()

        # ak4
        jet_acceptance = np.abs(ak4.eta) < 2.4

        # B tagged ak4
        btag_cut = cfg.BTAG.CUTS[cfg.BTAG.algo][cfg.BTAG.wp]
        jet_btag_val = getattr(ak4, cfg.BTAG.algo)
        jet_btagged = jet_btag_val > btag_cut
        bjets = ak4[ jet_acceptance \
                     & jet_btagged \
                     & (ak4.pt>20) ]

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(df['MET_pt'], df['MET_phi'],
                                                   electrons, muons, photons)
        df["dPFCalo"] = (df['MET_pt'] - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               df['MET_phi'],
                                               njet=4,
                                               ptmin=30)
        selection = processor.PackedSelection()

        selection.add('inclusive', np.ones(df.size) == 1)

        # Triggers
        if cfg.RUN.SYNC:  # Synchronization mode
            pass_all = np.ones(df.size) == 1
            selection.add('filt_met', pass_all)
            selection.add('trig_met', pass_all)
            selection.add('trig_ele', pass_all)
            selection.add('trig_mu', pass_all)
            # Keep the set of selection names identical in both branches so
            # that a region using this cut can also be evaluated in sync mode.
            selection.add('trig_ht_for_g_eff', pass_all)

        else:
            selection.add('filt_met', df['Flag_METFilters'])
            selection.add('trig_met', combine_masks(df, cfg.TRIGGERS.MET))

            # Trigger overlap
            if df['is_data']:
                if "SinglePhoton" in dataset:
                    # Backup trigger only where the primary one did not fire.
                    trig_ele = combine_masks(
                        df, cfg.TRIGGERS.ELECTRON.SINGLE_BACKUP) & (
                            ~combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE))
                else:
                    trig_ele = combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE)
            else:
                trig_ele = combine_masks(
                    df, cfg.TRIGGERS.ELECTRON.SINGLE_BACKUP) | combine_masks(
                        df, cfg.TRIGGERS.ELECTRON.SINGLE)

            selection.add('trig_ele', trig_ele)
            selection.add('trig_mu', combine_masks(df,
                                                   cfg.TRIGGERS.MUON.SINGLE))
            selection.add('trig_ht_for_g_eff',
                          combine_masks(df, cfg.TRIGGERS.HT.GAMMAEFF))

        # Trigger objects
        hlt_muons = hlt[hlt.id == 13]
        # Parentheses only make the existing precedence explicit:
        # '&' binds tighter than '==' in Python.
        hlt_single_muons = hlt_muons[(hlt_muons.filter & 8) == 8]
        hlt_double_muons = hlt_muons[(hlt_muons.filter & 16) == 16]

        selection.add('one_hlt_muon', hlt_single_muons.counts >= 1)
        selection.add(
            'two_hlt_muons',
            (hlt_single_muons.counts + 2 * hlt_double_muons.counts) >= 2)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',
                      np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

        # AK4 Jet
        leadak4_index = ak4.pt.argmax()
        leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \
                         & (np.abs(ak4.eta[leadak4_index]) < cfg.SELECTION.SIGNAL.leadak4.ETA).any()
        selection.add('leadak4_pt_eta', leadak4_pt_eta)

        selection.add('leadak4_id',(ak4.tightId[leadak4_index] \
                                                    & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \
                                                    & (ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any())

        # AK8 Jet
        leadak8_index = ak8.pt.argmax()
        leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \
                         & (np.abs(ak8.eta[leadak8_index]) < cfg.SELECTION.SIGNAL.leadak8.ETA).any()
        selection.add('leadak8_pt_eta', leadak8_pt_eta)

        selection.add('leadak8_id', (ak8.tightId[leadak8_index]).any())

        # Mono-V selection
        selection.add('leadak8_tau21',
                      ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) <
                       cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any())
        selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \
                                    & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any())

        # Veto is the complement of the full V-tag selection defined above.
        selection.add(
            'veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id",
                                        "leadak8_tau21", "leadak8_mass"))

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', is_tight_muon.any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        # 'two_electrons' is registered exactly once; a second registration
        # under the same name would only occupy another slot.
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', is_tight_electron.any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())

        # Single Ele CR
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        selection.add('trig_photon',
                      combine_masks(df, cfg.TRIGGERS.PHOTON.SINGLE))
        leadphoton_index = photons.pt.argmax()

        is_tight_photon = photons.mediumId \
                         & (photons.pt > cfg.PHOTON.CUTS.TIGHT.PT) \
                         & (np.abs(photons.eta) < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', is_tight_photon.any())

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if not df['is_data']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        # Weights
        evaluator = monojet_evaluator(cfg)
        all_weights = {}
        if df['is_data']:
            weight = np.ones(df.size)
        else:
            weight = df['Generator_weight']

            # Muon ID and Isolation for tight and loose WP
            # Function of pT, eta (Order!)
            all_weights["muon_id_tight"] = evaluator['muon_id_tight'](
                muons[is_tight_muon].pt, muons[is_tight_muon].eta).prod()
            all_weights["muon_iso_tight"] = evaluator['muon_iso_tight'](
                muons[is_tight_muon].pt, muons[is_tight_muon].eta).prod()
            all_weights["muon_id_loose"] = evaluator['muon_id_loose'](
                muons[~is_tight_muon].pt, muons[~is_tight_muon].eta).prod()
            all_weights["muon_iso_loose"] = evaluator['muon_iso_loose'](
                muons[~is_tight_muon].pt, muons[~is_tight_muon].eta).prod()

            # Electron ID and reco
            # Function of eta, pT (Other way round relative to muons!)
            all_weights["ele_reco"] = evaluator['ele_reco'](
                electrons.eta, electrons.pt).prod()
            all_weights["ele_id_tight"] = evaluator['ele_id_tight'](
                electrons[is_tight_electron].eta,
                electrons[is_tight_electron].pt).prod()
            all_weights["ele_id_loose"] = evaluator['ele_id_loose'](
                electrons[~is_tight_electron].eta,
                electrons[~is_tight_electron].pt).prod()

            # Photon ID and electron veto
            all_weights["photon_id_tight"] = evaluator['photon_id_tight'](
                photons[is_tight_photon].eta,
                photons[is_tight_photon].pt).prod()

            # CSEV not split only by EE/EB for now
            # (lookup index is 0.5 for barrel photons and 2.5 otherwise)
            csev_sf_index = 0.5 * photons.barrel + 2.5 * ~photons.barrel
            all_weights["photon_csev"] = evaluator['photon_csev'](
                csev_sf_index).prod()

            all_weights["pileup"] = evaluator['pileup'](df['Pileup_nTrueInt'])

            if df['is_lo_w']:
                all_weights["theory"] = evaluator["qcd_ew_nlo_w"](gen_v_pt)
            elif df['is_lo_z']:
                all_weights["theory"] = evaluator["qcd_ew_nlo_z"](gen_v_pt)
            else:
                all_weights["theory"] = np.ones(df.size)
            # The event weight is the generator weight times every correction.
            for iw in all_weights.values():
                weight = weight * iw

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [df['MET_pt'][mask]]
                output['kinematics']['met_phi'] += [df['MET_phi'][mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [
                    jet_btag_val[jet_acceptance & (ak4.pt > 20)][mask].max()
                ]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[is_tight_muon].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[is_tight_electron].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[is_tight_photon].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']

        regions = monojet_regions()
        for region, cuts in regions.items():
            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all'] += df.size
                # Entry for cut i counts events passing cuts 0..i (cumulative).
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' + region][cutname] += selection.all(
                        *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                output[name].fill(dataset=dataset,
                                  region=region,
                                  multiplicity=candidates[mask].counts,
                                  weight=weight[mask])

            fill_mult('ak8_mult', ak8)
            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[is_tight_electron])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[is_tight_muon])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Helper function to make filling easier."""
                output[name].fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in all_weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, weight[mask])

            ezfill('ak4eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            # Leading ak4
            # (reuses leadak4_index from the selection stage; the argmax does
            # not depend on the region)
            w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask],
                                     weight[mask])
            ezfill('ak4eta0',
                   jeteta=ak4[leadak4_index].eta[mask].flatten(),
                   weight=w_leadak4)
            ezfill('ak4pt0',
                   jetpt=ak4[leadak4_index].pt[mask].flatten(),
                   weight=w_leadak4)

            # All ak8
            w_allak8 = weight_shape(ak8.eta[mask], weight[mask])

            ezfill('ak8eta', jeteta=ak8[mask].eta.flatten(), weight=w_allak8)
            ezfill('ak8pt', jetpt=ak8[mask].pt.flatten(), weight=w_allak8)
            ezfill('ak8mass', mass=ak8[mask].mass.flatten(), weight=w_allak8)

            # Leading ak8
            # (reuses leadak8_index from the selection stage)
            w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask],
                                     weight[mask])

            ezfill('ak8eta0',
                   jeteta=ak8[leadak8_index].eta[mask].flatten(),
                   weight=w_leadak8)
            ezfill('ak8pt0',
                   jetpt=ak8[leadak8_index].pt[mask].flatten(),
                   weight=w_leadak8)
            ezfill('ak8mass0',
                   mass=ak8[leadak8_index].mass[mask].flatten(),
                   weight=w_leadak8)

            # B tag discriminator
            # NOTE(review): reads cfg.BTAG.ALGO while the selection stage
            # reads cfg.BTAG.algo -- presumably the same setting if cfg
            # lookups are case-insensitive; confirm.
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], weight[mask])
            ezfill('ak4btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=weight[mask])
            ezfill('met', met=df["MET_pt"][mask], weight=weight[mask])
            ezfill('recoil', recoil=df["recoil_pt"][mask], weight=weight[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=weight[mask])

            # Muons
            w_allmu = weight_shape(muons.pt[mask], weight[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask], weight=weight[mask])
            ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu)
            # Dimuon
            w_dimu = weight_shape(dimuons.pt[mask], weight[mask])

            ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_eta',
                   eta=dimuons.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_mass',
                   dilepton_mass=dimuons.mass[mask].flatten(),
                   weight=w_dimu)

            # Electrons
            w_allel = weight_shape(electrons.pt[mask], weight[mask])
            ezfill('electron_pt',
                   pt=electrons.pt[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask], weight=weight[mask])
            ezfill('electron_eta',
                   eta=electrons.eta[mask].flatten(),
                   weight=w_allel)

            # Dielectron
            w_diel = weight_shape(dielectrons.pt[mask], weight[mask])
            ezfill('dielectron_pt',
                   pt=dielectrons.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_eta',
                   eta=dielectrons.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_mass',
                   dilepton_mass=dielectrons.mass[mask].flatten(),
                   weight=w_diel)

            # Photon
            w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],
                                            weight[mask])
            ezfill('photonpt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photoneta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photonphi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)

        return output
コード例 #9
0
ファイル: monojetProcessor.py プロジェクト: snwebb/bucoffea
    def process(self, df):
        """Apply the monojet / mono-V selection to one chunk and fill the outputs.

        The method builds the physics candidates, registers every individual
        cut in a ``PackedSelection``, assembles the event weights and then, for
        each region returned by ``monojet_regions(cfg)``, fills cutflows,
        optional trees and the kinematic histograms for the events passing
        all cuts of that region.

        Args:
            df: Chunk of events, accessed by column name (``df['dataset']``,
                ``df['event']``, ...) and through ``df.size``. Several derived
                columns (e.g. ``'recoil_pt'``, ``'MT_mu'``, ``'dPFCalo'``) are
                written back into it.

        Returns:
            The filled accumulator obtained from ``self.accumulator.identity()``.
            An empty chunk returns the untouched identity accumulator.

        Note:
            ``cfg`` is not defined inside this method; presumably it is a
            module-level configuration made available by ``self._configure``
            -- confirm against the rest of the file.
        """
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_v_jet'] = has_v_jet(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g']
        df['is_data'] = is_data(dataset)

        # Generator-level boson pt; stays None when the dataset is none of the
        # W/Z/photon types tested below.
        gen_v_pt = None
        if not df['is_data']:
            gen = setup_gen_candidates(df)
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']:
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max()

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg)

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        # The count mask zeroes the transverse mass unless there is exactly one muon
        df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        # Same construction as MT_mu, for exactly one electron
        df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index=ak4.pt.argmax()

        # Minimum angular distance between the first ak4 jet and any electron / muon
        elejet_pairs = ak4[:,:1].cross(electrons)
        df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:,:1].cross(muons)
        df['dRMuonJet'] = np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min()

        # Photons
        # Angular distance leading photon - leading jet
        phojet_pairs = ak4[:,:1].cross(photons[:,:1])
        df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons)
        df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size)==1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)
        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts==0)
        selection.add('veto_muo', muons.counts==0)
        selection.add('veto_photon', photons.counts==0)
        selection.add('veto_tau', taus.counts==0)
        selection.add('veto_b', bjets.counts==0)
        selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        # NOTE(review): 'mindphijm' is cut at the MINDPHIJR threshold -- confirm this is intended
        selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL)

        # The HEM veto is only active for 2018 datasets outside of sync mode;
        # otherwise the cut is registered as pass-all so region definitions stay valid.
        if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC):
            selection.add('hemveto', df['hemveto'])
        else:
            selection.add('hemveto', np.ones(df.size)==1)

        # AK4 Jet
        leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \
                         & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any()
        selection.add('leadak4_pt_eta', leadak4_pt_eta)

        selection.add('leadak4_id',(ak4.tightId[leadak4_index] \
                                                    & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \
                                                    & (ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any())

        # AK8 Jet
        leadak8_index=ak8.pt.argmax()
        leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \
                         & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any()
        selection.add('leadak8_pt_eta', leadak8_pt_eta)

        selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any())

        # Mono-V selection
        selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any())
        selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \
                                    & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any())
        # The "loose" working points are exclusive: above LOOSE(MD) but below TIGHT(MD)
        selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD)
                                    & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any())
        selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any())
        selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE)
                                    & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any())
        selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any())

        # The V-tag veto is the negation of the combined tau21/mass based V selection
        selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass"))
        selection.add('only_one_ak8', ak8.counts==1)

        # Dimuon CR
        leadmuon_index=muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge==0).any())
        selection.add('two_muons', muons.counts==2)

        # Single muon CR
        selection.add('one_muon', muons.counts==1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index=electrons.pt.argmax()

        # Each cut name is registered exactly once: a repeated add() would
        # occupy an additional bit of the packed selection mask.
        selection.add('one_electron', electrons.counts==1)
        selection.add('two_electrons', electrons.counts==2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge==0).any())

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index=photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId \
                         & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts==1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if gen_v_pt is not None:
            output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight'])

        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            # Fall back to unit weights when the prefiring column is absent
            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons, electrons, photons)
            weights = pileup_weights(weights, df, evaluator, cfg)
            if gen_v_pt is not None:
                weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt)

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask].flatten()]
                output['kinematics']['met_phi'] += [met_phi[mask].flatten()]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()]
                output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()]
                # Mask the per-event array first, then wrap it in a list like
                # the other entries (indexing the list itself with a boolean
                # array raises TypeError).
                output['kinematics']['leadbtag'] += [(ak4.pt.max()<0)[mask]]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()]
                output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()]
                output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()]
                output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()]
                output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()]
                output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()]
                output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()]
                output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()]


        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] +=  df['genEventSumw']
            output['sumw2'][dataset] +=  df['genEventSumw2']
            output['sumw_pileup'][dataset] +=  weights.partial_weight(include=['pileup']).sum()

        regions = monojet_regions(cfg)

        # Gen-level V matching of the leading ak8 jet. It does not depend on
        # the region, so it is computed once instead of once per region.
        if not df['is_data']:
            genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)]
            leadak8 = ak8[ak8.pt.argmax()]
            leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8)
            matched_leadak8 = leadak8[leadak8_matched_mask]
            unmatched_leadak8 = leadak8[~leadak8_matched_mask]

        for region, cuts in regions.items():
            # Every region gets its own copy so region-specific weights do not leak
            region_weights = copy.deepcopy(weights)
            if not df['is_data']:
                # Trigger weight: unity for electron and photon CRs, evaluated
                # from the recoil for muon CRs and signal regions.
                if re.match(r'cr_(\d+)e.*', region):
                    region_weights.add('trigger', np.ones(df.size))
                elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region):
                    region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt']))
                elif re.match(r'cr_g.*', region):
                    region_weights.add('trigger', np.ones(df.size))

                # W-tagging scale factors for regions named '..._<wp>_v...'.
                # The working point must be interpolated into both the pattern
                # and the weight name; without the f-prefix '{wp}' is literal
                # text and no region ever matches.
                for wp in ['loose','loosemd','tight','tightmd']:
                    if re.match(rf'.*_{wp}_v.*', region):

                        if (wp == 'tight') or ('nomistag' in region): # no mistag SF available for tight cut
                            matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod()
                        else:
                            matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \
                                    * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod()

                        region_weights.add(f'wtag_{wp}', matched_weights)

            # Blinding
            if(self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all']+=df.size
                for icut, cutname in enumerate(cuts):
                    # Number of events surviving all cuts up to and including this one
                    output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum()

            mask = selection.all(*cuts)


            if cfg.RUN.SAVE.TREE:
                def fill_tree(variable, values):
                    """Append ``values`` to the per-dataset column 'tree_<region>_<variable>'."""
                    treeacc = processor.column_accumulator(values)
                    name = f'tree_{region}_{variable}'
                    if dataset in output[name].keys():
                        output[name][dataset] += treeacc
                    else:
                        output[name][dataset] = treeacc
                if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']:
                    fill_tree('recoil',df['recoil_pt'][mask].flatten())
                    fill_tree('weight',region_weights.weight()[mask].flatten())
                    if gen_v_pt is not None:
                        fill_tree('gen_v_pt',gen_v_pt[mask].flatten())
                    else:
                        # Placeholder of -1 for every selected event
                        fill_tree('gen_v_pt', -1 * np.ones(mask.sum()))
            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])


            # Multiplicities
            def fill_mult(name, candidates):
                """Fill the multiplicity histogram ``name`` with the candidate counts of selected events."""
                output[name].fill(
                                  dataset=dataset,
                                  region=region,
                                  multiplicity=candidates[mask].counts,
                                  weight=region_weights.weight()[mask]
                                  )

            fill_mult('ak8_mult', ak8)
            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult',bjets)
            fill_mult('loose_ele_mult',electrons)
            fill_mult('tight_ele_mult',electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult',muons)
            fill_mult('tight_muo_mult',muons[df['is_tight_muon']])
            fill_mult('tau_mult',taus)
            fill_mult('photon_mult',photons)

            def ezfill(name, **kwargs):
                """Helper function to make filling easier."""
                output[name].fill(
                                  dataset=dataset,
                                  region=region,
                                  **kwargs
                                  )
            # Monitor weights
            for wname, wvalue in region_weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask])

            ezfill('ak4_eta',    jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi',    jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_pt',     jetpt=ak4[mask].pt.flatten(),   weight=w_alljets)

            # Leading ak4
            w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask])
            ezfill('ak4_eta0',   jeteta=ak4[leadak4_index].eta[mask].flatten(),    weight=w_leadak4)
            ezfill('ak4_phi0',   jetphi=ak4[leadak4_index].phi[mask].flatten(),    weight=w_leadak4)
            ezfill('ak4_pt0',    jetpt=ak4[leadak4_index].pt[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_ptraw0',    jetpt=ak4[leadak4_index].ptraw[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_chf0',    frac=ak4[leadak4_index].chf[mask].flatten(),      weight=w_leadak4)
            ezfill('ak4_nhf0',    frac=ak4[leadak4_index].nhf[mask].flatten(),      weight=w_leadak4)

            ezfill('drelejet',    dr=df['dREleJet'][mask],      weight=region_weights.weight()[mask])
            ezfill('drmuonjet',    dr=df['dRMuonJet'][mask],      weight=region_weights.weight()[mask])
            ezfill('drphotonjet',    dr=df['dRPhotonJet'][mask],  weight=region_weights.weight()[mask])

            # AK8 jets
            if region=='inclusive' or region.endswith('v'):
                # All
                w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask])

                ezfill('ak8_eta',    jeteta=ak8[mask].eta.flatten(), weight=w_allak8)
                ezfill('ak8_phi',    jetphi=ak8[mask].phi.flatten(), weight=w_allak8)
                ezfill('ak8_pt',     jetpt=ak8[mask].pt.flatten(),   weight=w_allak8)
                ezfill('ak8_mass',   mass=ak8[mask].mass.flatten(),  weight=w_allak8)

                # Leading
                w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask])

                ezfill('ak8_eta0',       jeteta=ak8[leadak8_index].eta[mask].flatten(),    weight=w_leadak8)
                ezfill('ak8_phi0',       jetphi=ak8[leadak8_index].phi[mask].flatten(),    weight=w_leadak8)
                ezfill('ak8_pt0',        jetpt=ak8[leadak8_index].pt[mask].flatten(),      weight=w_leadak8 )
                ezfill('ak8_mass0',      mass=ak8[leadak8_index].mass[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_tau210',     tau21=ak8[leadak8_index].tau21[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_wvsqcd0',    tagger=ak8[leadak8_index].wvsqcd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_wvsqcdmd0',  tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_zvsqcd0',    tagger=ak8[leadak8_index].zvsqcd[mask].flatten(),     weight=w_leadak8)
                ezfill('ak8_zvsqcdmd0',  tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(),     weight=w_leadak8)

                # histogram with only gen-matched lead ak8 pt
                if not df['is_data']:
                    w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask])
                    ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(),      weight=w_matchedleadak8 )


                # Dimuon specifically for deepak8 mistag rate measurement
                if 'inclusive_v' in region:
                    ezfill('ak8_passloose_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtight_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloosemd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtightmd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, jetpt=ak8[leadak8_index].pt[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloose_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtight_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )
                    ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(),      weight=w_leadak8 )

            # MET
            ezfill('dpfcalo',            dpfcalo=df["dPFCalo"][mask],       weight=region_weights.weight()[mask] )
            ezfill('met',                met=met_pt[mask],            weight=region_weights.weight()[mask] )
            ezfill('met_phi',            phi=met_phi[mask],            weight=region_weights.weight()[mask] )
            ezfill('recoil',             recoil=df["recoil_pt"][mask],      weight=region_weights.weight()[mask] )
            ezfill('recoil_phi',         phi=df["recoil_phi"][mask],      weight=region_weights.weight()[mask] )
            # Recoil with individual weight components switched off
            ezfill('recoil_nopog',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask])
            ezfill('recoil_nopref',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['prefire'])[mask])
            ezfill('recoil_nopu',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('recoil_notrg',    recoil=df["recoil_pt"][mask],      weight=region_weights.partial_weight(exclude=['trigger'])[mask])
            ezfill('ak4_pt0_over_recoil',    ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask],      weight=region_weights.weight()[mask])
            ezfill('dphijm',             dphi=df["minDPhiJetMet"][mask],    weight=region_weights.weight()[mask] )
            ezfill('dphijr',             dphi=df["minDPhiJetRecoil"][mask],    weight=region_weights.weight()[mask] )

            # Regions with 'noveto' in their name skip all lepton/photon/PV histograms below
            if 'noveto' in region:
                continue

            # Muons
            if '_1m_' in region or '_2m_' in region:
                w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask])
                ezfill('muon_pt',   pt=muons.pt[mask].flatten(),    weight=w_allmu )
                ezfill('muon_mt',   mt=df['MT_mu'][mask],           weight=region_weights.weight()[mask])
                ezfill('muon_eta',  eta=muons.eta[mask].flatten(),  weight=w_allmu)
                ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu)
                ezfill('muon_phi',  phi=muons.phi[mask].flatten(),  weight=w_allmu)
                ezfill('muon_dxy',  dxy=muons.dxy[mask].flatten(),  weight=w_allmu)
                ezfill('muon_dz',  dz=muons.dz[mask].flatten(),  weight=w_allmu)

                # Leading muon
                w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask])
                ezfill('muon_pt0',   pt=muons[leadmuon_index].pt[mask].flatten(),    weight=w_leadmu )
                ezfill('muon_eta0',  eta=muons[leadmuon_index].eta[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_phi0',  phi=muons[leadmuon_index].phi[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_dxy0',  dxy=muons[leadmuon_index].dxy[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_dz0',  dz=muons[leadmuon_index].dz[mask].flatten(),  weight=w_leadmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask])

                ezfill('dimuon_pt',     pt=dimuons.pt[mask].flatten(),              weight=w_dimu)
                ezfill('dimuon_eta',    eta=dimuons.eta[mask].flatten(),            weight=w_dimu)
                ezfill('dimuon_mass',   dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu )
                ezfill('dimuon_dr',   dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu )

                # ~index maps 0 -> -1 and 1 -> -2, i.e. it picks the other muon
                # of a pair (assumes exactly two muons in these regions -- confirm).
                ezfill('muon_pt1',   pt=muons[~leadmuon_index].pt[mask].flatten(),    weight=w_leadmu )
                ezfill('muon_eta1',  eta=muons[~leadmuon_index].eta[mask].flatten(),  weight=w_leadmu)
                ezfill('muon_phi1',  phi=muons[~leadmuon_index].phi[mask].flatten(),  weight=w_leadmu)

            # Electrons
            if '_1e_' in region or '_2e_' in region:
                w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask])
                ezfill('electron_pt',   pt=electrons.pt[mask].flatten(),    weight=w_allel)
                ezfill('electron_mt',   mt=df['MT_el'][mask],               weight=region_weights.weight()[mask])
                ezfill('electron_eta',  eta=electrons.eta[mask].flatten(),  weight=w_allel)
                ezfill('electron_phi',  phi=electrons.phi[mask].flatten(),  weight=w_allel)
                ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel)
                ezfill('electron_dz',  dz=electrons.dz[mask].flatten(),  weight=w_allel)
                ezfill('electron_dxy',  dxy=electrons.dxy[mask].flatten(),  weight=w_allel)

                w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask])
                ezfill('electron_pt0',   pt=electrons[leadelectron_index].pt[mask].flatten(),    weight=w_leadel)
                ezfill('electron_eta0',  eta=electrons[leadelectron_index].eta[mask].flatten(),  weight=w_leadel)
                ezfill('electron_phi0',  phi=electrons[leadelectron_index].phi[mask].flatten(),  weight=w_leadel)

                # Same ~index trick as for the muons to address the non-leading electron
                w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask])
                ezfill('electron_tightid1',  id=electrons[~leadelectron_index].tightId[mask].flatten(),  weight=w_trailel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask])
                ezfill('dielectron_pt',     pt=dielectrons.pt[mask].flatten(),                  weight=w_diel)
                ezfill('dielectron_eta',    eta=dielectrons.eta[mask].flatten(),                weight=w_diel)
                ezfill('dielectron_mass',   dilepton_mass=dielectrons.mass[mask].flatten(),     weight=w_diel)
                ezfill('dielectron_dr',   dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel )

                ezfill('electron_pt1',   pt=electrons[~leadelectron_index].pt[mask].flatten(),    weight=w_leadel)
                ezfill('electron_eta1',  eta=electrons[~leadelectron_index].eta[mask].flatten(),  weight=w_leadel)
                ezfill('electron_phi1',  phi=electrons[~leadelectron_index].phi[mask].flatten(),  weight=w_leadel)
            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask])
                ezfill('photon_pt0',              pt=photons[leadphoton_index].pt[mask].flatten(),    weight=w_leading_photon)
                ezfill('photon_eta0',             eta=photons[leadphoton_index].eta[mask].flatten(),  weight=w_leading_photon)
                ezfill('photon_phi0',             phi=photons[leadphoton_index].phi[mask].flatten(),  weight=w_leading_photon)
                ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)

            # PV
            ezfill('npv', nvtx=df['PV_npvs'][mask], weight=region_weights.weight()[mask])
            ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask])

            ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])

            ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.weight()[mask])
            ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask])
            ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
        return output
コード例 #10
0
    def process(self, events):
        """Run the selection on one chunk of events and fill the output histograms.

        The chunk is filtered by the golden-JSON lumi mask (data only), the
        double-electron trigger, electron / photon object selection, photon
        gen-matching, an opposite-charge electron pair requirement and a final
        Mee / electron-pt / MET event selection; cut-flow counters and
        kinematic histograms are then filled.

        Args:
            events: event array for a single chunk; it is treated as simulation
                when it carries a ``genWeight`` field and as data otherwise.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``,
            filled with the histograms of this chunk (returned unfilled when
            no event survives a selection step).

        Raises:
            ValueError: for data when no golden JSON is configured for
                ``self._year``.
        """

        # Initialize accumulator
        out = self.accumulator.identity()
        # NOTE(review): `sample_name` is not defined inside this method --
        # presumably a module-level global; `events.metadata['dataset']` was
        # the commented-out alternative.
        dataset = sample_name

        # Data or MC
        isData = 'genWeight' not in events.fields

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        # Golden Json file (only needed for data)
        if isData:
            golden_json_by_year = {
                "2018":
                "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD",
                "2017":
                "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt",
            }
            # Fail here with a clear message instead of hitting an unbound
            # `injson` further down when the lumi mask is built.
            if self._year not in golden_json_by_year:
                raise ValueError(
                    f"No golden JSON configured for year {self._year!r}")
            injson = golden_json_by_year[self._year]

        # <----- Get Scale factors ------>#

        if not isData:

            # Egamma reco ID
            get_ele_reco_above20_sf = self._corrections[
                'get_ele_reco_above20_sf'][self._year]
            get_ele_medium_id_sf = self._corrections['get_ele_medium_id_sf'][
                self._year]
            get_pho_medium_id_sf = self._corrections['get_pho_medium_id_sf'][
                self._year]

            # DoubleEG trigger # 2016, 2017 are not applied yet
            if self._year == "2018":
                get_ele_trig_leg1_SF = self._corrections[
                    'get_ele_trig_leg1_SF'][self._year]
                get_ele_trig_leg1_data_Eff = self._corrections[
                    'get_ele_trig_leg1_data_Eff'][self._year]
                get_ele_trig_leg1_mc_Eff = self._corrections[
                    'get_ele_trig_leg1_mc_Eff'][self._year]
                get_ele_trig_leg2_SF = self._corrections[
                    'get_ele_trig_leg2_SF'][self._year]
                get_ele_trig_leg2_data_Eff = self._corrections[
                    'get_ele_trig_leg2_data_Eff'][self._year]
                get_ele_trig_leg2_mc_Eff = self._corrections[
                    'get_ele_trig_leg2_mc_Eff'][self._year]

            # PU weight with custom made npy and multi-indexing:
            # the integer-cast nTrueInt is used as an index into the table.
            pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
            pu = self._puweight_arr[pu_weight_idx]

        # Cut flow: one entry (value 0) per event in the incoming chunk
        cut0 = np.zeros(len(events))

        # <----- Helper functions ------>#

        #  Sort by PT  helper function
        def sort_by_pt(ele, pho, jet):
            """Return (ele, pho, jet), each ordered by descending pt within every event."""

            def _descending_pt(collection):
                order = ak.argsort(collection.pt, ascending=False, axis=1)
                return collection[order]

            return _descending_pt(ele), _descending_pt(pho), _descending_pt(jet)

        # Lorentz vectors
        from coffea.nanoevents.methods import vector
        ak.behavior.update(vector.behavior)

        def TLorentz_vector(vec):
            """Repack the x, y, z, t fields of ``vec`` as a "LorentzVector" record array."""
            components = {axis: getattr(vec, axis) for axis in ("x", "y", "z", "t")}
            return ak.zip(components, with_name="LorentzVector")

        def TLorentz_vector_cylinder(vec):
            """Repack the pt, eta, phi, mass fields of ``vec`` as a "PtEtaPhiMLorentzVector" record array."""
            components = {
                field: getattr(vec, field)
                for field in ("pt", "eta", "phi", "mass")
            }
            return ak.zip(components, with_name="PtEtaPhiMLorentzVector")

        # Cut-based ID modification
        @numba.njit
        def PhotonVID(vid, idBit):
            """Collapse a nested working-point bitmap into one pass/fail bit per cut.

            ``vid`` is read as seven consecutive 2-bit fields (lowest bits
            first); bit ``k`` of the result is set when field ``k`` is greater
            than or equal to ``idBit``.
            """
            rBit = 0
            for cut in range(7):
                level = (vid >> (2 * cut)) & 0b11
                if level >= idBit:
                    rBit |= 1 << cut
            return rBit

        # Inverse Sieie and upper limit
        @numba.njit
        def make_fake_obj_mask(Pho, builder):
            """Build a per-photon boolean mask: medium ID with the Sieie cut ignored.

            Args:
                Pho: jagged photon collection (events x photons) exposing
                    ``vidNestedWPBitmap``.
                builder: an ``ak.ArrayBuilder`` that receives one list of
                    booleans per event.

            Returns:
                The same ``builder``; call ``.snapshot()`` on it to get the mask.
            """
            for pho in Pho:  # --Event Loop
                # Every begin_list() is paired with an end_list(), also for
                # events without photons (the photon loop is then simply
                # empty), so the builder never ends up with unbalanced lists.
                builder.begin_list()

                for phoIdx in range(len(pho)):  # --Photon Loop

                    vid = pho[phoIdx].vidNestedWPBitmap
                    vid_cuts2 = PhotonVID(vid, 2)  # Medium photon

                    # Field name (one pass/fail bit per cut)
                    # |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT|
                    #
                    # All cuts applied:   (bits & 0b1111111) == 0b1111111
                    # Sieie cut inverted: (bits & 0b1111111) == 0b1110111
                    # Sieie cut ignored:  (bits & 0b1110111) == 0b1110111
                    if (vid_cuts2 & 0b1110111) == 0b1110111:  # Without Sieie
                        builder.boolean(True)
                    else:
                        builder.boolean(False)

                builder.end_list()

            return builder

        # <----- Selection ------>#

        Initial_events = events
        # Good Run ( Golden Json files )
        from coffea import lumi_tools

        if isData:
            lumi_mask_builder = lumi_tools.LumiMask(injson)
            lumimask = ak.Array(
                lumi_mask_builder.__call__(events.run, events.luminosityBlock))
            events = events[lumimask]
            #print("{0}%  of files pass good-run conditions".format(len(events)/ len(Initial_events)))

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        ##----------- Cut flow1: Passing Triggers

        # double lepton trigger
        # (`np.bool` was removed from NumPy; the builtin `bool` is the
        # equivalent dtype and works on every NumPy version.)
        is_double_ele_trigger = True  # set to False to accept every event
        if not is_double_ele_trigger:
            double_ele_triggers_arr = np.ones(len(events), dtype=bool)
        else:
            # OR of all configured paths that exist in this chunk
            double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
            for path in self._doubleelectron_triggers[self._year]:
                if path not in events.HLT.fields: continue
                double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                    path]

        # single lepton trigger
        is_single_ele_trigger = True  # set to False to accept every event
        if not is_single_ele_trigger:
            single_ele_triggers_arr = np.ones(len(events), dtype=bool)
        else:
            single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
            for path in self._singleelectron_triggers[self._year]:
                if path not in events.HLT.fields: continue
                single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                    path]

        # Order electrons, photons and jets by descending pt inside each event
        events.Electron, events.Photon, events.Jet = sort_by_pt(
            events.Electron, events.Photon, events.Jet)

        # Good Primary vertex
        nPV = events.PV.npvsGood
        if not isData: nPV = nPV * pu
        nPV_nw = nPV

        # Apply cut1
        events = events[double_ele_triggers_arr]
        if not isData: pu = pu[double_ele_triggers_arr]

        cut1 = np.ones(len(events))

        # Set Particles
        Electron = events.Electron
        Muon = events.Muon
        Photon = events.Photon
        MET = events.MET
        Jet = events.Jet

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        # --Gen Photon for dR
        # NOTE(review): `events.GenPart` is read unconditionally, also when
        # `isData` is True -- presumably this processor is only run on
        # simulation; confirm.
        genparts = events.GenPart
        pdgID_mask = (genparts.pdgId == 22)
        # mask2: isPrompt | fromHardProcess | isLastCopy
        mask2 = (1 << 0) | (1 << 8) | (1 << 13)
        # https://github.com/PKUHEPEWK/WGamma/blob/master/2018/wgRealPhotonTemplateModule.py

        # Keep generator particles with pdgId 22 that carry all three flags
        status_mask = ((genparts.statusFlags & mask2) == mask2)
        gen_photons = genparts[pdgID_mask & status_mask]

        # Every event must contain exactly one such generator photon.
        # NOTE(review): `assert` is skipped under `python -O`.
        assert (ak.all(ak.num(gen_photons) == 1)
                )  # Raise error if len(gen_photon) != 1

        #  --Muon ( only used to calculate dR )
        # pt >= 10, |eta| <= 2.5, tightId and pfRelIso04_all < 0.15
        MuSelmask = (Muon.pt >= 10) & (abs(
            Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15)
        Muon = Muon[MuSelmask]

        ##----------- Cut flow2: Electron Selection

        EleSelmask = ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479)  &  (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) & (abs(Electron.dz) < 0.1)) | \
           ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) & (abs(Electron.dz) < 0.2))

        Electron = Electron[EleSelmask]

        # apply cut 2
        Tri_electron_mask = ak.num(Electron) >= 2
        Electron = Electron[Tri_electron_mask]
        Photon = Photon[Tri_electron_mask]
        Jet = Jet[Tri_electron_mask]
        MET = MET[Tri_electron_mask]
        Muon = Muon[Tri_electron_mask]
        if not isData: pu = pu[Tri_electron_mask]
        events = events[Tri_electron_mask]
        gen_photons = gen_photons[Tri_electron_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut2 = np.ones(len(Photon)) * 2

        ##----------- Cut flow3: Photon Selection

        # Basic photon selection
        isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                                  (abs(Photon.eta) < 2.5))
        Pixel_seed_mask = ~Photon.pixelSeed
        PT_mask = Photon.pt >= 20

        # dR cut with selected Muon and Electrons
        dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                                 axis=-1)  # default metric table: delta_r
        dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

        PhoSelmask = PT_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
        Photon = Photon[PhoSelmask]

        # Apply cut 3
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData: pu = pu[A_photon_mask]
        events = events[A_photon_mask]
        gen_photons = gen_photons[A_photon_mask]

        Photon_template_mask = make_fake_obj_mask(
            Photon, ak.ArrayBuilder()).snapshot()
        Photon = Photon[Photon_template_mask]

        # Apply cut 3
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData: pu = pu[A_photon_mask]
        events = events[A_photon_mask]
        gen_photons = gen_photons[A_photon_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut3 = np.ones(len(Photon)) * 3

        ## --  Additional photon selection: Photon gen-matching

        # Choose Photons that dR(genPhoton,Photon) <= 0.1
        pho_gen_dr = Photon.metric_table(gen_photons)
        gen_match_photon_mask = ak.all(pho_gen_dr <= 0.1, axis=-1)

        # Apply cut: keep matched photons, then events that still have one.
        # Muon is not filtered here (it is not used again in this method).
        Photon = Photon[gen_match_photon_mask]
        gen_match_photon_evt_mask = ak.num(Photon) >= 1

        Electron, Photon, Jet, MET, gen_photons = [
            collection[gen_match_photon_evt_mask]
            for collection in (Electron, Photon, Jet, MET, gen_photons)
        ]
        if not isData:
            pu = pu[gen_match_photon_evt_mask]
        events = events[gen_match_photon_evt_mask]

        ##-----------  Cut flow4:  Select 2 OSSF electrons from Z
        @numba.njit
        def find_2lep(events_leptons, builder):
            """Record, per event, index tuples of lepton pairs whose charges sum to zero.

            For an event with exactly two leptons a 2-tuple ``(i0, i1)`` is
            written; otherwise one 3-tuple ``(i0, i1, i2)`` is written for
            every additional index ``i2`` different from ``i0`` and ``i1``, so
            the same pair appears once per remaining lepton.

            Args:
                events_leptons: jagged lepton collection exposing ``charge``.
                builder: an ``ak.ArrayBuilder`` receiving one list per event.

            Returns:
                The same ``builder``.
            """
            for leptons in events_leptons:

                builder.begin_list()
                nlep = len(leptons)
                for i0 in range(nlep):
                    for i1 in range(i0 + 1, nlep):
                        # Skip pairs whose charges do not cancel
                        if leptons[i0].charge + leptons[i1].charge != 0:
                            continue

                        if nlep == 2:
                            builder.begin_tuple(2)
                            builder.index(0).integer(i0)
                            builder.index(1).integer(i1)
                            builder.end_tuple()

                        else:
                            # NOTE(review): the third index is never read by
                            # the visible caller (only fields "0" and "1" are
                            # used) -- confirm whether 3-tuples are needed.
                            for i2 in range(nlep):
                                if len({i0, i1, i2}) < 3: continue
                                builder.begin_tuple(3)
                                builder.index(0).integer(i0)
                                builder.index(1).integer(i1)
                                builder.index(2).integer(i2)
                                builder.end_tuple()
                builder.end_list()
            return builder

        ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot()

        # OSSF cut
        ossf_mask = ak.num(ossf_idx) >= 1
        ossf_idx = ossf_idx[ossf_mask]
        Electron = Electron[ossf_mask]
        Photon = Photon[ossf_mask]
        Jet = Jet[ossf_mask]
        MET = MET[ossf_mask]
        events = events[ossf_mask]
        if not isData: pu = pu[ossf_mask]

        Double_electron = [Electron[ossf_idx[idx]] for idx in "01"]
        from coffea.nanoevents.methods import vector
        ak.behavior.update(vector.behavior)

        Diele = ak.zip({
            "lep1":
            Double_electron[0],
            "lep2":
            Double_electron[1],
            "p4":
            TLorentz_vector(Double_electron[0] + Double_electron[1])
        })

        bestZ_idx = ak.singletons(
            ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1))
        Diele = Diele[bestZ_idx]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out
        cut4 = np.ones(len(Electron)) * 4

        leading_ele = Diele.lep1
        subleading_ele = Diele.lep2

        def make_leading_pair(target, base):
            return target[ak.argmax(base.pt, axis=1, keepdims=True)]

        leading_pho = make_leading_pair(Photon, Photon)

        # -- Scale Factor for each electron

        # Trigger weight helper function
        def Trigger_Weight(eta1, pt1, eta2, pt2):
            """Return the per-event data/MC ratio of the two-leg trigger expression.

            Both the MC and the data term have the form
            ``leg1(e1)*leg2(e2) + leg1(e2)*leg2(e1) - leg1(e1)*leg1(e2)``;
            in the data term every efficiency is multiplied by its SF lookup.
            """
            # MC efficiencies of each leg for each electron
            mc_l1_e1 = get_ele_trig_leg1_mc_Eff(eta1, pt1)
            mc_l1_e2 = get_ele_trig_leg1_mc_Eff(eta2, pt2)
            mc_l2_e1 = get_ele_trig_leg2_mc_Eff(eta1, pt1)
            mc_l2_e2 = get_ele_trig_leg2_mc_Eff(eta2, pt2)

            # Data efficiencies and scale factors of each leg for each electron
            da_l1_e1 = get_ele_trig_leg1_data_Eff(eta1, pt1)
            da_l1_e2 = get_ele_trig_leg1_data_Eff(eta2, pt2)
            da_l2_e1 = get_ele_trig_leg2_data_Eff(eta1, pt1)
            da_l2_e2 = get_ele_trig_leg2_data_Eff(eta2, pt2)
            sf_l1_e1 = get_ele_trig_leg1_SF(eta1, pt1)
            sf_l1_e2 = get_ele_trig_leg1_SF(eta2, pt2)
            sf_l2_e1 = get_ele_trig_leg2_SF(eta1, pt1)
            sf_l2_e2 = get_ele_trig_leg2_SF(eta2, pt2)

            per_ev_MC = (mc_l1_e1 * mc_l2_e2
                         + mc_l1_e2 * mc_l2_e1
                         - mc_l1_e1 * mc_l1_e2)

            per_ev_data = (da_l1_e1 * sf_l1_e1 * da_l2_e2 * sf_l2_e2
                           + da_l1_e2 * sf_l1_e2 * da_l2_e1 * sf_l2_e1
                           - da_l1_e1 * sf_l1_e1 * da_l1_e2 * sf_l1_e2)

            return per_ev_data / per_ev_MC

        if not isData:

            # eta (+ deltaEtaSC) and pt of both Z electrons, one value per event
            eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
            eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
            pt1 = ak.flatten(leading_ele.pt)
            pt2 = ak.flatten(subleading_ele.pt)

            ## -------------< Egamma ID and Reco Scale factor > -----------------##
            # From here on the name holds the evaluated scale factors rather
            # than the lookup; the weights are later added under this name.
            pho_eta = ak.flatten(leading_pho.eta)
            pho_pt = ak.flatten(leading_pho.pt)
            get_pho_medium_id_sf = get_pho_medium_id_sf(pho_eta, pho_pt)

            # Per-event electron SF = product over the two electrons
            ele_reco_sf = (get_ele_reco_above20_sf(eta1, pt1) *
                           get_ele_reco_above20_sf(eta2, pt2))

            ele_medium_id_sf = (get_ele_medium_id_sf(eta1, pt1) *
                                get_ele_medium_id_sf(eta2, pt2))

            ## -------------< Double Electron Trigger Scale factor > -----------------##
            # -- 2017,2016 are not applied yet
            if self._year == '2018':
                ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

        ##----------- Cut flow5: Event selection

        # Mee cut
        Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4

        # Electron PT cuts
        Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20))

        # MET cuts
        MET_mask = MET.pt > 20

        # --------Mask -------#
        Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask
        Diele_sel = Diele[Event_sel_mask]
        leading_pho_sel = leading_pho[Event_sel_mask]
        Jet_sel = Jet[Event_sel_mask]
        MET_sel = MET[Event_sel_mask]

        # Photon  EE and EB
        isEE_mask = leading_pho.isScEtaEE
        isEB_mask = leading_pho.isScEtaEB
        Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
        Pho_EB = leading_pho[isEB_mask & Event_sel_mask]

        #Stop processing if there is no event remain
        if len(leading_pho_sel) == 0:
            return out

        cut5 = np.ones(len(Diele)) * 5

        # -------------------- Flatten variables ---------------------------#

        # -- Ele1 --#
        Ele1_PT = ak.flatten(Diele_sel.lep1.pt)
        Ele1_Eta = ak.flatten(Diele_sel.lep1.eta)
        Ele1_Phi = ak.flatten(Diele_sel.lep1.phi)

        # -- Ele2 --#
        Ele2_PT = ak.flatten(Diele_sel.lep2.pt)
        Ele2_Eta = ak.flatten(Diele_sel.lep2.eta)
        Ele2_Phi = ak.flatten(Diele_sel.lep2.phi)

        # -- Pho -- #
        Pho_PT = ak.flatten(leading_pho_sel.pt)
        Pho_Eta = ak.flatten(leading_pho_sel.eta)
        Pho_Phi = ak.flatten(leading_pho_sel.phi)

        # -- Pho EB --#
        Pho_EB_PT = ak.flatten(Pho_EB.pt)
        Pho_EB_Eta = ak.flatten(Pho_EB.eta)
        Pho_EB_Phi = ak.flatten(Pho_EB.phi)
        # Read the EB collection here (these two were copy-pasted from Pho_EE)
        Pho_EB_Isochg = ak.flatten(Pho_EB.pfRelIso03_chg)
        Pho_EB_Sieie = ak.flatten(Pho_EB.sieie)

        # -- Pho EE --#
        Pho_EE_PT = ak.flatten(Pho_EE.pt)
        Pho_EE_Eta = ak.flatten(Pho_EE.eta)
        Pho_EE_Phi = ak.flatten(Pho_EE.phi)
        Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg)
        Pho_EE_Sieie = ak.flatten(Pho_EE.sieie)

        # --Kinematics --#
        Diele_mass = ak.flatten(Diele_sel.p4.mass)

        # From here on these two names hold the flattened, selected electrons
        leading_ele = ak.flatten(TLorentz_vector_cylinder(Diele_sel.lep1))
        subleading_ele = ak.flatten(TLorentz_vector_cylinder(Diele_sel.lep2))
        dR_e1pho = ak.flatten(
            leading_ele.delta_r(leading_pho_sel))  # dR pho,ele1
        dR_e2pho = ak.flatten(
            subleading_ele.delta_r(leading_pho_sel))  # dR pho,ele2
        # NOTE(review): `Jet_sel[:, 0]` presumably fails when a selected event
        # has no jet -- confirm that at least one jet is always present.
        dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel))

        MET_PT = ak.to_numpy(MET_sel.pt)

        # -------------------- Sieie bins---------------------------#
        def make_bins(pt, eta, sieie, bin_range_str):
            """Select the ``sieie`` values falling in one named (pt, eta) bin.

            Args:
                pt, eta, sieie: flat arrays of equal length.
                bin_range_str: bin key of the form ``'PT_<i>_eta_<j>'`` with
                    i, j in 1..4; any other key raises ``KeyError``.

            Returns:
                ``(numpy array of the selected sieie values, boolean bin mask)``.
                All bin edges are exclusive.
            """
            pt_ranges = {
                1: (pt > 20) & (pt < 30),
                2: (pt > 30) & (pt < 40),
                3: (pt > 40) & (pt < 50),
                4: (pt > 50),
            }
            eta_ranges = {
                1: (eta < 1),
                2: (eta > 1) & (eta < 1.5),
                3: (eta > 1.5) & (eta < 2),
                4: (eta > 2) & (eta < 2.5),
            }

            bin_dict = {
                f'PT_{ipt}_eta_{ieta}': pt_ranges[ipt] & eta_ranges[ieta]
                for ipt in pt_ranges for ieta in eta_ranges
            }

            binmask = bin_dict[bin_range_str]
            selected_sieie = ak.to_numpy(sieie[binmask])

            return selected_sieie, binmask

        # 'PT_1_eta_1' ... 'PT_4_eta_4', pt index varying slowest
        bin_name_list = [
            f'PT_{ipt}_eta_{ieta}' for ipt in range(1, 5)
            for ieta in range(1, 5)
        ]

        # Flattened inputs: event-selected photons and all leading photons
        sel_pt = ak.flatten(leading_pho_sel.pt)
        sel_abseta = ak.flatten(abs(leading_pho_sel.eta))
        sel_sieie = ak.flatten(leading_pho_sel.sieie)
        all_pt = ak.flatten(leading_pho.pt)
        all_abseta = ak.flatten(abs(leading_pho.eta))
        all_sieie = ak.flatten(leading_pho.sieie)

        binned_sieie_hist = {}
        binmask_dict = {}
        for name in bin_name_list:
            # sieie values come from the selected photons, ...
            binned_sieie_hist[name], _ = make_bins(sel_pt, sel_abseta,
                                                   sel_sieie, name)
            # ... the bin mask from the photons before the event selection
            _, binmask_dict[name] = make_bins(all_pt, all_abseta, all_sieie,
                                              name)

        print("Show me the last bin: ", binned_sieie_hist['PT_4_eta_4'])

        # --- Apply weight and hist
        # One weight slot per event that reached cut4
        weights = processor.Weights(len(cut4))

        # --- skim cut-weight
        def skim_weight(arr):
            """Return ``arr`` as a numpy array with None and exactly-zero entries removed.

            It is called on ``weights * mask`` so that rejected events (product
            0) disappear. NOTE(review): an accepted event whose weight is
            exactly 0 is dropped as well, which would leave the result shorter
            than the array of selected values -- confirm this cannot happen.
            """
            mask1 = ~ak.is_none(arr)
            subarr = arr[mask1]
            mask2 = subarr != 0
            return ak.to_numpy(subarr[mask2])

        cuts = Event_sel_mask
        # Per-event EE / EB flags of the leading photon
        cuts_pho_EE = ak.flatten(isEE_mask)
        cuts_pho_EB = ak.flatten(isEB_mask)

        # Cut-flow summary; "cut0" here is the size of the unfiltered chunk
        print(
            "cut0: {0}, cut1: {1}, cut2: {2}, cut3: {3}, cut4: {4} ,cut5 {5} ".
            format(len(Initial_events), len(cut1), len(cut2), len(cut3),
                   len(cut4), len(cut5)))

        # Weight and SF here
        if not isData:
            weights.add('pileup', pu)
            weights.add('ele_id', ele_medium_id_sf)
            # `get_pho_medium_id_sf` holds the evaluated photon SF values at
            # this point (the name was rebound when the SFs were computed)
            weights.add('pho_id', get_pho_medium_id_sf)
            weights.add('ele_reco', ele_reco_sf)

            # 2016,2017 are not applied yet
            if self._year == "2018":
                weights.add('ele_trigger', ele_trig_weight)

        # ---------------------------- Fill hist --------------------------------------#

        # Initial events
        out["sumw"][dataset] += len(Initial_events)

        # Cut flow loop
        for cut in [cut0, cut1, cut2, cut3, cut4, cut5]:
            out["cutflow"].fill(dataset=dataset, cutflow=cut)

        # Primary vertex
        out['nPV'].fill(dataset=dataset, nPV=nPV)
        out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV_nw)

        # Fill hist
        # The weights of the selected events are identical for every histogram
        # below, so they are computed once instead of once per fill.
        selected_weight = skim_weight(weights.weight() * cuts)

        # (histogram name == axis keyword, values), in the original fill order
        kinematic_fills = (
            ("met", MET_PT),  # -- met -- #
            ("mass", Diele_mass),  # --mass -- #
            ("ele1pt", Ele1_PT),  # -- Ele1 -- #
            ("ele1eta", Ele1_Eta),
            ("ele1phi", Ele1_Phi),
            ("ele2pt", Ele2_PT),  # --Ele2 --#
            ("ele2eta", Ele2_Eta),
            ("ele2phi", Ele2_Phi),
            ("phopt", Pho_PT),  # -- Photon -- #
            ("phoeta", Pho_Eta),
            ("phophi", Pho_Phi),
        )
        for hist_name, values in kinematic_fills:
            out[hist_name].fill(dataset=dataset,
                                weight=selected_weight,
                                **{hist_name: values})

        # -- Binned sieie hist -- #
        if len(binned_sieie_hist['PT_1_eta_1'] > 0):
            out['PT_1_eta_1'].fill(dataset=dataset,
                                   PT_1_eta_1=binned_sieie_hist['PT_1_eta_1'])
        if len(binned_sieie_hist['PT_1_eta_2'] > 0):
            out['PT_1_eta_2'].fill(dataset=dataset,
                                   PT_1_eta_2=binned_sieie_hist['PT_1_eta_2'])
        if len(binned_sieie_hist['PT_1_eta_3'] > 0):
            out['PT_1_eta_3'].fill(dataset=dataset,
                                   PT_1_eta_3=binned_sieie_hist['PT_1_eta_3'])
        if len(binned_sieie_hist['PT_1_eta_4'] > 0):
            out['PT_1_eta_4'].fill(dataset=dataset,
                                   PT_1_eta_4=binned_sieie_hist['PT_1_eta_4'])
        if len(binned_sieie_hist['PT_2_eta_1'] > 0):
            out['PT_2_eta_1'].fill(dataset=dataset,
                                   PT_2_eta_1=binned_sieie_hist['PT_2_eta_1'])
        if len(binned_sieie_hist['PT_2_eta_2'] > 0):
            out['PT_2_eta_2'].fill(dataset=dataset,
                                   PT_2_eta_2=binned_sieie_hist['PT_2_eta_2'])
        if len(binned_sieie_hist['PT_2_eta_3'] > 0):
            out['PT_2_eta_3'].fill(dataset=dataset,
                                   PT_2_eta_3=binned_sieie_hist['PT_2_eta_3'])
        if len(binned_sieie_hist['PT_2_eta_4'] > 0):
            out['PT_2_eta_4'].fill(dataset=dataset,
                                   PT_2_eta_4=binned_sieie_hist['PT_2_eta_4'])
        if len(binned_sieie_hist['PT_3_eta_1'] > 0):
            out['PT_3_eta_1'].fill(dataset=dataset,
                                   PT_3_eta_1=binned_sieie_hist['PT_3_eta_1'])
        if len(binned_sieie_hist['PT_3_eta_2'] > 0):
            out['PT_3_eta_2'].fill(dataset=dataset,
                                   PT_3_eta_2=binned_sieie_hist['PT_3_eta_2'])
        if len(binned_sieie_hist['PT_3_eta_3'] > 0):
            out['PT_3_eta_3'].fill(dataset=dataset,
                                   PT_3_eta_3=binned_sieie_hist['PT_3_eta_3'])
        if len(binned_sieie_hist['PT_3_eta_4'] > 0):
            out['PT_3_eta_4'].fill(dataset=dataset,
                                   PT_3_eta_4=binned_sieie_hist['PT_3_eta_4'])
        if len(binned_sieie_hist['PT_4_eta_1'] > 0):
            out['PT_4_eta_1'].fill(dataset=dataset,
                                   PT_4_eta_1=binned_sieie_hist['PT_4_eta_1'])
        if len(binned_sieie_hist['PT_4_eta_2'] > 0):
            out['PT_4_eta_2'].fill(dataset=dataset,
                                   PT_4_eta_2=binned_sieie_hist['PT_4_eta_2'])
        if len(binned_sieie_hist['PT_4_eta_3'] > 0):
            out['PT_4_eta_3'].fill(dataset=dataset,
                                   PT_4_eta_3=binned_sieie_hist['PT_4_eta_3'])
        if len(binned_sieie_hist['PT_4_eta_4'] > 0):
            out['PT_4_eta_4'].fill(dataset=dataset,
                                   PT_4_eta_4=binned_sieie_hist['PT_4_eta_4'])

        return out
コード例 #11
0
    def process(self, events):
        dataset = events.metadata['dataset']
        isRealData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        weights = processor.Weights(len(events))
        output = self.accumulator.identity()
        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        if isRealData:
            trigger = np.zeros(events.size, dtype='bool')
            for t in self._triggers[self._year]:
                trigger = trigger | events.HLT[t]
        else:
            trigger = np.ones(events.size, dtype='bool')
        selection.add('trigger', trigger)

        if isRealData:
            trigger = np.zeros(events.size, dtype='bool')
            for t in self._muontriggers[self._year]:
                trigger = trigger | events.HLT[t]
        else:
            trigger = np.ones(events.size, dtype='bool')
        selection.add('muontrigger', trigger)

        try:
            fatjets = events.FatJet
        except AttributeError:
            # early pancakes
            fatjets = events.CustomAK8Puppi
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            # & fatjets.isLoose  # not always available
        ][:, 0:1]
        selection.add('minjetkin', ((candidatejet.pt >= 450)
                                    & (candidatejet.msdcorr >= 47.)
                                    & (abs(candidatejet.eta) < 2.5)).any())
        selection.add('jetacceptance', ((candidatejet.msdcorr >= 47.)
                                        & (candidatejet.pt < 1200)
                                        & (candidatejet.msdcorr < 201.)).any())
        selection.add('jetid', candidatejet.isTight.any())
        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
        selection.add('ddbpass', (candidatejet.btagDDBvL >= 0.89).any())

        jets = events.Jet[(events.Jet.pt > 30.)
                          & (abs(events.Jet.eta) < 2.5)
                          & events.Jet.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
        ak4_opposite = jets[(dphi > np.pi / 2).all()]
        selection.add(
            'antiak4btagMediumOppHem',
            ak4_opposite.btagDeepB.max() <
            BTagEfficiency.btagWPs[self._year]['medium'])
        ak4_away = jets[(dphi > 0.8).all()]
        selection.add(
            'ak4btagMedium08',
            ak4_away.btagDeepB.max() >
            BTagEfficiency.btagWPs[self._year]['medium'])

        selection.add('met', events.MET.pt < 140.)

        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & (events.Muon.looseId).astype(bool))
        nmuons = goodmuon.sum()
        leadingmuon = events.Muon[goodmuon][:, 0:1]
        muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)

        nelectrons = (
            (events.Electron.pt > 10)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.LOOSE)).sum()

        ntaus = ((events.Tau.pt > 20)
                 & (events.Tau.idDecayMode).astype(bool)
                 # bacon iso looser than Nano selection
                 ).sum()

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin', ((leadingmuon.pt > 55.)
                                  & (abs(leadingmuon.eta) < 2.1)).all())
        selection.add('muonDphiAK8',
                      (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) >
                       2 * np.pi / 3).all().all())

        if isRealData:
            genflavor = candidatejet.pt.zeros_like()
        else:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            bosons = getBosons(events)
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
                1, clip=True).fillna(-1).flatten()
            add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                 candidatejet.pt, self._year)
            output['btagWeight'].fill(dataset=dataset,
                                      val=self._btagSF.addBtagWeight(
                                          weights, ak4_away))
            logger.debug("Weight statistics: %r" % weights._weightStats)

        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        regions = {
            'signal': [
                'trigger',
                'minjetkin',
            ],  #'noleptons','jetacceptance', 'noleptons','jetid',],#'jetid', 'noleptons',],# 'n2ddt','antiak4btagMediumOppHem'],#, 'met',],
            'muoncontrol': [
                'muontrigger',
                'minjetkin',
                'jetid',
                'muonDphiAK8',
                'muonkin',
                'ak4btagMedium08',
                'onemuon',
            ],  # 'muonkin', 'muonDphiAK8'],
            'noselection': [],
        }

        for region, cuts in regions.items():
            allcuts = set()
            logger.debug(
                f"Filling cutflow with: {dataset}, {region}, {genflavor}, {weights.weight()}"
            )
            #output['cutflow'].fill(dataset=dataset, region=region, genflavor=genflavor, cut=0, weight=weights.weight())
            #for i, cut in enumerate(cuts + ['ddbpass']):
            #    allcuts.add(cut)
            #    cut = selection.all(*allcuts)
            #    output['cutflow'].fill(dataset=dataset, region=region, genflavor=genflavor[cut], cut=i + 1, weight=weights.weight()[cut])

        systematics = [
            None,
            'jet_triggerUp',
            'jet_triggerDown',
            'btagWeightUp',
            'btagWeightDown',
            'btagEffStatUp',
            'btagEffStatDown',
        ]

        def normalize(val, cut):
            return val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region, systematic=None, wmod=None):
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                weight = weights.weight(modifier=systematic)[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]

            output['templates'].fill(
                dataset=dataset,
                region=region,
                #systematic=sname,
                #genflavor=genflavor[cut],
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                #ddb=normalize(candidatejet.btagDDBvL, cut),
                weight=weight,
            )
            if wmod is not None:
                output['genresponse_noweight'].fill(
                    dataset=dataset,
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=events.genWeight[cut] * wmod[cut],
                )
                output['genresponse'].fill(
                    dataset=dataset,
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=weight,
                )

        for region in regions:
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            output['nminus1_n2ddt'].fill(
                dataset=dataset,
                region=region,
                n2ddt=normalize(candidatejet.n2ddt, cut),
                weight=weights.weight()[cut],
            )
            #for systematic in systematics:
            fill(region)  #, systematic)
            if 'GluGluHToBB' in dataset:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:,
                                                                          i])
                for c in events.LHEWeight.columns[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        return output
コード例 #12
0
	def process(self, events):
		"""Run the di-electron Z selection on one chunk and fill the histograms.

		The selection proceeds in steps, each recorded in the ``cutflow``
		histogram: (1) double-electron trigger, (2) at least two electrons
		passing ``Electron_selection``, (3) an opposite-sign electron pair,
		(4) the highest-pT opposite-sign pair has a mass inside the 60-120
		window.  For simulation (chunks that expose a ``genWeight`` field)
		pile-up, electron-ID and electron-reco scale factors are registered as
		event weights; the trigger weight is computed and printed but not
		applied.

		Args:
			events: event chunk exposing ``HLT``, ``Electron`` and ``PV``
				collections, plus ``Pileup`` for simulation.

		Returns:
			The accumulator with ``sumw``, ``cutflow``, ``nPV``, ``nPV_nw``,
			``mass`` and ``ele{1,2}{pt,eta,phi}`` updated.
		"""

		# Initialize accumulator
		out = self.accumulator.identity()
		# NOTE: the dataset label comes from the module-level name ``setname``,
		# not from events.metadata['dataset'].
		dataset = setname

		# Simulation carries a genWeight field; real data does not.
		isData = 'genWeight' not in events.fields

		selection = processor.PackedSelection()  # created but not used below

		# Cut flow: each cutN array holds the value N once per entry surviving
		# step N, so filling it increments bin N of the cutflow histogram.
		cut0 = np.zeros(len(events))

		# --- Selection

		def flat_dim(arr):
			"""Flatten one nesting level, drop None entries, return a NumPy array."""
			sub_arr = ak.flatten(arr)
			return ak.to_numpy(sub_arr[~ak.is_none(sub_arr)])

		# double lepton trigger: OR of every configured path present in the chunk.
		# dtype=bool is used instead of the np.bool alias, which NumPy deprecated
		# in 1.20 and removed in 1.24.
		is_double_ele_trigger = True
		if not is_double_ele_trigger:
			double_ele_triggers_arr = np.ones(len(events), dtype=bool)
		else:
			double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
			for path in self._doubleelectron_triggers[self._year]:
				if path not in events.HLT.fields:
					continue
				double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]

		# single lepton trigger: built the same way, but currently not applied
		# (only the double-electron mask is used to filter events below).
		is_single_ele_trigger = True
		if not is_single_ele_trigger:
			single_ele_triggers_arr = np.ones(len(events), dtype=bool)
		else:
			single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
			for path in self._singleelectron_triggers[self._year]:
				if path not in events.HLT.fields:
					continue
				single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]

		Initial_events = events
		print("#### Initial events: ",Initial_events)
		events = events[double_ele_triggers_arr]

		##----------- Cut flow1: Passing Triggers
		cut1 = np.ones(len(events))
		print("#### cut1: ",len(cut1))

		# Particle Identification
		Electron = events.Electron

		def Electron_selection(ele):
			"""Per-electron mask: pt > 25, |eta| < 2.5 and cutBased > 2."""
			return (ele.pt > 25) & (np.abs(ele.eta) < 2.5) & (ele.cutBased > 2)

		# Electron channel: keep events with at least two selected electrons.
		Electron_mask = Electron_selection(Electron)
		Ele_channel_mask = ak.num(Electron[Electron_mask]) > 1
		Ele_channel_events = events[Ele_channel_mask]

		##-----------  Cut flow2: Electron channel
		cut2 = np.ones(len(Ele_channel_events)) * 2
		print("#### cut2: ",len(cut2))

		# --- Calculate Scale factor weight
		if not isData:
			get_ele_reco_sf = self._corrections['get_ele_reco_sf'][self._year]
			get_ele_loose_id_sf = self._corrections['get_ele_loose_id_sf'][self._year]

			get_ele_trig_leg1_SF = self._corrections['get_ele_trig_leg1_SF'][self._year]
			get_ele_trig_leg1_data_Eff = self._corrections['get_ele_trig_leg1_data_Eff'][self._year]
			get_ele_trig_leg1_mc_Eff = self._corrections['get_ele_trig_leg1_mc_Eff'][self._year]
			get_ele_trig_leg2_SF = self._corrections['get_ele_trig_leg2_SF'][self._year]
			get_ele_trig_leg2_data_Eff = self._corrections['get_ele_trig_leg2_data_Eff'][self._year]
			get_ele_trig_leg2_mc_Eff = self._corrections['get_ele_trig_leg2_mc_Eff'][self._year]

			# PU weight: nTrueInt (cast to int) indexes the pre-computed
			# per-bin weight array.
			pu_weight_idx = ak.values_astype(Ele_channel_events.Pileup.nTrueInt,"int64")
			pu = self._puweight_arr[pu_weight_idx]

		nPV = Ele_channel_events.PV.npvsGood

		# Electron array restricted to the electron-channel events
		Ele = Ele_channel_events.Electron
		Electron_mask = Electron_selection(Ele)
		Ele_sel = Ele[Electron_mask]

		# Electron pair: all unordered pairs of selected electrons per event
		ele_pairs = ak.combinations(Ele_sel,2,axis=1)
		ele_left, ele_right = ak.unzip(ele_pairs)
		diele = ele_left + ele_right

		# OS: keep pairs with zero net charge
		os_mask = diele.charge == 0
		os_diele = diele[os_mask]
		os_ele_left = ele_left[os_mask]
		os_ele_right = ele_right[os_mask]
		os_event_mask = ak.num(os_diele) > 0
		Ele_os_channel_events = Ele_channel_events[os_event_mask]  # not used below

		def make_leading_pair(target,base):
			"""Pick from ``target`` the entry at the highest-pT position of ``base``.

			keepdims=True keeps one (possibly None) entry per event.
			"""
			return target[ak.argmax(base.pt,axis=1,keepdims=True)]

		# -- Only Leading pair --
		leading_diele = make_leading_pair(diele,diele)
		leading_ele = make_leading_pair(ele_left,diele)
		subleading_ele = make_leading_pair(ele_right,diele)

		# -- Scale Factor for each electron
		def Trigger_Weight(eta1,pt1,eta2,pt2):
			"""Per-event data/MC ratio of the two-leg trigger efficiency.

			Efficiency = leg1(e1)*leg2(e2) + leg1(e2)*leg2(e1) - leg1(e1)*leg1(e2);
			for data each efficiency is additionally multiplied by its SF.
			"""
			per_ev_MC =\
			get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg2_mc_Eff(eta2,pt2) +\
			get_ele_trig_leg1_mc_Eff(eta2,pt2) * get_ele_trig_leg2_mc_Eff(eta1,pt1) -\
			get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg1_mc_Eff(eta2,pt2)

			per_ev_data =\
			get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg2_data_Eff(eta2,pt2) * get_ele_trig_leg2_SF(eta2,pt2) +\
			get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) * get_ele_trig_leg2_data_Eff(eta1,pt1) * get_ele_trig_leg2_SF(eta1,pt1) -\
			get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2)

			return per_ev_data/per_ev_MC

		if not isData:
			# The lookups are evaluated at eta + deltaEtaSC and pt of each leg.
			eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
			eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
			pt1 = ak.flatten(leading_ele.pt)
			pt2 = ak.flatten(subleading_ele.pt)

			# Event SF = product of the two legs' SFs.
			ele_loose_id_sf = get_ele_loose_id_sf(eta1,pt1) * get_ele_loose_id_sf(eta2,pt2)
			ele_reco_sf = get_ele_reco_sf(eta1,pt1) * get_ele_reco_sf(eta2,pt2)

			# Computed and printed for inspection only; not added to weights.
			ele_trig_weight = Trigger_Weight(eta1,pt1,eta2,pt2)
			print("#### Test print trigger weight ####")
			print(ele_trig_weight)

		# --OS and Leading pair --
		leading_os_diele = make_leading_pair(os_diele,os_diele)
		leading_os_ele = make_leading_pair(os_ele_left,os_diele)
		subleading_os_ele = make_leading_pair(os_ele_right,os_diele)

		##-----------  Cut flow3: OSSF
		cut3 = np.ones(len(flat_dim(leading_os_diele))) * 3
		print("#### cut3: ",len(cut3))

		def makeZmass_window_mask(dielecs,start=60,end=120):
			"""Mask of pairs whose mass lies in [start, end] (inclusive)."""
			mask = (dielecs.mass >= start) & (dielecs.mass <= end)
			return mask

		# -- OS and Leading pair --
		Zmass_mask_os = makeZmass_window_mask(leading_os_diele)
		leading_os_Zwindow_ele = leading_os_ele[Zmass_mask_os]
		subleading_os_Zwindow_ele = subleading_os_ele[Zmass_mask_os]
		leading_os_Zwindow_diele = leading_os_diele[Zmass_mask_os]

		# Event-level masking ("Selection method -- Need validation" in the
		# original notes); the resulting event subset is not used below.
		Zmass_event_mask = makeZmass_window_mask(leading_diele)
		Zmass_os_event_mask = ak.flatten(os_event_mask * Zmass_event_mask)
		Ele_Zmass_os_events = Ele_channel_events[Zmass_os_event_mask]

		##-----------  Cut flow4: Zmass
		cut4 = np.ones(len(flat_dim(leading_os_Zwindow_diele))) * 4
		print("#### cut4: ",len(cut4))

		ele1PT = flat_dim(leading_os_Zwindow_ele.pt)
		ele1Eta = flat_dim(leading_os_Zwindow_ele.eta)
		ele1Phi = flat_dim(leading_os_Zwindow_ele.phi)
		ele2PT = flat_dim(subleading_os_Zwindow_ele.pt)
		ele2Eta = flat_dim(subleading_os_Zwindow_ele.eta)
		ele2Phi = flat_dim(subleading_os_Zwindow_ele.phi)
		Mee = flat_dim(leading_os_Zwindow_diele.mass)
		charge = flat_dim(leading_os_Zwindow_diele.charge)  # not used below

		# --- Apply weight and hist
		# One weight per electron-channel event (same length as cut2).
		weights = processor.Weights(len(cut2))

		def skim_weight(arr):
			"""Drop None and exactly-zero entries, return a NumPy array.

			NOTE: a weight that is genuinely 0 is dropped as well, not only
			entries zeroed by the cut mask.
			"""
			mask1 = ~ak.is_none(arr)
			subarr = arr[mask1]
			mask2 = subarr !=0
			return ak.to_numpy(subarr[mask2])

		# Per-event Z-window decision of the leading OS pair (None when the
		# event has no OS pair).
		cuts = ak.flatten(Zmass_mask_os)
		if not isData:
			weights.add('pileup',pu)
			weights.add('ele_id',ele_loose_id_sf)
			weights.add('ele_reco',ele_reco_sf)
			#weights.add('ele_trigger',ele_trig_weight)

		# Initial events: raw (unweighted) count before any selection
		out["sumw"][dataset] += len(Initial_events)

		# Cut flow loop
		for cut in [cut0,cut1,cut2,cut3,cut4]:
			out["cutflow"].fill(
				dataset = dataset,
				cutflow=cut
			)

		# Primary vertex, weighted and unweighted
		out['nPV'].fill(
			dataset=dataset,
			nPV = nPV,
			weight = weights.weight()
		)
		out['nPV_nw'].fill(
			dataset=dataset,
			nPV_nw = nPV
		)

		# Physics variables passing the Z window.  The weight is identical for
		# every histogram, so it is computed once; each histogram's value axis
		# has the same name as the histogram itself.
		zwindow_weight = skim_weight(weights.weight() * cuts)
		zwindow_values = {
			"mass": Mee,
			"ele1pt": ele1PT,
			"ele1eta": ele1Eta,
			"ele1phi": ele1Phi,
			"ele2pt": ele2PT,
			"ele2eta": ele2Eta,
			"ele2phi": ele2Phi,
		}
		for hist_name, values in zwindow_values.items():
			out[hist_name].fill(
				dataset=dataset,
				weight=zwindow_weight,
				**{hist_name: values}
			)
		return out
コード例 #13
0
ファイル: hbbprocessor.py プロジェクト: DAZSLE/boostedhiggs
    def process(self, events):
        """Build the boosted-jet event selection and fill the cutflow counters.

        Named cuts (trigger, leading fat-jet kinematics/ID/N2DDT, AK4 b-tag
        vetoes, MET, lepton counts, muon kinematics) are registered in a
        ``PackedSelection``; ``output['cutflow']`` is then incremented with the
        number of events surviving each successive cut.  For simulation the
        event weights and a gen-boson match to the candidate jet are computed,
        but in the code visible here they are not used for any fill.

        Args:
            events: event chunk exposing ``HLT``, ``FatJet``, ``Jet``, ``MET``,
                ``Muon``, ``Electron``, ``Tau`` and, for simulation,
                ``genWeight``, ``Pileup`` and ``GenPart``.

        Returns:
            The accumulator with ``cutflow`` updated.
        """
        dataset = events.metadata['dataset']
        # Simulation carries a genWeight column; real data does not.
        isRealData = 'genWeight' not in events.columns
        output = self.accumulator.identity()
        selection = processor.PackedSelection()

        # An event passes the trigger if ANY configured path fired.  Starting
        # from all-True and AND-ing the paths would instead demand that every
        # path fired simultaneously.
        trigger = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            trigger = trigger | events.HLT[t]
        selection.add('trigger', trigger)

        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)

        # Leading fat jet, kept as a length-0-or-1 list per event so that
        # events without a fat jet simply fail the .any() reductions below.
        candidatejet = fatjets[:, 0:1]
        selection.add('jetkin', ((candidatejet.pt > 450)
                                 # |eta| so the cut is symmetric in eta
                                 & (np.abs(candidatejet.eta) < 2.4)
                                 & (candidatejet.msdcorr > 40.)).any())
        selection.add('jetid', (candidatejet.jetId & 2).any())  # tight id
        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())

        # The ID bit must be turned into a boolean before combining it with the
        # pt mask: bool & int is a bitwise AND on integers (True & 2 == 0) and
        # would produce an integer array rather than a mask.
        jets = events.Jet[(events.Jet.pt > 30.)
                          & ((events.Jet.jetId & 2) != 0)  # tight id
                          ]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dphi = ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1)
        # AK4 jets separated from the candidate jet by |dphi| > pi/2
        ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
        selection.add(
            'antiak4btagMediumOppHem',
            ak4_opposite.btagDeepB.max() < self._btagWPs['med'][self._year])
        # AK4 jets separated from the candidate jet by |dphi| > 0.8
        ak4_away = jets[(np.abs(dphi) > 0.8).all()]
        selection.add(
            'ak4btagMedium08',
            ak4_away.btagDeepB.max() > self._btagWPs['med'][self._year])

        selection.add('met', events.MET.pt < 140.)
        goodmuon = ((events.Muon.pt > 10)
                    & (np.abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & (events.Muon.looseId).astype(bool))
        nmuons = goodmuon.sum()
        leadingmuon = events.Muon[goodmuon][:, 0:1]
        muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)

        nelectrons = (
            (events.Electron.pt > 10)
            & (np.abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.LOOSE)).sum()

        ntaus = ((events.Tau.pt > 20)
                 & (events.Tau.idDecayMode).astype(bool)
                 # bacon iso looser than Nano selection
                 ).sum()

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin', ((leadingmuon.pt > 55.)
                                  & (np.abs(leadingmuon.eta) < 2.1)).all())
        # delta_phi is signed, so the separation requirement is placed on its
        # magnitude (same treatment as the AK4 dphi cuts above).
        selection.add('muonDphiAK8',
                      (np.abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) >
                       2 * np.pi / 3).all().all())

        # Cumulative cutflow: entry N counts events passing cuts 1..N.
        cutflow = [
            'jetkin', 'trigger', 'jetid', 'n2ddt', 'antiak4btagMediumOppHem',
            'met', 'noleptons'
        ]
        allcuts = set()
        output['cutflow']['none'] += len(events)
        for cut in cutflow:
            allcuts.add(cut)
            output['cutflow'][cut] += selection.all(*allcuts).sum()

        weights = processor.Weights(len(events))
        if not isRealData:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            # Hard-process, last-copy generator particles with 21 <= |pdgId| <= 37
            bosons = events.GenPart[(np.abs(events.GenPart.pdgId) >= 21)
                                    & (np.abs(events.GenPart.pdgId) <= 37)
                                    & events.GenPart.hasFlags(
                                        ['isHardProcess', 'isLastCopy'])]
            # First boson's pt per event, 0 when there is none
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)

            # Match the candidate jet to the boson minimising
            # dR^2 + (relative pt difference)^2.
            ak8_boson_pair = candidatejet.cross(bosons, nested=True)
            dR2 = ak8_boson_pair.i0.delta_r2(ak8_boson_pair.i1)
            dPt2 = ((ak8_boson_pair.i0.pt - ak8_boson_pair.i1.pt) /
                    (ak8_boson_pair.i0.pt + ak8_boson_pair.i1.pt))**2
            # NOTE(review): matchedBoson and weights are not consumed by any
            # fill in this method as written.
            matchedBoson = ak8_boson_pair.i1[(dR2 +
                                              dPt2).argmin()].flatten(axis=1)

        return output
コード例 #14
0
    def process(self, events):
        """Select muon-tagged fat-jet events and fill the configured histograms.

        Applies JEC to the fat jets, builds trigger / muon-tag / tagger
        pass-fail / fat-jet kinematic cuts in a ``PackedSelection``, splits
        simulated events by leading-fat-jet flavour, and fills every histogram
        in the accumulator whose name is listed in ``self.fatjet_hists``
        (or contains ``'hist2d_fatjet'``) or in ``self.event_hists``.

        Args:
            events: event chunk exposing ``HLT``, ``Muon``, ``Jet``,
                ``FatJet`` (with ``subjets``), ``SV`` and, for simulation,
                ``genWeight`` and ``Pileup``.

        Returns:
            The filled accumulator.
        """
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']

        # Simulation carries a genWeight field; real data does not.
        isRealData = 'genWeight' not in events.fields
        if not isRealData:
            output['sumw'][dataset] += sum(events.genWeight)
            JECversion = JECversions[str(self.year)]['MC']
        else:
            output['nbtagmu'][dataset] += ak.count(events.event)
            # For data the JEC version is keyed by whatever follows 'BTagMu'
            # in the dataset name.
            JECversion = JECversions[str(
                self.year)]['Data'][dataset.split('BTagMu')[1]]

        ############
        # Some corrections
        weights = processor.Weights(len(events))
        corrections = {}  # not used below
        if not isRealData:
            weights.add('genWeight', events.genWeight)
            weights.add(
                'pileup_weight',
                self.puReweight(self.puFile, self.nTrueFile,
                                dataset)(events.Pileup.nPU))

        events.FatJet = self.applyJEC(events.FatJet,
                                      events.fixedGridRhoFastjetAll,
                                      events.caches[0], 'AK8PFPuppi',
                                      isRealData, JECversion)

        cuts = processor.PackedSelection()

        def hlt_field(name):
            """Return the trigger name without a leading 'HLT_' prefix.

            str.strip("HLT_") is not used because it removes any of the
            characters H, L, T and _ from BOTH ends of the string, which can
            eat into the path name itself.
            """
            prefix = "HLT_"
            return name[len(prefix):] if name.startswith(prefix) else name

        ############
        # Trigger selection
        # NOTE: both branches rewrite self.triggers, so the adjusted names
        # persist on the processor instance for later calls.
        if self.year == 2016:
            if 'BTagMu_AK4Jet300_Mu5' not in events.HLT.fields:
                self.triggers = [
                    trigger.replace('AK4', '') for trigger in self.triggers
                ]
        elif self.year == 2018:
            for (i, trigger) in enumerate(self.triggers):
                if hlt_field(trigger) not in events.HLT.fields:
                    self.triggers[i] = trigger + "_noalgo"

        # Event passes if any of the configured paths fired.
        trig_arrs = [events.HLT[hlt_field(_trig)] for _trig in self.triggers]
        req_trig = np.zeros(len(events), dtype='bool')
        for t in trig_arrs:
            req_trig = req_trig | t
        cuts.add('trigger', ak.to_numpy(req_trig))

        ############
        # Basic cuts
        ## Muon cuts
        # muon twiki: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2
        # The eta cut is on |eta|; the closing parenthesis previously wrapped
        # the comparison, which took abs() of a boolean instead.
        # NOTE(review): tightId != 1 and pfRelIso04_all > 0.15 keep non-tight,
        # non-isolated muons -- presumably intentional for muons inside jets;
        # confirm.
        events.Muon = events.Muon[(events.Muon.pt > 5)
                                  & (abs(events.Muon.eta) < 2.4) &
                                  (events.Muon.tightId != 1) &
                                  (events.Muon.pfRelIso04_all > 0.15)]
        # Pad to at least two entries per event (missing ones become None).
        events.Muon = ak.pad_none(events.Muon, 2, axis=1)

        ## Jet cuts  (not used)
        events.Jet = events.Jet[(events.Jet.pt > 25)
                                & (abs(events.Jet.eta) <= 2.5)]

        ## FatJet cuts
        events.FatJet = events.FatJet[
            (events.FatJet.pt > self._mask_fatjets['basic']['pt_cut']) &
            (abs(events.FatJet.eta) <= self._mask_fatjets['basic']['eta_cut'])
            & (events.FatJet.jetId > self._mask_fatjets['basic']['jetId_cut'])
            & (ak.count(events.FatJet.subjets.pt, axis=2) >=
               2)]  ## subjet sel to crosscheck

        ## Event level variables
        eventVariables = {}
        eventVariables['nfatjet'] = ak.num(events.FatJet)

        ## Leading jet variables
        # ak.firsts yields None for events without a selected fat jet.
        leadfatjet = ak.firsts(events.FatJet)
        leadfatjet['tau21'] = leadfatjet.tau2 / leadfatjet.tau1
        padded_subjets = ak.pad_none(leadfatjet.subjets, 2)
        subjet1 = padded_subjets[:, 0]
        subjet2 = padded_subjets[:, 1]
        leadfatjet['nsv1'] = get_nsv(subjet1, events.SV)
        leadfatjet['nsv2'] = get_nsv(subjet2, events.SV)
        # Number of muons within dR < 0.4 of each subjet.  ak.sum counts the
        # True entries of the mask; ak.num would return the length of the
        # (None-padded) muon list regardless of the dR comparison.
        leadfatjet['nmusj1'] = ak.sum(subjet1.delta_r(events.Muon) < 0.4,
                                      axis=1)
        leadfatjet['nmusj2'] = ak.sum(subjet2.delta_r(events.Muon) < 0.4,
                                      axis=1)

        # Require at least one matched muon in each of the two subjets.
        fatjet_mutag = (leadfatjet.nmusj1 >= 1) & (leadfatjet.nmusj2 >= 1)
        cuts.add('fatjet_mutag', ak.to_numpy(fatjet_mutag))

        # Tagger pass/fail cuts per working point; a score exactly equal to
        # the threshold is in neither category.
        for DDX in self._mask_DDX.keys():
            for wp, cut in self._mask_DDX[DDX].items():
                DDX_pass = (leadfatjet[f'btag{DDX}vLV2'] > cut)
                DDX_fail = (leadfatjet[f'btag{DDX}vLV2'] < cut)
                cuts.add(f'{DDX}_pass{wp}wp', ak.to_numpy(DDX_pass))
                cuts.add(f'{DDX}_fail{wp}wp', ak.to_numpy(DDX_fail))

        # Mutually exclusive flavour categories of the leading fat jet
        # (simulation), or a single all-True 'Data' category.
        flavors = {}
        if not isRealData:
            flavors['b'] = (leadfatjet.hadronFlavour == 5)
            flavors['c'] = (leadfatjet.hadronFlavour == 4)
            flavors['l'] = (leadfatjet.hadronFlavour < 4)
            flavors['bb'] = (leadfatjet.hadronFlavour == 5) & (
                leadfatjet.nBHadrons >= 2)
            flavors['cc'] = (leadfatjet.hadronFlavour == 4) & (
                leadfatjet.nBHadrons == 0) & (leadfatjet.nCHadrons >= 2)
            # Remove the double-flavour jets from the single-flavour classes.
            flavors['b'] = flavors['b'] & ~flavors['bb']
            flavors['c'] = flavors['c'] & ~flavors['cc']
            flavors['l'] = flavors['l'] & ~flavors['bb'] & ~flavors[
                'cc'] & ~flavors['b'] & ~flavors['c']
        else:
            flavors['Data'] = np.ones(len(events), dtype='bool')

        # One named kinematic cut per entry of the fat-jet mask configuration.
        for selname, cut in self._mask_fatjets.items():

            sel = (leadfatjet.pt > cut['pt_cut']) & \
                    (leadfatjet.msoftdrop > cut['mass_cut']) & \
                    (abs(leadfatjet.eta) < cut['eta_cut']) & \
                    (leadfatjet.jetId >= cut['jetId_cut']) & \
                    (leadfatjet.tau21 < cut['tau21_cut'])

            cuts.add(selname, ak.to_numpy(sel))

        # Region name -> set of cut names that must all pass.
        selection = {}
        selection['basic'] = {'trigger', 'basic'}
        selection['pt350msd50'] = {'trigger', 'fatjet_mutag', 'pt350msd50'}
        selection['msd100tau06'] = {'trigger', 'fatjet_mutag', 'msd100tau06'}
        selection['pt400msd100tau06'] = {
            'trigger', 'fatjet_mutag', 'pt400msd100tau06'
        }
        # Derive tagger pass/fail regions from each final mask.
        for mask_f in self._final_mask:
            for DDX in self._mask_DDX.keys():
                for wp, cut in self._mask_DDX[DDX].items():
                    selection[f'{mask_f}{DDX}pass{wp}wp'] = selection[
                        mask_f].copy()
                    selection[f'{mask_f}{DDX}pass{wp}wp'].add(
                        f'{DDX}_pass{wp}wp')
                    selection[f'{mask_f}{DDX}fail{wp}wp'] = selection[
                        mask_f].copy()
                    selection[f'{mask_f}{DDX}fail{wp}wp'].add(
                        f'{DDX}_fail{wp}wp')

        for histname, h in output.items():
            # The region is whichever selection name appears as one of the
            # '_'-separated tokens of the histogram name (first match wins).
            sel = [r for r in selection.keys() if r in histname.split('_')]
            if (histname in self.fatjet_hists) or ('hist2d_fatjet'
                                                   in histname):
                for flav, mask in flavors.items():
                    # Events failing the region or flavour mask get weight 0.
                    weight = weights.weight() * cuts.all(
                        *selection[sel[0]]) * ak.to_numpy(mask)
                    fields = {
                        k: ak.fill_none(leadfatjet[k], -9999)
                        for k in h.fields if k in dir(leadfatjet)
                    }
                    h.fill(dataset=dataset,
                           flavor=flav,
                           **fields,
                           weight=weight)
            if histname in self.event_hists:
                for flav, mask in flavors.items():
                    weight = weights.weight() * cuts.all(
                        *selection[sel[0]]) * ak.to_numpy(mask)
                    fields = {
                        k: ak.fill_none(eventVariables[k], -9999)
                        for k in h.fields if k in eventVariables.keys()
                    }
                    h.fill(dataset=dataset,
                           flavor=flav,
                           **fields,
                           weight=weight)

        return output
コード例 #15
0
	def process(self, events):

		# Initialize accumulator
		out = self.accumulator.identity()
		dataset = sample_name
		# events.metadata['dataset']

		# Data or MC
		isData = "genWeight" not in events.fields
		isFake = self._isFake


		# Stop processing if there is no event remain
		if len(events) == 0:
			return out

		# Golden Json file
		if (self._year == "2018") and isData:
			injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD"

		if (self._year == "2017") and isData:
			injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

		# <----- Get Scale factors ------>#

		if not isData:

			# Egamma reco ID
			get_ele_reco_above20_sf = self._corrections["get_ele_reco_above20_sf"][
				self._year
			]
			get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][self._year]
			get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][self._year]

			# DoubleEG trigger # 2016, 2017 are not applied yet
			if self._year == "2018":
				get_ele_trig_leg1_SF = self._corrections["get_ele_trig_leg1_SF"][
					self._year
				]
				get_ele_trig_leg1_data_Eff = self._corrections[
					"get_ele_trig_leg1_data_Eff"
				][self._year]
				get_ele_trig_leg1_mc_Eff = self._corrections[
					"get_ele_trig_leg1_mc_Eff"
				][self._year]
				get_ele_trig_leg2_SF = self._corrections["get_ele_trig_leg2_SF"][
					self._year
				]
				get_ele_trig_leg2_data_Eff = self._corrections[
					"get_ele_trig_leg2_data_Eff"
				][self._year]
				get_ele_trig_leg2_mc_Eff = self._corrections[
					"get_ele_trig_leg2_mc_Eff"
				][self._year]

			# PU weight with custom made npy and multi-indexing
			pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
			pu = self._puweight_arr[pu_weight_idx]

			print("## pu_idx: ",len(pu_weight_idx),pu_weight_idx)
			print("## pu_arr: ",len(self._puweight_arr),self._puweight_arr)
			print("## pu:",len(pu),pu)

		selection = processor.PackedSelection()

		# Cut flow
		cut0 = np.zeros(len(events))
		out["cutflow"].fill(dataset=dataset, cutflow=cut0)
		# <----- Helper functions ------>#

		#  Sort by PT  helper function
		def sort_by_pt(ele, pho, jet):
			ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
			pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
			jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]

			return ele, pho, jet

		# Lorentz vectors
		from coffea.nanoevents.methods import vector

		ak.behavior.update(vector.behavior)

		def TLorentz_vector(vec):
			vec = ak.zip(
				{"x": vec.x, "y": vec.y, "z": vec.z, "t": vec.t},
				with_name="LorentzVector",
			)
			return vec

		def TLorentz_vector_cylinder(vec):

			vec = ak.zip(
				{
					"pt": vec.pt,
					"eta": vec.eta,
					"phi": vec.phi,
					"mass": vec.mass,
				},
				with_name="PtEtaPhiMLorentzVector",
			)

			return vec

		# <----- Selection ------>#

		Initial_events = events
		# Good Run ( Golden Json files )
		from coffea import lumi_tools

		if isData:
			lumi_mask_builder = lumi_tools.LumiMask(injson)
			lumimask = ak.Array(
				lumi_mask_builder.__call__(events.run, events.luminosityBlock)
			)
			events = events[lumimask]
			# print("{0}%  of files pass good-run conditions".format(len(events)/ len(Initial_events)))

		# Stop processing if there is no event remain
		if len(events) == 0:
			return out

		##----------- Cut flow1: Passing Triggers

		# double lepton trigger
		is_double_ele_trigger = True
		if not is_double_ele_trigger:
			double_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
		else:
			double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
			for path in self._doubleelectron_triggers[self._year]:
				if path not in events.HLT.fields:
					continue
				double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]

		# single lepton trigger
		is_single_ele_trigger = True
		if not is_single_ele_trigger:
			single_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
		else:
			single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
			for path in self._singleelectron_triggers[self._year]:
				if path not in events.HLT.fields:
					continue
				single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]

		events.Electron, events.Photon, events.Jet = sort_by_pt(
			events.Electron, events.Photon, events.Jet
		)

		# Good Primary vertex
		nPV = events.PV.npvsGood
		nPV_nw = events.PV.npvsGood
		if not isData:
			nPV = nPV * pu

			print(pu)


		# Apply cut1
		events = events[double_ele_triggers_arr]
		if not isData:
			pu = pu[double_ele_triggers_arr]

		# Stop processing if there is no event remain
		if len(events) == 0:
			return out

		cut1 = np.ones(len(events))
		out["cutflow"].fill(dataset=dataset, cutflow=cut1)

		# Set Particles
		Electron = events.Electron
		Muon = events.Muon
		Photon = events.Photon
		MET = events.MET
		Jet = events.Jet


		#  --Muon ( only used to calculate dR )
		MuSelmask = (
			(Muon.pt >= 10)
			& (abs(Muon.eta) <= 2.5)
			& (Muon.tightId)
			& (Muon.pfRelIso04_all < 0.15)
		)
		Muon = Muon[MuSelmask]

		
		#  --Loose Muon ( For Loose Muon veto )
		LoooseMuSelmask = (
			(Muon.pt > 20)
			& (abs(Muon.eta) < 2.4)
			& (Muon.isPFcand)
			& (Muon.isGlobal | Muon.isTracker)
			& (Muon.pfRelIso03_all < 0.25)
		)
		# Reference: VBS Zgamma+2jets
		
		VetoMuon = Muon[LoooseMuSelmask]
		

		##----------- Cut flow2: Electron Selection

		EleSelmask = (
			(Electron.pt >= 10)
			& (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479)
			& (Electron.cutBased > 2)
			& (abs(Electron.dxy) < 0.05)
			& (abs(Electron.dz) < 0.1)
		) | (
			(Electron.pt >= 10)
			& (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479)
			& (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5)
			& (Electron.cutBased > 2)
			& (abs(Electron.dxy) < 0.1)
			& (abs(Electron.dz) < 0.2)
		)

		Electron = Electron[EleSelmask]

		
		# Event with 3 Electrons
		# apply cut 2
		Tri_electron_mask = ak.num(Electron) == 3 
		Electron = Electron[Tri_electron_mask]
		Photon = Photon[Tri_electron_mask]
		Jet = Jet[Tri_electron_mask]
		MET = MET[Tri_electron_mask]
		Muon = Muon[Tri_electron_mask]
		VetoMuon = VetoMuon[Tri_electron_mask]
		if not isData:
			pu = pu[Tri_electron_mask]
		events = events[Tri_electron_mask]
		
		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut2 = np.ones(len(Photon)) * 2
		out["cutflow"].fill(dataset=dataset, cutflow=cut2)

		##----------- Cut flow3: 4th lepton veto (Loose Muon)
		# Veto 4th Loose muon
		# apply cut 3
		fourth_lepton_veto = ak.num(VetoMuon) < 1
		Electron = Electron[fourth_lepton_veto]
		Photon = Photon[fourth_lepton_veto]
		Jet = Jet[fourth_lepton_veto]
		MET = MET[fourth_lepton_veto]
		Muon = Muon[fourth_lepton_veto]
		if not isData:
			pu = pu[fourth_lepton_veto]
		events = events[fourth_lepton_veto]
		
		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut3 = np.ones(len(Photon)) * 3
		out["cutflow"].fill(dataset=dataset, cutflow=cut3)


		##----------- Cut flow4: Photon Selection

		# Basic photon selection
		isgap_mask = (abs(Photon.eta) < 1.442) | (
			(abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5)
		)
		Pixel_seed_mask = ~Photon.pixelSeed

		if (dataset == "ZZ") and (self._year == "2017"):
			PT_ID_mask = (Photon.pt >= 20) & (
				Photon.cutBasedBitmap >= 3
			)  # 2^0(Loose) + 2^1(Medium) + 2^2(Tights)
		else:
			PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1)

		# dR cut with selected Muon and Electrons
		dr_pho_ele_mask = ak.all(
			Photon.metric_table(Electron) >= 0.5, axis=-1
		)  # default metric table: delta_r
		dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

		# genPartFlav cut
		"""
		if dataset == "WZG":
			isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11)
			PhoSelmask = PT_ID_mask & isgap_mask &  Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask

		elif dataset == "WZ":
			isPrompt = (Photon.genPartFlav == 1) 
			PhoSelmask = PT_ID_mask & isgap_mask &  Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
				
		else:
			PhoSelmask = PT_ID_mask  & isgap_mask &  Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
		"""

		PhoSelmask = (
			PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
		)
		Photon = Photon[PhoSelmask]

		# Apply cut 4
		A_photon_mask = ak.num(Photon) > 0
		Electron = Electron[A_photon_mask]
		Photon = Photon[A_photon_mask]
		Jet = Jet[A_photon_mask]
		Muon = Muon[A_photon_mask]
		MET = MET[A_photon_mask]
		if not isData:
			pu = pu[A_photon_mask]
		events = events[A_photon_mask]

		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut4 = np.ones(len(Photon)) * 4
		out["cutflow"].fill(dataset=dataset, cutflow=cut4)

		##----------- Cut flow5: OSSF
		# OSSF index maker
		@numba.njit
		def find_3lep(events_leptons, builder):
			"""Record, per event, index triplets built around a zero-net-charge pair.

			For every entry of ``events_leptons`` one list is opened on ``builder``.
			Inside it, each pair ``(i0, i1)`` with ``i0 < i1`` whose ``charge``
			values sum to zero is combined with every remaining index ``i2`` of the
			same event, and the tuple ``(i0, i1, i2)`` is appended.

			Returns the same ``builder`` that was passed in.
			"""
			for leptons in events_leptons:

				# One output list per event, even when it ends up empty.
				builder.begin_list()
				nlep = len(leptons)
				for i0 in range(nlep):
					for i1 in range(i0 + 1, nlep):
						# Keep only pairs whose charges cancel.
						if leptons[i0].charge + leptons[i1].charge != 0:
							continue

						for i2 in range(nlep):
							# The third index must differ from both pair members.
							if len({i0, i1, i2}) < 3:
								continue
							builder.begin_tuple(3)
							builder.index(0).integer(i0)
							builder.index(1).integer(i1)
							builder.index(2).integer(i2)
							builder.end_tuple()
				builder.end_list()
			return builder

		eee_triplet_idx = find_3lep(Electron, ak.ArrayBuilder()).snapshot()

		ossf_mask = ak.num(eee_triplet_idx) == 2

		# Apply cut 5
		eee_triplet_idx = eee_triplet_idx[ossf_mask]
		Electron = Electron[ossf_mask]
		Photon = Photon[ossf_mask]
		Jet = Jet[ossf_mask]
		MET = MET[ossf_mask]
		if not isData:
			pu = pu[ossf_mask]
		events = events[ossf_mask]

		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5
		out["cutflow"].fill(dataset=dataset, cutflow=cut5)

		# Define Electron Triplet

		Triple_electron = [Electron[eee_triplet_idx[idx]] for idx in "012"]
		Triple_eee = ak.zip(
			{
				"lep1": Triple_electron[0],
				"lep2": Triple_electron[1],
				"lep3": Triple_electron[2],
				"p4": TLorentz_vector(Triple_electron[0] + Triple_electron[1]),
			}
		)

		# Ele pair selector --> Close to Z mass
		bestZ_idx = ak.singletons(ak.argmin(abs(Triple_eee.p4.mass - 91.1876), axis=1))
		Triple_eee = Triple_eee[bestZ_idx]

		leading_ele = Triple_eee.lep1
		subleading_ele = Triple_eee.lep2
		third_ele = Triple_eee.lep3

		def make_leading_pair(target, base):
			"""Select from ``target`` the entry at the position of the highest-``pt`` entry of ``base``.

			The arg-max is taken along axis 1 with ``keepdims=True``, so the result
			keeps one (possibly missing) entry per outer element.
			"""
			leading_idx = ak.argmax(base.pt, axis=1, keepdims=True)
			return target[leading_idx]

		leading_pho = make_leading_pair(Photon, Photon)

		# -- Scale Factor for each electron

		# Trigger weight helper function
		def Trigger_Weight(eta1, pt1, eta2, pt2):
			"""Return the data/MC ratio built from the leg-1 / leg-2 trigger lookups.

			Both the MC and the data quantity have the form
			``leg1(1) * leg2(2) + leg1(2) * leg2(1) - leg1(1) * leg1(2)``, where
			``(1)`` and ``(2)`` stand for ``(eta1, pt1)`` and ``(eta2, pt2)``.
			On the data side every ``*_data_Eff`` lookup is multiplied by the
			matching ``*_SF`` lookup.
			"""
			# --- MC side ---
			mc_first_on_leg1 = get_ele_trig_leg1_mc_Eff(
				eta1, pt1
			) * get_ele_trig_leg2_mc_Eff(eta2, pt2)
			mc_second_on_leg1 = get_ele_trig_leg1_mc_Eff(
				eta2, pt2
			) * get_ele_trig_leg2_mc_Eff(eta1, pt1)
			mc_total = mc_first_on_leg1 + mc_second_on_leg1
			mc_both_on_leg1 = get_ele_trig_leg1_mc_Eff(
				eta1, pt1
			) * get_ele_trig_leg1_mc_Eff(eta2, pt2)
			per_ev_MC = mc_total - mc_both_on_leg1

			# --- data side (lookup times scale factor for every leg) ---
			data_first_on_leg1 = (
				get_ele_trig_leg1_data_Eff(eta1, pt1)
				* get_ele_trig_leg1_SF(eta1, pt1)
				* get_ele_trig_leg2_data_Eff(eta2, pt2)
				* get_ele_trig_leg2_SF(eta2, pt2)
			)
			data_second_on_leg1 = (
				get_ele_trig_leg1_data_Eff(eta2, pt2)
				* get_ele_trig_leg1_SF(eta2, pt2)
				* get_ele_trig_leg2_data_Eff(eta1, pt1)
				* get_ele_trig_leg2_SF(eta1, pt1)
			)
			data_total = data_first_on_leg1 + data_second_on_leg1
			data_both_on_leg1 = (
				get_ele_trig_leg1_data_Eff(eta1, pt1)
				* get_ele_trig_leg1_SF(eta1, pt1)
				* get_ele_trig_leg1_data_Eff(eta2, pt2)
				* get_ele_trig_leg1_SF(eta2, pt2)
			)
			per_ev_data = data_total - data_both_on_leg1

			return per_ev_data / per_ev_MC

		if not isData:

			## -------------< Egamma ID and Reco Scale factor > -----------------##
			get_pho_medium_id_sf = get_pho_medium_id_sf(
				ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt)
			)

			ele_reco_sf = (
				get_ele_reco_above20_sf(
					ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
					ak.flatten(leading_ele.pt),
				)
				* get_ele_reco_above20_sf(
					ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
					ak.flatten(subleading_ele.pt),
				)
				* get_ele_reco_above20_sf(
					ak.flatten(third_ele.deltaEtaSC + third_ele.eta),
					ak.flatten(third_ele.pt),
				)
			)

			ele_medium_id_sf = (
				get_ele_medium_id_sf(
					ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
					ak.flatten(leading_ele.pt),
				)
				* get_ele_medium_id_sf(
					ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
					ak.flatten(subleading_ele.pt),
				)
				* get_ele_medium_id_sf(
					ak.flatten(third_ele.deltaEtaSC + third_ele.eta),
					ak.flatten(third_ele.pt),
				)
			)

			## -------------< Double Electron Trigger Scale factor > -----------------##
			eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
			eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
			pt1 = ak.flatten(leading_ele.pt)
			pt2 = ak.flatten(subleading_ele.pt)

			# -- 2017,2016 are not applied yet
			if self._year == "2018":
				ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

		##----------- Cut flow6: Event selection

		# Mee cut
		diele = Triple_eee.p4
		Mee_cut_mask = ak.firsts(diele.mass) > 4

		# Z mass window
		# zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) < 15 # SR, CR_ZZA, CR_Z+jets, CR_Conversion
		# zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 5 #  CR_t-enriched
		# zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 15 #  CR_Conversion

		# M(eee) cut SR, CR_ZZA, CR_Z+jets, CR_t enriched
		# eee = Triple_eee.lep1 + Triple_eee.lep2 + Triple_eee.lep3
		# Meee_cut_mask = ak.firsts(eee.mass > 100)
		# Meee_cut_mask = ak.firsts(eee.mass <= 100)

		# b-Jet veto cut  #SR, CR_ZZA, CR_Z+jets, CR_Conversion
		# bjet_mask = (Jet.btagCSVV2 > 0.4184)	&  (Jet.pt > 30)
		# bjet_veto_mask = ak.num(Jet[bjet_mask]) == 0
		# bjet_veto_mask = ak.num(Jet[bjet_mask]) > 0 # CR_t-enriched

		# Electron PT cuts
		Elept_mask = ak.firsts(
			(leading_ele.pt >= 25) & (subleading_ele.pt >= 10) & (third_ele.pt >= 25)
		)

		# MET cuts
		MET_mask = MET > 20  # Baseline
		# MET_mask = MET.pt > 30 #  SR, CR-ZZE, CR-t-entirched
		# MET_mask = MET.pt <= 30 #  CR-Z+jets. CR-Conversion

		# Mask
		# Event_sel_mask = Elept_mask & MET_mask & bjet_veto_mask & Mee_cut_mask & zmass_window_mask  & Meee_cut_mask # SR,CR
		Event_sel_mask = Elept_mask & MET_mask & Mee_cut_mask  # SR,CR

		# Apply cut6
		Triple_eee_sel = Triple_eee[Event_sel_mask]
		leading_pho_sel = leading_pho[Event_sel_mask]
		MET_sel = MET[Event_sel_mask]
		events = events[Event_sel_mask]

		# Photon  EE and EB
		isEE_mask = leading_pho.isScEtaEE
		isEB_mask = leading_pho.isScEtaEB
		Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
		Pho_EB = leading_pho[isEB_mask & Event_sel_mask]


		# Stop processing if there is no event remain
		if len(leading_pho_sel) == 0:
			return out

		cut6 = np.ones(ak.sum(ak.num(leading_pho_sel) > 0)) * 6
		out["cutflow"].fill(dataset=dataset, cutflow=cut6)

		## -------------------- Prepare making hist --------------#

		# Photon
		phoPT = ak.flatten(leading_pho_sel.pt)
		phoEta = ak.flatten(leading_pho_sel.eta)
		phoPhi = ak.flatten(leading_pho_sel.phi)

		# Photon EE
		if len(Pho_EE.pt) != 0:
			Pho_EE_PT = ak.flatten(Pho_EE.pt)
			Pho_EE_Eta = ak.flatten(Pho_EE.eta)
			Pho_EE_Phi = ak.flatten(Pho_EE.phi)
			Pho_EE_sieie = ak.flatten(Pho_EE.sieie)
			Pho_EE_hoe = ak.flatten(Pho_EE.hoe)
			Pho_EE_Iso_charge = ak.flatten(Pho_EE.pfRelIso03_chg)

		# Photon EB
		if len(Pho_EB.pt) != 0:
			Pho_EB_PT = ak.flatten(Pho_EB.pt)
			Pho_EB_Eta = ak.flatten(Pho_EB.eta)
			Pho_EB_Phi = ak.flatten(Pho_EB.phi)
			Pho_EB_sieie = ak.flatten(Pho_EB.sieie)
			Pho_EB_hoe = ak.flatten(Pho_EB.hoe)
			Pho_EB_Iso_charge = ak.flatten(Pho_EB.pfRelIso03_chg)

		# Electrons
		ele1PT = ak.flatten(Triple_eee_sel.lep1.pt)
		ele1Eta = ak.flatten(Triple_eee_sel.lep1.eta)
		ele1Phi = ak.flatten(Triple_eee_sel.lep1.phi)

		ele2PT = ak.flatten(Triple_eee_sel.lep2.pt)
		ele2Eta = ak.flatten(Triple_eee_sel.lep2.eta)
		ele2Phi = ak.flatten(Triple_eee_sel.lep2.phi)

		ele3PT = ak.flatten(Triple_eee_sel.lep3.pt)
		ele3Eta = ak.flatten(Triple_eee_sel.lep3.eta)
		ele3Phi = ak.flatten(Triple_eee_sel.lep3.phi)

		charge = ak.flatten(Triple_eee.lep1.charge + Triple_eee.lep2.charge)

		# MET
		met = ak.to_numpy(MET_sel)

		# M(eea) M(ee)
		diele = Triple_eee_sel.p4
		eeg_vec = diele + leading_pho_sel
		Meea = ak.flatten(eeg_vec.mass)
		Mee = ak.flatten(Triple_eee_sel.p4.mass)


		# --- Apply weight and hist
		
		if isFake:
			weights = processor.Weights(len(cut6))
		else:
			weights = processor.Weights(len(cut5))
			


		# -------------------- Sieie bins---------------------------#
		def make_bins(pt, eta, bin_range_str):
			"""Return the boolean mask of entries falling in one named (pt, eta) bin.

			Parameters:
				pt, eta: array-likes (or scalars) supporting ``<``, ``>`` and ``&``.
				bin_range_str: bin name of the form ``"PT_<i>_eta_<j>"`` with
					``i`` and ``j`` in 1..4.

			Returns:
				``pt``/``eta`` comparison mask for the requested bin only.

			Raises:
				KeyError: if ``bin_range_str`` is not one of the 16 known bin names.
			"""
			# (low, high) bounds; ``None`` means the bin is open on that side.
			pt_ranges = {
				"PT_1": (20, 30),
				"PT_2": (30, 40),
				"PT_3": (40, 50),
				"PT_4": (50, None),
			}
			eta_ranges = {
				"eta_1": (None, 1),
				"eta_2": (1, 1.5),
				"eta_3": (1.5, 2),
				"eta_4": (2, 2.5),
			}
			bin_bounds = {
				pt_name + "_" + eta_name: (pt_range, eta_range)
				for pt_name, pt_range in pt_ranges.items()
				for eta_name, eta_range in eta_ranges.items()
			}
			(pt_low, pt_high), (eta_low, eta_high) = bin_bounds[bin_range_str]

			# Only the requested bin is evaluated; the other 15 masks are never built.
			# NOTE(review): all bounds are strict, so a value exactly on an edge
			# (e.g. pt == 30 or eta == 1.5) belongs to no bin -- confirm this is intended.
			binmask = pt > pt_low
			if pt_high is not None:
				binmask = binmask & (pt < pt_high)
			if eta_low is not None:
				binmask = binmask & (eta > eta_low)
			binmask = binmask & (eta < eta_high)

			return binmask

		bin_name_list = [
			"PT_1_eta_1",
			"PT_1_eta_2",
			"PT_1_eta_3",
			"PT_1_eta_4",
			"PT_2_eta_1",
			"PT_2_eta_2",
			"PT_2_eta_3",
			"PT_2_eta_4",
			"PT_3_eta_1",
			"PT_3_eta_2",
			"PT_3_eta_3",
			"PT_3_eta_4",
			"PT_4_eta_1",
			"PT_4_eta_2",
			"PT_4_eta_3",
			"PT_4_eta_4",
		]



		## -- Fake-fraction Lookup table --##
		if isFake:
			# Make Bin-range mask
			binned_pteta_mask = {}
			for name in bin_name_list:
				binned_pteta_mask[name] = make_bins(
					ak.flatten(leading_pho_sel.pt),
					ak.flatten(abs(leading_pho_sel.eta)),
					name,
				)
			# Read Fake fraction --> Mapping bin name to int()
			in_dict = np.load('Fitting_v2/results_210517.npy',allow_pickle="True")[()]
			idx=0
			fake_dict ={}
			for i,j in in_dict.items():
				fake_dict[idx] = j
				idx+=1


			# Reconstruct Fake_weight
			fw= 0
			for i,j in binned_pteta_mask.items():
				fw = fw + j*fake_dict[bin_name_list.index(i)]


			# Process 0 weight to 1
			@numba.njit
			def zero_one(x):
				# An exact zero becomes one; any other value is returned unchanged.
				return 1 if x == 0 else x
			vec_zero_one = np.vectorize(zero_one)
			fw = vec_zero_one(fw)




		# --- skim cut-weight
		if not isFake:
			def skim_weight(arr):
				"""Drop missing and zero entries of ``arr``; return the rest as a NumPy array."""
				present = arr[~ak.is_none(arr)]
				return ak.to_numpy(present[present != 0])
		else:
			def skim_weight(arr):
				"""Return ``arr`` unchanged (variant defined when ``isFake`` is true)."""
				return arr


		if not isFake:
			cuts = Event_sel_mask
			cuts_pho_EE = ak.flatten(isEE_mask)
			cuts_pho_EB = ak.flatten(isEB_mask)

		if isFake:
			cuts = np.ones(len(Event_sel_mask))
			cuts_pho_EE = ak.flatten(isEE_mask & Event_sel_mask)
			cuts_pho_EB = ak.flatten(isEB_mask & Event_sel_mask)


		if isFake:
			weights.add("fake_fraction", fw)
			
		# Weight and SF here
		if not (isData | isFake):
			weights.add("pileup", pu)
			weights.add("ele_id", ele_medium_id_sf)
			weights.add("pho_id", get_pho_medium_id_sf)
			weights.add("ele_reco", ele_reco_sf)

			# 2016,2017 are not applied yet
			if self._year == "2018":
				weights.add("ele_trigger", ele_trig_weight)

		# ---------------------------- Fill hist --------------------------------------#

		# Initial events
		out["sumw"][dataset] += len(Initial_events)


		print("cut1: {0},cut2: {1},cut3: {2},cut4: {3},cut5: {4},cut6: {5},cut7: {6}".format(len(cut0), len(cut1), len(cut2), len(cut3), len(cut4), len(cut5),len(cut6)))


		## Cut flow loop
		#for cut in [cut0, cut1, cut2, cut3, cut4, cut5,cut6]:
		#	out["cutflow"].fill(dataset=dataset, cutflow=cut)

		# Primary vertex
		out["nPV"].fill(
			dataset=dataset,
			nPV=nPV,
		)
		out["nPV_nw"].fill(dataset=dataset, nPV_nw=nPV_nw)

		# Fill hist

		# -- met -- #
		out["met"].fill(
			dataset=dataset, met=met, weight=skim_weight(weights.weight() * cuts)
		)

		# --mass -- #
		out["mass"].fill(
			dataset=dataset, mass=Mee, weight=skim_weight(weights.weight() * cuts)
		)
		out["mass_eea"].fill(
			dataset=dataset, mass_eea=Meea, weight=skim_weight(weights.weight() * cuts)
		)

		# -- Electron -- #
		out["ele1pt"].fill(
			dataset=dataset, ele1pt=ele1PT, weight=skim_weight(weights.weight() * cuts)
		)
		out["ele1eta"].fill(
			dataset=dataset,
			ele1eta=ele1Eta,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele1phi"].fill(
			dataset=dataset,
			ele1phi=ele1Phi,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele2pt"].fill(
			dataset=dataset, ele2pt=ele2PT, weight=skim_weight(weights.weight() * cuts)
		)
		out["ele2eta"].fill(
			dataset=dataset,
			ele2eta=ele2Eta,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele2phi"].fill(
			dataset=dataset,
			ele2phi=ele2Phi,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele3pt"].fill(
			dataset=dataset, ele3pt=ele3PT, weight=skim_weight(weights.weight() * cuts)
		)

		# -- Photon -- #

		out["phopt"].fill(
			dataset=dataset, phopt=phoPT, weight=skim_weight(weights.weight() * cuts)
		)
		out["phoeta"].fill(
			dataset=dataset, phoeta=phoEta, weight=skim_weight(weights.weight() * cuts)
		)
		out["phophi"].fill(
			dataset=dataset, phophi=phoPhi, weight=skim_weight(weights.weight() * cuts)
		)

		if len(Pho_EE.pt) != 0:

			out["pho_EE_pt"].fill(
				dataset=dataset,
				pho_EE_pt=Pho_EE_PT,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_eta"].fill(
				dataset=dataset,
				pho_EE_eta=Pho_EE_Eta,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_phi"].fill(
				dataset=dataset,
				pho_EE_phi=Pho_EE_Phi,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_hoe"].fill(
				dataset=dataset,
				pho_EE_hoe=Pho_EE_hoe,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_sieie"].fill(
				dataset=dataset,
				pho_EE_sieie=Pho_EE_sieie,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_Iso_chg"].fill(
				dataset=dataset,
				pho_EE_Iso_chg=Pho_EE_Iso_charge,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)

		if len(Pho_EB.pt) != 0:
			out["pho_EB_pt"].fill(
				dataset=dataset,
				pho_EB_pt=Pho_EB_PT,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_eta"].fill(
				dataset=dataset,
				pho_EB_eta=Pho_EB_Eta,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_phi"].fill(
				dataset=dataset,
				pho_EB_phi=Pho_EB_Phi,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_hoe"].fill(
				dataset=dataset,
				pho_EB_hoe=Pho_EB_hoe,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_sieie"].fill(
				dataset=dataset,
				pho_EB_sieie=Pho_EB_sieie,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_Iso_chg"].fill(
				dataset=dataset,
				pho_EB_Iso_chg=Pho_EB_Iso_charge,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)

		return out
コード例 #16
0
ファイル: bbwwprocessor.py プロジェクト: dykim1/boostedhiggs
    def process(self, events):
        """Apply the electron/muon channel selections to one chunk and fill the cutflow.

        The chunk is treated as real data when ``events`` has no ``genWeight``
        attribute. For simulation the sum of ``genWeight`` is accumulated in
        ``output['sumw']`` and generator and pile-up weights are applied.

        Selections registered: MET filters, per-channel trigger plus lepton
        multiplicity, HT, Hbb tag, veto on b-tagged AK4 jets away from the Hbb
        candidate, and a minimum mass for the lepton-subtracted W candidate.
        The ``cutflow`` histogram is filled cumulatively, cut by cut, per channel.

        Returns the filled accumulator.
        """

        # get meta infos
        dataset = events.metadata["dataset"]
        isRealData = not hasattr(events, "genWeight")
        n_events = len(events)
        selection = processor.PackedSelection()
        weights = processor.Weights(n_events)
        output = self.accumulator.identity()

        # weights
        if not isRealData:
            output['sumw'][dataset] += awkward1.sum(events.genWeight)

        # trigger: OR of all configured paths per channel; missing paths only warn
        triggers = {}
        for channel in ["e", "mu"]:
            trigger = np.zeros(n_events, dtype='bool')
            for t in self._trigger[channel]:
                try:
                    trigger = trigger | events.HLT[t]
                except Exception:
                    # Not a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
                    warnings.warn("Missing trigger %s" % t, RuntimeWarning)
            triggers[channel] = trigger

        # met filter
        met_filters = ["goodVertices",
                       "globalSuperTightHalo2016Filter",
                       "HBHENoiseFilter",
                       "HBHENoiseIsoFilter",
                       "EcalDeadCellTriggerPrimitiveFilter",
                       "BadPFMuonFilter",
                       ]
        met_filters_mask = np.ones(n_events, dtype='bool')
        for t in met_filters:
            met_filters_mask = met_filters_mask & events.Flag[t]
        selection.add("met_filter", awkward1.to_numpy(met_filters_mask))

        # load objects
        muons = events.Muon
        electrons = events.Electron
        jets = events.Jet
        fatjets = events.FatJet
        subjets = events.SubJet
        fatjetsLS = events.FatJetLS
        met = events.MET

        # muons
        goodmuon = (
            (muons.mediumId)
            & (muons.miniPFRelIso_all <= 0.2)
            & (muons.pt >= 27)
            & (abs(muons.eta) <= 2.4)
            & (abs(muons.dz) < 0.1)
            & (abs(muons.dxy) < 0.05)
            & (muons.sip3d < 4)
        )
        good_muons = muons[goodmuon]
        ngood_muons = awkward1.sum(goodmuon, axis=1)

        # electrons
        goodelectron = (
            (electrons.mvaFall17V2noIso_WP90)
            & (electrons.pt >= 30)
            & (abs(electrons.eta) <= 1.479)
            & (abs(electrons.dz) < 0.1)
            & (abs(electrons.dxy) < 0.05)
            & (electrons.sip3d < 4)
        )
        good_electrons = electrons[goodelectron]
        ngood_electrons = awkward1.sum(goodelectron, axis=1)

        # good leptons, ordered by descending pt so that ``firsts`` below
        # returns the hardest lepton (the default ascending sort gave the softest)
        good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
        good_leptons = good_leptons[awkward1.argsort(good_leptons.pt, ascending=False)]

        # lepton candidate
        candidatelep = awkward1.firsts(good_leptons)

        # lepton channel selection
        selection.add("ch_e", awkward1.to_numpy((triggers["e"]) & (ngood_electrons==1) & (ngood_muons==0))) # not sure if need to require 0 muons or 0 electrons in the next line
        selection.add("ch_mu", awkward1.to_numpy((triggers["mu"]) & (ngood_electrons==0) & (ngood_muons==1)))

        # jets
        ht = awkward1.sum(jets[jets.pt > 30].pt,axis=1)
        selection.add("ht_400", awkward1.to_numpy(ht>=400))
        goodjet = (
            (jets.isTight)
            & (jets.pt > 30)
            & (abs(jets.eta) <= 2.5)
            )
        good_jets = jets[goodjet]

        # fat jets
        jID = "isTight"
        # TODO: add mass correction

        # a way to get the first two subjets
        # cart = awkward1.cartesian([fatjets, subjets], nested=True)
        # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
        # sj1 = subjets[idxes[:,:,0]]
        # sj2 = subjets[idxes[:,:,1]]

        good_fatjet = (
            (getattr(fatjets, jID))
            & (abs(fatjets.eta) <= 2.4)
            & (fatjets.pt > 50)
            & (fatjets.msoftdrop > 30)
            & (fatjets.msoftdrop < 210)
            #& (fatjets.pt.copy(content=fatjets.subjets.content.counts) == 2) # TODO: require 2 subjets?
            # this can probably be done w FatJet_subJetIdx1 or FatJet_subJetIdx2
            & (awkward1.all(fatjets.subjets.pt >= 20))
            & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4))
        )
        good_fatjets = fatjets[good_fatjet]

        # hbb candidate
        mask_hbb = (
            (good_fatjets.pt > 200)
            & (good_fatjets.delta_r(candidatelep) > 2.0)
            )
        candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

        # b-tag #& (good_fatjets.particleNetMD_Xbb > 0.9)
        # NOTE(review): candidateHbb is missing for events without a passing fat jet;
        # confirm that to_numpy of the resulting option-type mask is what PackedSelection expects.
        selection.add('hbb_btag',awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8)) # score would be larger for tight category (0.97)

        # No AK4 b-tagged jets away from bb jet.
        # The dR mask is computed on good_jets, so it must index good_jets as well
        # (indexing the unfiltered ``jets`` mismatches the per-event lengths).
        jets_HbbV = good_jets[good_jets.delta_r(candidateHbb) >= 1.2]
        # Veto: the highest b-tag score among the away jets must stay below the medium
        # working point. With mask_identity=False an event without away jets yields
        # -inf and therefore passes.
        selection.add('hbb_vetobtagaway',  awkward1.to_numpy(awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False) < BTagEfficiency.btagWPs[self._year]['medium']))

        # fat jets Lepton Subtracted
        # wjj candidate
        mask_wjj = (
            (fatjetsLS.pt > 50)
            & (fatjetsLS.delta_r(candidatelep) > 1.2)
            # need to add 2 subjets w pt > 20 & eta<2.4
            # need to add ID?
            )
        candidateWjj = awkward1.firsts(fatjetsLS[mask_wjj][awkward1.argmin(fatjetsLS[mask_wjj].delta_r(candidatelep),axis=1,keepdims=True)])
        # add t2/t1 <= 0.75 (0.45 HP)
        selection.add('hww_mass',  awkward1.to_numpy(candidateWjj.mass >= 10))

        # wjjlnu info
        #HSolverLiInfo  hwwInfoLi;
        # qqSDmass = candidateWjj.msoftdrop
        # hwwLi   = hSolverLi->minimize(candidatelep.p4(), met.p4(), wjjcand.p4(), qqSDmass, hwwInfoLi)
        #neutrino = hwwInfoLi.neutrino;
        #wlnu     = hwwInfoLi.wlnu;
        #wqq      = hwwInfoLi.wqqjet;
        #hWW      = hwwInfoLi.hWW;
        #wwDM     = PhysicsUtilities::deltaR( wlnu,wqq) * hWW.pt()/2.0;
        # add dlvqq <= 11 (2.5 HP)

        # in the meantime let's add the mass
        '''
        mm = (candidatejet - candidatelep).mass2
        jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*candidatejet.mass
        joffshell = jmass < 62.5
        massassumption = 80.*joffshell + (125 - 80.)*~joffshell
        x = massassumption**2/(2*candidatelep.pt*met.pt) + np.cos(candidatelep.phi - met.phi)
        met_eta = (
            (x < 1)*np.arcsinh(x*np.sinh(candidatelep.eta))
            + (x > 1)*(
                candidatelep.eta - np.sign(candidatelep.eta)*np.arccosh(candidatelep.eta)
                )
            )
        met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),np.array([0.]),np.array([0.]),np.array([0.]))
        if met.size > 0:
            met_p4 = TLorentzVectorArray.from_ptetaphim(met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
        
        # hh system
        candidateHH = candidateWjj + met_p4 + candidateHbb
        selection.add('hh_mass', candidateHH.mass >= 700)
        selection.add('hh_centrality', candidateHH.pt/candidateHH.mass >= 0.3)
        '''

        channels = {"e": ["met_filter","ch_e","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"], #,"hh_mass","hh_centrality"],
                    "mu": ["met_filter","ch_mu","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"] #,"hh_mass","hh_centrality"],
                    }

        # need to add gen info

        if not isRealData:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)

        # Cumulative cutflow: bin 0 holds all events, bin i + 1 the events that
        # pass the first i + 1 cuts of the channel.
        for channel, cuts in channels.items():
            allcuts = set()
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=0, weight=weights.weight())
            for i, cut_name in enumerate(cuts):
                allcuts.add(cut_name)
                passed = selection.all(*allcuts)
                output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1, weight=weights.weight()[passed])

        return output
コード例 #17
0
    def process(self, df):
        """Build lepton/jet candidates for one chunk and fill the invariant-mass histogram.

        Dataset parameters (year, cross-section, sum of weights, data flag) are
        read from ``self._samples``. Electrons, muons and jets are built with
        ``Initialize`` from the flat ``df`` columns, extra per-object columns are
        attached from ``self._e`` / ``self._mu`` / ``self._jet``, and the leading
        di-muon mass is histogrammed for every (channel, level) combination.

        Returns the filled accumulator.
        """
        # Dataset parameters
        dataset = df['dataset']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']

        ### Recover objects, selection, functions and others...
        # Objects
        isTightMuon = self._objects['isTightMuon']
        isTightElectron = self._objects['isTightElectron']
        isGoodJet = self._objects['isGoodJet']

        # Corrections
        GetMuonIsoSF = self._corrections['getMuonIso']
        GetMuonIDSF = self._corrections['getMuonID']

        # Selection
        passNJets = self._selection['passNJets']
        passMETcut = self._selection['passMETcut']

        # Functions
        pow2 = self._functions['pow2']

        # Initialize objects
        met = Initialize({
            'pt': df['MET_pt'],
            'eta': 0,
            'phi': df['MET_phi'],
            'mass': 0
        })
        e = Initialize({
            'pt': df['Electron_pt'],
            'eta': df['Electron_eta'],
            'phi': df['Electron_phi'],
            'mass': df['Electron_mass']
        })
        mu = Initialize({
            'pt': df['Muon_pt'],
            'eta': df['Muon_eta'],
            'phi': df['Muon_phi'],
            'mass': df['Muon_mass']
        })
        j = Initialize({
            'pt': df['Jet_pt'],
            'eta': df['Jet_eta'],
            'phi': df['Jet_phi'],
            'mass': df['Jet_mass']
        })

        # Electron selection
        # Each configured column defaults to zeros and is overwritten when present in df.
        for key in self._e:
            e[key] = e.pt.zeros_like()
            if self._e[key] in df:
                e[key] = df[self._e[key]]
        e['istight'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, year)
        leading_e = e[e.pt.argmax()]
        # Builtin ``bool`` instead of the ``np.bool`` alias, which NumPy 1.24 removed.
        leading_e = leading_e[leading_e.istight.astype(bool)]
        nElec = e.counts

        # Muon selection
        for key in self._mu:
            mu[key] = mu.pt.zeros_like()
            if self._mu[key] in df:
                mu[key] = df[self._mu[key]]
        mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso,
                                    mu.tight_id, year)
        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.istight.astype(bool)]
        nMuon = mu.counts

        # Jet selection
        j['deepcsv'] = df['Jet_btagDeepB']
        j['deepflv'] = df['Jet_btagDeepFlavB']
        for key in self._jet:
            j[key] = j.pt.zeros_like()
            if self._jet[key] in df:
                j[key] = df[self._jet[key]]

        j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
        j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(
            bool)
        j0 = j[j.pt.argmax()]
        j0 = j0[j0.isclean.astype(bool)]
        nJets = j.counts

        # Dilepton pair
        # Falls back to the leading single lepton when the chunk has no pairs at all.
        ele_pairs = e.distincts()
        diele = leading_e
        leading_diele = leading_e
        if ele_pairs.i0.content.size > 0:
            diele = ele_pairs.i0 + ele_pairs.i1
            leading_diele = diele[diele.pt.argmax()]

        mu_pairs = mu.distincts()
        dimu = leading_mu
        leading_dimu = leading_mu
        if mu_pairs.i0.content.size > 0:
            dimu = mu_pairs.i0 + mu_pairs.i1
            leading_dimu = dimu[dimu.pt.argmax()]
        mmumu = leading_dimu.mass

        # Triggers

        # MET filters

        # Weights
        genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
        weights = processor.Weights(df.size)
        weights.add('norm', xsec / sow * genw)

        # Selections and cuts
        selections = processor.PackedSelection()
        channels = ['em', 'mm', 'ee']
        selections.add('em', (nElec == 1) & (nMuon == 1))
        selections.add('ee', (nElec >= 2))
        selections.add('mm', (nMuon >= 2))

        levels = ['dilepton', '2jets']
        # ``nElec >= 2`` or ``nMuon >= 2`` already implies ``nElec + nMuon >= 2``
        # for non-negative counts, so the sum alone expresses the same mask.
        selections.add('dilepton', (nElec + nMuon) >= 2)
        selections.add('2jets', (nJets >= 2))

        # Variables

        # Fill Histos
        hout = self.accumulator.identity()
        hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)

        # The per-event weight does not depend on channel or level.
        weight = weights.weight()
        for ch in channels:
            for lev in levels:
                cut = selections.all(ch, lev)
                invmass_flat = mmumu[cut].flatten()
                # Zero out the weight of NaN masses.
                weights_flat = (~np.isnan(mmumu[cut]) * weight[cut]).flatten()

                hout['invmass'].fill(
                    sample=dataset,
                    channel=ch,
                    level=lev,
                    invmass=invmass_flat,
                    weight=weights_flat)  #*selections.all(*{'mm'})
        #flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
        #flat_weights = {k: (~np.isnan(v[cut])*weight[cut]).flatten() for k, v in variables.items()}

        #hout['invmass'].fill(sample=dataset, channel='mm', level="dilepton", invmass=mmumu, weight=np.ones_like(df['MET_pt']))#weight=weights.weight())#*selections.all(*{'mm'})

        return hout
コード例 #18
0
    def process(self, events):
        """Select muon-tagged AK15 jets in one chunk and fill the histograms.

        Args:
            events: chunk of events. The method reads ``metadata['dataset']``
                and the ``Muon``, ``AK15Puppi``, ``SV``, ``HLT`` and ``Flag``
                collections; for simulation it also reads ``GenPart`` and
                ``genWeight``.

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) in which
            ``sumw``, ``jptemplate``, ``svtemplate`` and every histogram named
            after an entry of the local ``variables`` dict have been filled.
            ``cutflow`` is filled for data and for datasets whose name
            contains ``'QCD'``.
        """
        dataset = events.metadata['dataset']

        # A chunk without generator weights is treated as real data.
        isData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        hout = self.accumulator.identity()

        ###
        #Getting ids from .coffea files
        ###

        get_msd_weight = self._corrections['get_msd_weight']
        isGoodFatJet = self._ids['isGoodFatJet']

        match = self._common['match']

        ###
        #Initialize physics objects
        ###

        mu = events.Muon

        fj = events.AK15Puppi
        fj['sd'] = fj.subjets.sum()
        fj['isgood'] = isGoodFatJet(fj.sd.pt, fj.sd.eta, fj.jetId)
        fj['T'] = TVector2Array.from_polar(fj.pt, fj.phi)
        fj['msd_raw'] = (fj.subjets * (1 - fj.subjets.rawFactor)).sum().mass
        # The correction is evaluated on flat arrays, floored at 1e-5 and
        # re-wrapped with the fat-jet offsets before scaling the raw mass.
        fj['msd_corr'] = fj.msd_raw * awkward.JaggedArray.fromoffsets(
            fj.array.offsets,
            np.maximum(
                1e-5, get_msd_weight(fj.sd.pt.flatten(), fj.sd.eta.flatten())))
        probQCD = fj.probQCDbb + fj.probQCDcc + fj.probQCDb + fj.probQCDc + fj.probQCDothers
        probZHbb = fj.probZbb + fj.probHbb
        fj['ZHbbvsQCD'] = probZHbb / (probZHbb + probQCD)
        fj['tau21'] = fj.tau2 / fj.tau1

        SV = events.SV

        ###
        # Generator-level flavour matching (simulation only)
        ###
        if not isData:

            gen = events.GenPart

            gen['isb'] = (abs(gen.pdgId) == 5) & gen.hasFlags(
                ['fromHardProcess', 'isLastCopy'])
            jetgenb = fj.sd.cross(gen[gen.isb], nested=True)
            # Number of gen b quarks within dR < 1.5 of each fat jet.
            bmatch = ((jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum()
                      == 1) & (gen[gen.isb].counts > 0)
            fj['isb'] = bmatch

            bmatch = ((jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum()
                      == 2) & (gen[gen.isb].counts > 0)
            fj['isbb'] = bmatch

            gen['isc'] = (abs(gen.pdgId) == 4) & gen.hasFlags(
                ['fromHardProcess', 'isLastCopy'])
            jetgenc = fj.sd.cross(gen[gen.isc], nested=True)
            cmatch = ((jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum()
                      == 1) & (gen[gen.isc].counts > 0)
            fj['isc'] = cmatch

            cmatch = ((jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum()
                      == 2) & (gen[gen.isc].counts > 0)
            fj['iscc'] = cmatch

        ##### axis=1 option to remove boundaries between fat-jets #####
        ##### copy (match jaggedness and shape of array) the contents of crossed array into the fat-jet subjets #####
        ##### we do not use copy since it keeps the original array type #####
        ##### fj.subjets is a TLorentzVectorArray #####
        mu = mu[mu.isGlobal]  ## Use a global muon for QCD events
        jetmu = fj.subjets.flatten(axis=1).cross(mu, nested=True)
        # NOTE(review): `&` binds tighter than `==`, so this evaluates as
        # ((mu.counts > 0) & <match count>) == 1 rather than
        # (mu.counts > 0) & (<match count> == 1) -- confirm which is intended.
        mask = (mu.counts > 0) & ((jetmu.i0.delta_r(jetmu.i1) < 0.4) &
                                  ((jetmu.i1.pt / jetmu.i0.pt) < 0.7) &
                                  (jetmu.i1.pt > 7)).sum() == 1

        ##### Three steps to match the jaggedness of the mask array to the fj.subjets array #####
        ##### Using the offset function to copy contents not the type of the array #####
        step1 = fj.subjets.flatten()
        step2 = awkward.JaggedArray.fromoffsets(step1.offsets, mask.content)
        step2 = step2.pad(1).fillna(
            0)  ##### Fill None for empty arrays and convert None to False
        step3 = awkward.JaggedArray.fromoffsets(fj.subjets.offsets, step2)

        ##### fatjet with two subjets matched with muons
        fj['withmu'] = step3.sum() == 2

        ###
        # Selections
        ###

        #### trigger selection ####
        # OR of every configured path that exists in this chunk.
        triggers = np.zeros(events.size, dtype=bool)
        for path in self._btagmu_triggers[self._year]:
            if path not in events.HLT.columns:
                continue
            triggers = triggers | events.HLT[path]
        selection.add('btagmu_triggers', triggers)

        #### MET filters ####
        met_filters = np.ones(events.size, dtype=bool)
        if isData:
            met_filters = met_filters & events.Flag[
                'eeBadScFilter']  #this filter is recommended for data only
        for flag in AnalysisProcessor.met_filter_flags[self._year]:
            met_filters = met_filters & events.Flag[flag]
        selection.add('met_filters', met_filters)

        #### ak15 jet selection ####
        # Highest soft-drop-pt fat jet, kept only if it is good and muon-tagged.
        leading_fj = fj[fj.sd.pt.argmax()]
        leading_fj = leading_fj[leading_fj.isgood.astype(bool)]
        leading_fj = leading_fj[leading_fj.withmu.astype(bool)]

        #### SV selection for matched with leading ak15 jet ####
        SV['ismatched'] = match(SV, leading_fj, 1.5)
        #leading_SV = SV[SV.pt.argmax()]
        leading_SV = SV[SV.dxySig.argmax()]
        leading_SV = leading_SV[leading_SV.ismatched.astype(bool)]

        #fj_good = fj[fj.isgood.astype(bool)]
        #fj_withmu = fj_good[fj_good.withmu.astype(bool)]
        #fj_nwithmu = fj_withmu.counts

        selection.add('fj_pt', (leading_fj.sd.pt.max() > 250))
        selection.add(
            'fj_mass',
            (leading_fj.msd_corr.sum() > 50))  ## optionally also <130
        #selection.add('fj_tau21', (leading_fj.tau21.sum() < 0.3) )
        #selection.add('fjCoupledMu', (fj_nwithmu > 0) )

        print('Selections')
        print(selection.names, '\n')

        # Quantities histogrammed by fill(); keys must equal histogram names.
        variables = {
            'ZHbbvsQCD': leading_fj.ZHbbvsQCD,
            'btagJP': leading_fj.btagJP,
            'tau21': leading_fj.tau21,
            'fjmass': leading_fj.msd_corr,
            'fj1pt': leading_fj.sd.pt,
            #'svmass':    leading_SV.mass,
            'svmass': np.log(leading_SV.mass),
            'svdxysig': leading_SV.dxySig
        }

        def fill(dataset, gentype, weight, cut):
            """Fill every histogram named after a key of ``variables``.

            ``cut`` must be a per-event boolean mask; ``gentype`` and
            ``weight`` are per-event arrays that are broadcast onto the
            entries of each variable surviving the mask.
            """
            for histname, h in hout.items():
                if not isinstance(h, hist.Hist):
                    continue
                # Only histograms that have a matching variable are filled
                # here; sumw and the templates are handled by the caller.
                if histname not in variables:
                    continue
                selected = variables[histname][cut]
                valid = ~np.isnan(selected)
                flat_variable = {histname: selected.flatten()}
                h.fill(dataset=dataset,
                       gentype=(valid * gentype[cut]).flatten(),
                       **flat_variable,
                       weight=(valid * weight[cut]).flatten())

        if isData:
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)

            cut = selection.all(*selection.names)
            vcut = np.zeros(events.size, dtype=int)
            hout['cutflow'].fill(dataset=dataset,
                                 cutname='nocut',
                                 cut=vcut,
                                 weight=np.ones(events.size))
            allcuts = set()
            ### cutflow fill: cuts are applied cumulatively in order
            for i, icut in enumerate(selection.names):
                allcuts.add(icut)
                jcut = selection.all(*allcuts)
                vcut = (i + 1) * jcut
                hout['cutflow'].fill(dataset=dataset,
                                     cutname=str(icut),
                                     cut=vcut,
                                     weight=jcut)

            ##### template for bb SF #####
            ##### btagjp template #####
            hout['jptemplate'].fill(dataset=dataset,
                                    gentype=np.zeros(events.size, dtype=int),
                                    btagJP=leading_fj.btagJP.sum(),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=np.ones(events.size) * cut)
            ##### sv mass template #####
            hout['svtemplate'].fill(
                dataset=dataset,
                gentype=np.zeros(events.size, dtype=int),
                #svmass=leading_SV.mass.sum(),
                svmass=np.log(leading_SV.mass.sum()),
                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                weight=np.ones(events.size) * cut)
            fill(dataset, np.zeros(events.size, dtype=int),
                 np.ones(events.size), cut)

        else:
            # No individual weight is added below, so weights.weight() is the
            # Weights default for every event.
            weights = processor.Weights(len(events))

            # Mutually exclusive flavour categories of the leading fat jet,
            # with priority bb > b > cc > c > other.
            wgentype = {
                'bb': (leading_fj.isbb).sum(),
                'b': (~leading_fj.isbb & leading_fj.isb).sum(),
                'cc':
                (~leading_fj.isbb & ~leading_fj.isb & leading_fj.iscc).sum(),
                'c': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                      & leading_fj.isc).sum(),
                'other': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                          & ~leading_fj.isc).sum(),
            }
            # Encode the category as the integer code from self._gentype_map.
            vgentype = np.zeros(events.size, dtype=int)
            for gentype, code in self._gentype_map.items():
                vgentype += code * wgentype[gentype]

            hout['sumw'].fill(dataset=dataset,
                              sumw=1,
                              weight=events.genWeight.sum())

            cut = selection.all(*selection.names)
            if 'QCD' in dataset:
                vcut = np.zeros(events.size, dtype=int)
                hout['cutflow'].fill(dataset=dataset,
                                     cutname='nocut',
                                     cut=vcut,
                                     weight=weights.weight())
                allcuts = set()
                ### cutflow fill: cuts are applied cumulatively in order
                for i, icut in enumerate(selection.names):
                    allcuts.add(icut)
                    jcut = selection.all(*allcuts)
                    vcut = (i + 1) * jcut
                    hout['cutflow'].fill(dataset=dataset,
                                         cutname=str(icut),
                                         cut=vcut,
                                         weight=weights.weight() * jcut)

                ### other variables
                fill(dataset, vgentype, weights.weight(), cut)

                ##### template for bb SF #####
                ##### btagjp template #####
                hout['jptemplate'].fill(dataset=dataset,
                                        gentype=vgentype,
                                        btagJP=leading_fj.btagJP.sum(),
                                        ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                        weight=weights.weight() * cut)
                ##### sv mass template #####
                hout['svtemplate'].fill(
                    dataset=dataset,
                    gentype=vgentype,
                    #svmass=leading_SV.mass.sum(),
                    svmass=np.log(leading_SV.mass.sum()),
                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                    weight=np.ones(events.size) * cut)
            else:
                # Non-QCD simulation is filled without any selection.  The
                # mask must be boolean: an integer array of ones would be
                # taken as fancy indices and select event 1 over and over.
                fill(dataset, vgentype, weights.weight(),
                     np.ones(events.size, dtype=bool))

                ##### template for bb SF #####
                ##### btagjp template #####
                hout['jptemplate'].fill(dataset=dataset,
                                        gentype=vgentype,
                                        btagJP=leading_fj.btagJP.sum(),
                                        ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                        weight=weights.weight())
                ##### sv mass template #####
                # NOTE(review): unlike jptemplate above, this template is
                # still weighted by `cut` -- confirm whether that is intended.
                hout['svtemplate'].fill(
                    dataset=dataset,
                    gentype=vgentype,
                    #svmass=leading_SV.mass.sum(),
                    svmass=np.log(leading_SV.mass.sum()),
                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                    weight=np.ones(events.size) * cut)

        return hout
コード例 #19
0
ファイル: dyProcessor.py プロジェクト: dntaylor/NanoAnalysis
    def process(self, events):
        """Build same-flavour dilepton (Z) candidates and fill histograms.

        Args:
            events: chunk of events. The method reads ``metadata['dataset']``,
                ``run``, ``luminosityBlock``, ``PV``, ``Muon``, ``Electron``
                and ``MET``; for simulation it also reads ``genWeight`` and
                ``Pileup``.

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) with the
            ``cutflow`` counters, ``sumw`` and the ``<sel>_zmass``,
            ``<sel>_met`` and ``<sel>_pileup`` histograms updated.  If no
            event in the chunk passes the trigger, the accumulator is
            returned right after the trigger counter is updated.
        """
        logging.debug('starting process')
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        # Real data is recognised purely by the dataset name.
        self._isData = dataset in [
            'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma',
            'MuonEG'
        ]

        selection = processor.PackedSelection()

        # TODO: instead of cutflow, use processor.PackedSelection
        output['cutflow']['all events'] += events.size

        logging.debug('applying lumi mask')
        if self._isData:
            lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
            events['passLumiMask'] = lumiMask(np.array(events.run),
                                              np.array(events.luminosityBlock))
        else:
            # Simulation always passes the luminosity mask.
            events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
        passLumiMask = events.passLumiMask
        selection.add('lumiMask', passLumiMask)

        logging.debug('adding trigger')
        self._add_trigger(events)

        passHLT = events.passHLT
        selection.add('trigger', passHLT)
        output['cutflow']['pass trigger'] += passHLT.sum()
        # if no trigger: fast return
        if passHLT.sum() == 0:
            return output

        # require one good vertex
        logging.debug('checking vertices')
        passGoodVertex = (events.PV.npvsGood > 0)
        output['cutflow']['good vertex'] += passGoodVertex.sum()
        selection.add('goodVertex', passGoodVertex)

        # run rochester: compute a per-muon momentum scale factor _k
        rochester = self._rochester
        _muon_offsets = events.Muon.pt.offsets
        _charge = events.Muon.charge
        _pt = events.Muon.pt
        _eta = events.Muon.eta
        _phi = events.Muon.phi
        if self._isData:
            _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
            # _kErr = rochester.kScaleDTerror(_charge, _pt, _eta, _phi)
        else:
            # for default if gen present
            _gpt = events.Muon.matched_gen.pt
            # for backup w/o gen
            _nl = events.Muon.nTrackerLayers
            # One uniform random number per muon for the smearing fallback.
            _u = JaggedArray.fromoffsets(_muon_offsets,
                                         np.random.rand(*_pt.flatten().shape))
            _hasgen = (_gpt.fillna(-1) > 0)
            _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                           _eta[_hasgen], _phi[_hasgen],
                                           _gpt[_hasgen])
            _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                         _eta[~_hasgen], _phi[~_hasgen],
                                         _nl[~_hasgen], _u[~_hasgen])
            # Stitch the two corrections back together in muon order.
            _k = np.ones_like(_pt.flatten())
            _k[_hasgen.flatten()] = _kspread.flatten()
            _k[~_hasgen.flatten()] = _ksmear.flatten()
            _k = JaggedArray.fromoffsets(_muon_offsets, _k)
            # _kErrspread = rochester.kSpreadMCerror(_charge[_hasgen], _pt[_hasgen], _eta[_hasgen], _phi[_hasgen],
            #                                        _gpt[_hasgen])
            # _kErrsmear  = rochester.kSmearMCerror(_charge[~_hasgen], _pt[~_hasgen], _eta[~_hasgen], _phi[~_hasgen],
            #                                       _nl[~_hasgen], _u[~_hasgen])
            # _kErr = np.ones_like(_pt.flatten())
            # _kErr[_hasgen.flatten()] = _kErrspread.flatten()
            # _kErr[~_hasgen.flatten()] = _kErrsmear.flatten()
            # _kErr = JaggedArray.fromoffsets(_muon_offsets, _kErr)

        # The correction is applied only to muons with pt below 200.
        mask = _pt.flatten() < 200
        rochester_pt = _pt.flatten()
        rochester_pt[mask] = (_k * _pt).flatten()[mask]
        events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets,
                                                    rochester_pt)

        logging.debug('adding muon id')
        self._add_muon_id(events.Muon)
        logging.debug('adding electron id')
        self._add_electron_id(events.Electron)

        logging.debug('selecting muons')
        muonId = (events.Muon.passId > 0)
        muons = events.Muon[muonId]

        logging.debug('selecting electrons')
        electronId = (events.Electron.passId > 0)
        electrons = events.Electron[electronId]

        passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
        output['cutflow']['two leptons'] += passTwoLeptons.sum()
        selection.add('twoLeptons', passTwoLeptons)

        # build cands
        # remake z to have same columns
        # pt eta phi mass charge pdgId
        logging.debug('rebuilding leptons')

        def rebuild(leptons):
            """Return the leptons as candidates with a common set of columns."""
            return JaggedCandidateArray.candidatesfromoffsets(
                leptons.offsets,
                pt=leptons.pt.flatten(),
                eta=leptons.eta.flatten(),
                phi=leptons.phi.flatten(),
                mass=leptons.mass.flatten(),
                charge=leptons.charge.flatten(),
                pdgId=leptons.pdgId.flatten(),
                # needed for electron SF; falls back to eta when the
                # collection has no etaSC attribute
                etaSC=leptons.etaSC.flatten()
                if hasattr(leptons, 'etaSC') else leptons.eta.flatten(),
            )

        newMuons = rebuild(muons)
        newElectrons = rebuild(electrons)

        logging.debug('building 2 leptons')
        ee_cands = newElectrons.choose(2)
        mm_cands = newMuons.choose(2)

        # combine them
        z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

        def bestcombination(zcands):
            """Keep at most one opposite-charge pair per event."""
            good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
            # this keeps the first z cand in each event
            # should instead sort the best first
            # TODO: select best
            zcands = zcands[good_charge][:, :1]
            return zcands

        logging.debug('selecting best combinations')
        z_cands = bestcombination(z_cands)

        # z1 / z2 hold, per candidate, the index (0 or 1) of the higher-pt
        # and lower-pt lepton of the pair respectively.
        swapped = (z_cands['0']['p4'].pt.flatten() <
                   z_cands['1']['p4'].pt.flatten())
        z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
        z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
        z1[swapped] = 1
        z2[swapped] = 0
        z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
        z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)

        passZCand = (z_cands.counts > 0)
        output['cutflow']['z cand'] += passZCand.sum()
        selection.add('zCand', passZCand)

        # Explicit parentheses: `&` binds tighter than `>`, so the count
        # comparison has to be grouped before combining with passZCand.
        inMassWindow = (z_cands.p4.mass > 60) & (z_cands.p4.mass < 120)
        passMassWindow = passZCand & (z_cands[inMassWindow].counts > 0)
        output['cutflow']['mass window'] += passMassWindow.sum()
        selection.add('massWindow', passMassWindow)

        # im sure there is a better way, but for now just do this
        def get_lepton_values(zl, key):
            """Pick ``key`` from lepton 0 or 1 of each candidate as told by ``zl``.

            ``zl`` is a jagged array of lepton indices (0 or 1) with the
            offsets of ``z_cands``; the result has the same offsets.
            """
            val = np.zeros_like(zl.flatten(), dtype=float)
            if len(val) == 0:
                return JaggedArray.fromoffsets(zl.offsets, val)
            for i in range(2):
                mask = (i == zl.flatten())
                lepton = z_cands[passZCand][str(i)].flatten()[mask]
                if key in ('pt', 'eta', 'phi', 'mass'):
                    # kinematic quantities live on the four-vector
                    val[mask] = getattr(lepton['p4'], key)
                else:
                    val[mask] = lepton[key]
            return JaggedArray.fromoffsets(zl.offsets, val)

        z1pt = get_lepton_values(z1, 'pt')
        z2pt = get_lepton_values(z2, 'pt')
        # any() tests the thresholds themselves; `.counts > 0` would only
        # test that a candidate exists and never look at the pt values.
        passPt = ((z1pt > 30) & (z2pt > 20)).any()
        output['cutflow']['pt threshold'] += passPt.sum()
        selection.add('ptThreshold', passPt)

        chanSels = {}
        z1pdg = get_lepton_values(z1, 'pdgId')
        z2pdg = get_lepton_values(z2, 'pdgId')
        chanPdgIds = {'ee': (11, 11), 'mm': (13, 13)}
        for chan, pdgIds in chanPdgIds.items():
            chanSels[chan] = ((abs(z1pdg) == pdgIds[0])
                              & (abs(z2pdg) == pdgIds[1]))

        weights = processor.Weights(events.run.size)
        if self._isData:
            output['sumw'][dataset] = 0  # always set to 0 for data
        else:
            output['sumw'][dataset] += events.genWeight.sum()
            weights.add('genWeight', events.genWeight)
            weights.add(
                'pileupWeight',
                self._corrections['pileupWeight'](events.Pileup.nPU),
                self._corrections['pileupWeightUp'](events.Pileup.nPU),
                self._corrections['pileupWeightDown'](events.Pileup.nPU),
            )
            zls = [z1, z2]
            # electron sf
            for ei, zl in enumerate(zls):
                ei = str(ei)
                eta = get_lepton_values(zl, 'etaSC')
                pt = get_lepton_values(zl, 'pt')
                electronRecoSF = self._corrections['electron_reco'](eta, pt)
                electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
                electronSF = np.ones_like(electronRecoSF.prod())
                if ei in ['0', '1']:
                    chans = ['ee']
                else:
                    chans = []
                for chan in chans:
                    # turns empty arrays into 0's, nonempty int 1's
                    # NOTE(review): this is true for every event that has a
                    # candidate, whatever its flavour -- confirm intent.
                    chanSel = (chanSels[chan].ones_like().sum() > 0)
                    electronSF[chanSel] *= electronRecoSF[chanSel].prod()
                    electronSF[chanSel] *= electronIdSF[chanSel].prod()
                weights.add('electronSF' + ei, electronSF)

            # muon SF
            for mi, zl in enumerate(zls):
                mi = str(mi)
                eta = get_lepton_values(zl, 'eta')
                pt = get_lepton_values(zl, 'pt')
                # The 2016 lookups take (eta, pt); the others (pt, |eta|).
                if self._year == '2016':
                    idSF = self._corrections['muon_id_MediumID'](eta, pt)
                    isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                        eta, pt)
                else:
                    idSF = self._corrections['muon_id_MediumPromptID'](
                        pt, abs(eta))
                    isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                        pt, abs(eta))

                muonSF = np.ones_like(idSF.prod())
                if mi in ['0', '1']:
                    chans = ['mm']
                else:
                    chans = []
                for chan in chans:
                    # turns empty arrays into 0's, nonempty int 1's
                    # NOTE(review): same flavour-blind mask as for electrons.
                    chanSel = (chanSels[chan].ones_like().sum() > 0)
                    muonSF[chanSel] *= idSF[chanSel].prod()
                    muonSF[chanSel] *= isoSF[chanSel].prod()
                weights.add('muonSF' + mi, muonSF)

        logging.debug('filling')
        for sel in self._selections:
            # NOTE(review): `cut` is only assigned for 'massWindow'; any other
            # entry of self._selections reuses the previous value, or fails
            # with an unbound local if it comes first.
            if sel == 'massWindow':
                cut = selection.all('lumiMask', 'trigger', 'goodVertex',
                                    'twoLeptons', 'zCand', 'massWindow',
                                    'ptThreshold')
            for chan in ['ee', 'mm']:
                chanSel = chanSels[chan]
                # Candidates of the other channel get weight zero.
                weight = chanSel.astype(float) * weights.weight()

                output[sel + '_zmass'].fill(
                    dataset=dataset,
                    channel=chan,
                    mass=z_cands[cut].p4.mass.flatten(),
                    weight=weight[cut].flatten(),
                )
                output[sel + '_met'].fill(
                    dataset=dataset,
                    channel=chan,
                    met=events.MET.pt[cut],
                    weight=weight[cut].flatten(),
                )
                output[sel + '_pileup'].fill(
                    dataset=dataset,
                    channel=chan,
                    npvs=events.PV.npvs[cut],
                    weight=weight[cut].flatten(),
                )

        return output
コード例 #20
0
ファイル: purity.py プロジェクト: GageDeZoort/decaf
    def process(self, events):
        """Fill the photon sieie/pt counting histograms for one chunk.

        Three photon categories are filled into ``hout['count']``:
        ``medium``, ``medium_nosieie`` and ``medium_nosieie_invertiso``.
        All of them use cleaned photons from events that pass the MET
        filters, a single-photon trigger, ``MET.pt < 60`` and at least one
        cleaned jet.

        Args:
            events: chunk of events. The method reads ``metadata['dataset']``
                and the ``Muon``, ``Electron``, ``Photon``, ``Jet``, ``MET``,
                ``HLT`` and ``Flag`` collections; ``genWeight`` is read for
                simulation.

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) with
            ``count`` and ``sumw`` filled.
        """
        dataset = events.metadata['dataset']
        # A chunk without generator weights is treated as real data.
        isData = 'genWeight' not in events.columns
        hout = self.accumulator.identity()
        match = self._common['match']

        isLooseElectron = self._ids['isLooseElectron']
        isLooseMuon = self._ids['isLooseMuon']
        isLoosePhoton = self._ids['isLoosePhoton']

        #### Select loose muon and electron to select clean photon
        mu = events.Muon
        mu['isloose'] = isLooseMuon(mu.pt, mu.eta, mu.pfRelIso04_all,
                                    mu.looseId, self._year)
        mu_loose = mu[mu.isloose.astype(bool)]

        e = events.Electron
        e['isclean'] = ~match(e, mu_loose, 0.3)
        e['isloose'] = isLooseElectron(e.pt, e.eta + e.deltaEtaSC, e.dxy, e.dz,
                                       e.cutBased, self._year)
        e_clean = e[e.isclean.astype(bool)]
        e_loose = e_clean[e_clean.isloose.astype(bool)]

        #### Consider clean and tight photon for purity measurement
        pho = events.Photon
        pho['isclean'] = ~match(pho, mu_loose, 0.5) & ~match(pho, e_loose, 0.5)

        # The photon-ID column has a different name in 2016.
        _id = 'cutBasedBitmap'
        if self._year == '2016':
            _id = 'cutBased'

        def isPurityPhoton(pt, medium_id):
            """Return the pt > 200 and medium-ID mask for the configured year."""
            if self._year == '2016':
                # 2016 compares the ID value directly
                return (pt > 200) & (medium_id >= 2)
            # other years test bit 2 of the ID bitmap
            return (pt > 200) & ((medium_id & 2) == 2)

        pho['isloose'] = isLoosePhoton(pho.pt, pho.eta, pho[_id],
                                       self._year) & (pho.electronVeto)
        pho['ispurity'] = isPurityPhoton(
            pho.pt, pho[_id]) & (pho.isScEtaEB) & (pho.electronVeto)
        pho_clean = pho[pho.isclean.astype(bool)]
        pho_loose = pho_clean[pho_clean.isloose.astype(bool)]
        pho_purity = pho_clean[pho_clean.ispurity.astype(bool)]
        # Same kinematic requirements, with the ID supplied by the
        # medium_id_no_sieie* helpers instead of the stored ID column.
        pho_nosieie = pho_clean[(pho_clean.pt > 200) & (pho_clean.isScEtaEB) &
                                (pho_clean.electronVeto)
                                & medium_id_no_sieie(pho_clean)]
        pho_nosieie_inv_iso = pho_clean[(pho_clean.pt > 200)
                                        & (pho_clean.isScEtaEB) &
                                        (pho_clean.electronVeto) &
                                        medium_id_no_sieie_inv_iso(pho_clean)]

        #### Consider AK4 jet
        def isPurityJet(pt, eta, jet_id):
            """Return the jet mask: pt > 30, |eta| < 2.4 and bit 2 of the ID set."""
            mask = (pt > 30) & (abs(eta) < 2.4) & ((jet_id & 2) == 2)
            return mask

        j = events.Jet
        #30 GeV cut on jet pT, we need to check later
        j['ispurity'] = isPurityJet(j.pt, j.eta, j.jetId)
        j['isclean'] = ~match(j, e_loose, 0.4) & ~match(
            j, mu_loose, 0.4) & ~match(j, pho_loose, 0.4)
        j_purity = j[j.ispurity.astype(bool)]
        j_clean = j_purity[j_purity.isclean.astype(bool)]
        j_nclean = j_clean.counts

        met = events.MET

        #### Genweights
        weights = processor.Weights(len(events), storeIndividual=True)

        if isData:
            weights.add('genw', np.ones(events.size))
        else:
            weights.add('genw', events.genWeight)

        #### MET filter & single photon trigger
        met_filters = np.ones(events.size, dtype=bool)
        if isData:
            met_filters = met_filters & events.Flag['eeBadScFilter']
        for flag in PhotonPurity.met_filter_flags[self._year]:
            met_filters = met_filters & events.Flag[flag]

        # OR of every configured path that exists in this chunk.
        triggers = np.zeros(events.size, dtype=bool)
        for path in self._singlephoton_triggers[self._year]:
            if path not in events.HLT.columns:
                continue
            triggers = triggers | events.HLT[path]

        # The event-level cuts are combined into one mask instead of being
        # registered in a PackedSelection.
        event_mask = met_filters & triggers & (met.pt < 60) & (j_nclean > 0)

        hout['count'].fill(dataset=dataset,
                           cat='medium',
                           sieie=pho_purity.sieie[event_mask].flatten(),
                           pt=pho_purity.pt[event_mask].flatten(),
                           weight=weight_shape(pho_purity.sieie[event_mask],
                                               weights.weight()[event_mask]))

        hout['count'].fill(dataset=dataset,
                           cat='medium_nosieie',
                           sieie=pho_nosieie.sieie[event_mask].flatten(),
                           pt=pho_nosieie.pt[event_mask].flatten(),
                           weight=weight_shape(pho_nosieie.sieie[event_mask],
                                               weights.weight()[event_mask]))

        hout['count'].fill(
            dataset=dataset,
            cat='medium_nosieie_invertiso',
            sieie=pho_nosieie_inv_iso.sieie[event_mask].flatten(),
            pt=pho_nosieie_inv_iso.pt[event_mask].flatten(),
            weight=weight_shape(pho_nosieie_inv_iso.sieie[event_mask],
                                weights.weight()[event_mask]))

        if isData:
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)
        else:
            hout['sumw'].fill(dataset=dataset,
                              sumw=1,
                              weight=events.genWeight.sum())

        return hout
コード例 #21
0
    def process(self, events):
        """Classify events into 2LSS / 3L channels and fill the histograms.

        Electrons, muons and jets are filtered with the object-selection
        callables stored in ``self._objects``.  Events are then sorted into
        dilepton and trilepton channels, combined with the trigger decision
        returned by ``self._selection['passTrigger']`` and with a set of
        jet / b-tag multiplicity levels.  Every allowed
        (variable, channel, level) combination is filled into the histogram
        of the same name in the accumulator.

        Args:
            events: chunk of events exposing ``metadata['dataset']``, the
                ``MET``, ``Electron``, ``Muon`` and ``Jet`` collections,
                ``size``, ``events['MET_pt']`` and, for simulation,
                ``events['genWeight']``.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``
            after all histograms have been filled.
        """
        # Dataset parameters (looked up with the full dataset name, before it
        # is shortened below).
        dataset = events.metadata['dataset']
        sample_info = self._samples[dataset]
        xsec = sample_info['xsec']
        sow = sample_info['nSumOfWeights']
        isData = sample_info['isData']
        data_streams = [
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        ]
        # For the listed streams keep only the part before the first '_'.
        if any(stream in dataset for stream in data_streams):
            dataset = dataset.split('_')[0]

        ### Recover objects, selection, functions and others...
        # Objects
        isGoodJet = self._objects['isGoodJet']
        isClean = self._objects['isClean']
        # isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt,
        #           tightCharge, jetDeepB=0, minpt=15)
        isMuonMVA = self._objects['isMuonMVA']
        # isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA,
        #           lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)
        isElecMVA = self._objects['isElecMVA']

        # Selection
        passTrigger = self._selection['passTrigger']

        # Initialize objects
        met = events.MET
        e = events.Electron
        mu = events.Muon
        j = events.Jet

        # Electron selection
        e['isGood'] = isElecMVA(e.pt,
                                e.eta,
                                e.dxy,
                                e.dz,
                                e.miniPFRelIso_all,
                                e.sip3d,
                                e.mvaTTH,
                                e.mvaFall17V2Iso,
                                e.lostHits,
                                e.convVeto,
                                e.tightCharge,
                                minpt=10)

        # Muon selection
        mu['isGood'] = isMuonMVA(mu.pt,
                                 mu.eta,
                                 mu.dxy,
                                 mu.dz,
                                 mu.miniPFRelIso_all,
                                 mu.sip3d,
                                 mu.mvaTTH,
                                 mu.mediumPromptId,
                                 mu.tightCharge,
                                 minpt=10)

        # Keep only good leptons.  The builtin ``bool`` is used because the
        # ``np.bool`` alias was removed in NumPy 1.24.
        e = e[e.isGood.astype(bool)]
        mu = mu[mu.isGood.astype(bool)]
        nElec = e.counts
        nMuon = mu.counts

        # Leading (highest-pt) good electron and muon of each event.
        e0 = e[e.pt.argmax()]
        m0 = mu[mu.pt.argmax()]

        # Jet selection
        j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId)
        j['isclean'] = isClean(j, e, mu)
        goodJets = j[(j.isclean) & (j.isgood)]
        njets = goodJets.counts
        ht = goodJets.pt.sum()
        j0 = goodJets[goodJets.pt.argmax()]
        nbtags = goodJets[goodJets.btagDeepFlavB > 0.2770].counts

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        # (pt > -1 is always true; it broadcasts the per-event condition to
        # the jagged lepton dimension.)
        singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
        singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
        em = singe.cross(singm)
        emSSmask = (em.i0.charge * em.i1.charge > 0)
        emSS = em[emSSmask]

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
        mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

        eepairs = ee.distincts()
        eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
        eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
        eeoffZmask = (eeonZmask == 0)

        mmpairs = mm.distincts()
        mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
        mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
        mmoffZmask = (mmonZmask == 0)

        eeSSonZ = eepairs[eeSSmask & eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ = mmpairs[mmSSmask & mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]

        # Cuts: turn the per-pair masks into per-event masks (true when at
        # least one pair of the event passes).
        eeSSmask = (eeSSmask[eeSSmask].counts > 0)
        mmSSmask = (mmSSmask[mmSSmask].counts > 0)
        eeonZmask = (eeonZmask[eeonZmask].counts > 0)
        eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
        mmonZmask = (mmonZmask[mmonZmask].counts > 0)
        mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
        emSSmask = (emSSmask[emSSmask].counts > 0)

        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
        elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
        ee_eem = elec_eem.distincts()
        ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
        ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
        ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
        ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)

        eepair_eem = (ee_eem.i0 + ee_eem.i1)
        trilep_eem = eepair_eem.cross(muon_eem)
        trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

        # mme
        muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
        elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
        mm_mme = muon_mme.distincts()
        mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
        mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
        mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
        mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)

        mmpair_mme = (mm_mme.i0 + mm_mme.i1)
        trilep_mme = mmpair_mme.cross(elec_mme)
        trilep_mme = (trilep_mme.i0 + trilep_mme.i1)
        mZ_mme = mmpair_mme.mass
        mZ_eem = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        ### eee and mmm
        eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
        mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]
        # Create pairs (as index pairs into eee / mmm)
        ee_pairs = eee.argchoose(2)
        mm_pairs = mmm.argchoose(2)

        # Select pairs that are SFOS.
        eeSFOS_pairs = ee_pairs[
            (np.abs(eee[ee_pairs.i0].pdgId) == np.abs(eee[ee_pairs.i1].pdgId))
            & (eee[ee_pairs.i0].charge != eee[ee_pairs.i1].charge)]
        mmSFOS_pairs = mm_pairs[
            (np.abs(mmm[mm_pairs.i0].pdgId) == np.abs(mmm[mm_pairs.i1].pdgId))
            & (mmm[mm_pairs.i0].charge != mmm[mm_pairs.i1].charge)]
        # Find the pair with mass closest to Z.  Despite the "mask" suffix,
        # eeOSSFmask / mmOSSFmask hold the selected index pair.
        eeOSSFmask = eeSFOS_pairs[np.abs((eee[eeSFOS_pairs.i0] +
                                          eee[eeSFOS_pairs.i1]).mass -
                                         91.2).argmin()]
        mmOSSFmask = mmSFOS_pairs[np.abs((mmm[mmSFOS_pairs.i0] +
                                          mmm[mmSFOS_pairs.i1]).mass -
                                         91.2).argmin()]

        # Z candidates built from the selected pairs
        eZ = eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]
        mZ = mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]

        onZmask_ee = np.abs(eZ.mass - 91.2) < 15
        onZmask_mm = np.abs(mZ.mass - 91.2) < 15
        offZmask_ee = np.abs(eZ.mass - 91.2) > 15
        offZmask_mm = np.abs(mZ.mass - 91.2) > 15

        # Create masks (per event)
        eeeOnZmask = onZmask_ee[onZmask_ee].counts > 0
        eeeOffZmask = offZmask_ee[offZmask_ee].counts > 0
        mmmOnZmask = onZmask_mm[onZmask_mm].counts > 0
        mmmOffZmask = offZmask_mm[offZmask_mm].counts > 0

        # Leptons from W: the one lepton of the triplet that is not in the Z
        # pair.  The three indices are {0, 1, 2}, so the remaining one is
        # 3 - i0 - i1.  (The former ``~i0 | ~i1`` equals ``~(i0 & i1)``, which
        # is -1 for every distinct pair, i.e. always the last lepton.)
        eW = eee[3 - eeOSSFmask.i0 - eeOSSFmask.i1]
        mW = mmm[3 - mmOSSFmask.i0 - mmOSSFmask.i1]

        triElec = eZ + eW
        triMuon = mZ + mW

        mZ_eee = eZ.mass
        m3l_eee = triElec.mass
        mZ_mmm = mZ.mass
        m3l_mmm = triMuon.mass

        # Triggers
        trig_eeSS = passTrigger(events, 'ee', isData, dataset)
        trig_mmSS = passTrigger(events, 'mm', isData, dataset)
        trig_emSS = passTrigger(events, 'em', isData, dataset)
        trig_eee = passTrigger(events, 'eee', isData, dataset)
        trig_mmm = passTrigger(events, 'mmm', isData, dataset)
        trig_eem = passTrigger(events, 'eem', isData, dataset)
        trig_mme = passTrigger(events, 'mme', isData, dataset)

        # Weights: unit weight for data, (xsec / sow) * genWeight otherwise
        genw = np.ones_like(
            events['MET_pt']) if isData else events['genWeight']
        weights = processor.Weights(events.size)
        weights.add('norm', genw if isData else (xsec / sow) * genw)

        # Selections and cuts
        selections = processor.PackedSelection()
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('emSS', (emSSmask) & (trig_emSS))

        channels3L = [
            'eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ',
            'eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ',
        ]
        selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
        selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
        selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
        selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))
        selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
        selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
        selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
        selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

        levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
        selections.add('base', (nElec + nMuon >= 2))
        selections.add('2jets', (njets >= 2))
        selections.add('4jets', (njets >= 4))
        selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
        selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

        # Variables
        invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
        invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
        invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
        invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
        invMass_emSS = (emSS.i0 + emSS.i1).mass

        # Histogram name -> values.  'invmass' and 'm3l' are keyed by channel;
        # every other entry is a single array shared by all channels.
        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['nbtags'] = nbtags
        varnames['invmass'] = {
            'eeSSonZ': invMass_eeSSonZ,
            'eeSSoffZ': invMass_eeSSoffZ,
            'mmSSonZ': invMass_mmSSonZ,
            'mmSSoffZ': invMass_mmSSoffZ,
            'emSS': invMass_emSS,
            'eemSSonZ': mZ_eem,
            'eemSSoffZ': mZ_eem,
            'mmeSSonZ': mZ_mme,
            'mmeSSoffZ': mZ_mme,
            'eeeSSonZ': mZ_eee,
            'eeeSSoffZ': mZ_eee,
            'mmmSSonZ': mZ_mmm,
            'mmmSSoffZ': mZ_mmm,
        }
        varnames['m3l'] = {
            'eemSSonZ': m3l_eem,
            'eemSSoffZ': m3l_eem,
            'mmeSSonZ': m3l_mme,
            'mmeSSoffZ': m3l_mme,
            'eeeSSonZ': m3l_eee,
            'eeeSSoffZ': m3l_eee,
            'mmmSSonZ': m3l_mmm,
            'mmmSSoffZ': m3l_mmm,
        }
        varnames['e0pt'] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt'] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['j0pt'] = j0.pt
        varnames['j0eta'] = j0.eta
        # Builtin ``int``: the ``np.int`` alias was removed in NumPy 1.24.
        varnames['counts'] = np.ones_like(events.MET.pt, dtype=int)

        # Fill Histos
        hout = self.accumulator.identity()
        hout['dummy'].fill(sample=dataset, dummy=1, weight=events.size)

        # The event mask and the selected weights depend only on
        # (channel, level), so compute them once instead of once per variable.
        all_channels = channels2LSS + channels3L
        event_weight = weights.weight()
        cut_masks = {}
        selected_weights = {}
        for ch in all_channels:
            for lev in levels:
                mask = selections.all(ch, lev)
                cut_masks[ch, lev] = mask
                selected_weights[ch, lev] = event_weight[mask].flatten()

        # Channels / levels for which a given variable is not filled.
        eee_mmm_channels = {'eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ'}
        muon_only_channels = {'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ'}
        elec_only_channels = {'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ'}
        skip_channels = {
            'invmass': eee_mmm_channels,
            'm3l': set(channels2LSS) | eee_mmm_channels,
            'e0pt': muon_only_channels,
            'e0eta': muon_only_channels,
            'm0pt': elec_only_channels,
            'm0eta': elec_only_channels,
        }
        skip_levels = {'j0pt': {'base'}, 'j0eta': {'base'}}
        per_channel_vars = ('invmass', 'm3l')

        for var, v in varnames.items():
            var_skip_channels = skip_channels.get(var, ())
            var_skip_levels = skip_levels.get(var, ())
            for ch in all_channels:
                if ch in var_skip_channels:
                    continue
                source = v[ch] if var in per_channel_vars else v
                for lev in levels:
                    if lev in var_skip_levels:
                        continue
                    weights_flat = selected_weights[ch, lev]
                    values = source[cut_masks[ch, lev]].flatten()
                    if var == 'counts':
                        # Raw event counts are filled with unit weights.
                        fill_weight = np.ones_like(weights_flat, dtype=int)
                    else:
                        fill_weight = weights_flat
                    # The dense axis of each histogram is named after the
                    # variable itself.
                    hout[var].fill(sample=dataset,
                                   channel=ch,
                                   cut=lev,
                                   weight=fill_weight,
                                   **{var: values})

        return hout
コード例 #22
0
    def process(self, df):
        dataset = df.metadata['dataset']
        isRealData = 'genWeight' not in df.columns
        output = self.accumulator.identity()
        selection = processor.PackedSelection()
        output = self.accumulator.identity()

        good = False
        goodMuon = ((df.Muon.pt > 27.) & (np.abs(df.Muon.eta) < 2.4))
        nmuons = goodMuon.sum()

        goodElectron = ((df.Electron.pt > 30.)
                        & (np.abs(df.Electron.eta) < 2.5))
        nelectrons = goodElectron.sum()

        df.FatJet['msdcorr'] = corrected_msoftdrop(df.FatJet)

        goodFatJet = ((df.FatJet.pt > 300.)
                      & (np.abs(df.FatJet.eta) < 2.4)
                      & (df.FatJet.msdcorr > 10.)
                      & (df.FatJet.isTight))
        nfatjets = goodFatJet.sum()

        if self._channel == 'muon':
            good = ((nmuons >= 1) & (nfatjets >= 1))
        else:
            good = ((nelectrons >= 1) & (nfatjets >= 1))
        events = df[good]

        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        # trigger
        trigger = np.zeros(df.size, dtype='bool')
        for t in self._triggers[self._year + '_' + self._trigger]:
            try:
                trigger = trigger | df.HLT[t]
            except:
                warnings.warn("Missing trigger %s" % t, RuntimeWarning)
        selection.add('trigger', trigger[good])

        # Muons
        candidatemuon = events.Muon[:, 0:1]
        nmuons = events.Muon.counts

        # Electrons
        candidateelectron = events.Electron[:, 0:1]
        nelectrons = events.Electron.counts

        if self._channel == 'muon':
            candidatelep = candidatemuon
            selection.add('nootherlepton', (nelectrons == 0))
        else:
            candidatelep = candidateelectron
            selection.add('nootherlepton', (nmuons == 0))

        selection.add('iplepton', ((np.abs(candidatelep.dz) < 0.1)
                                   & (np.abs(candidatelep.dxy) < 0.05)).any())

        # FatJets
        ak8_lep_pair = candidatelep.cross(events.FatJet)
        ak8_lep_dR = ak8_lep_pair.i0.delta_r(ak8_lep_pair.i1)

        candidatejet = events.FatJet[ak8_lep_dR.argmin()]
        leadingjet = events.FatJet[:, 0:1]

        ak8_lep_dR_closest = candidatelep.delta_r(candidatejet)

        selection.add('jetkin', (candidatejet.pt > self._fjetptMIN).any())
        selection.add('jetmsd', (candidatejet.msdcorr > 20).any())
        selection.add('LSF3medium', (candidatejet.lsf3 > 0.7).any())
        selection.add('LSF3tight', (candidatejet.lsf3 > 0.78).any())
        selection.add('lepnearjet', (ak8_lep_dR.min() < 1.5))
        selection.add('lepinjet', (ak8_lep_dR.min() < 0.8))

        # FatJet substracted Lepton
        # sj1_sj2_btagDeepB_pair = candidatejet.LSsubJet1btagDeepB.cross(candidatejet.LSsubJet2btagDeepB)
        # fls_btagDeepB_max = max(sj1_sj2_btagDeepB_pair.i0,sj1_sj2_btagDeepB_pair.i1)

        # Jets
        jets = events.Jet[(events.Jet.pt > 30.)
                          & (abs(events.Jet.eta) < 2.5)
                          & (events.Jet.isTight)]
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        ak4_ak8_dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
        ak4_opposite = jets[(ak4_ak8_dphi > np.pi / 2).all()]
        ak4_away = jets[(ak4_ak8_dphi > 0.8).all()]

        selection.add(
            'antiak4btagMediumOppHem',
            ak4_opposite.btagDeepB.max() < self._btagWPs['med'][self._year])
        selection.add(
            'ak4btagMedium08',
            ak4_away.btagDeepB.max() < self._btagWPs['med'][self._year])

        # MET
        met = events.MET

        # MET eta with mass assumption
        mm = (candidatejet - candidatelep).mass2
        jmass = (mm > 0) * np.sqrt(np.maximum(
            0, mm)) + (mm < 0) * candidatejet.mass

        joffshell = jmass < 62.5
        massassumption = 80. * joffshell + (125 - 80.) * ~joffshell
        x = massassumption**2 / (2 * candidatelep.pt *
                                 met.pt) + np.cos(candidatelep.phi - met.phi)
        met_eta = ((x < 1) * np.arcsinh(x * np.sinh(candidatelep.eta)) +
                   (x > 1) *
                   (candidatelep.eta -
                    np.sign(candidatelep.eta) * np.arccosh(candidatelep.eta)))

        met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),
                                                    np.array([0.]),
                                                    np.array([0.]),
                                                    np.array([0.]))
        if met.size > 0:
            met_p4 = TLorentzVectorArray.from_ptetaphim(
                met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
            hmass = (candidatejet + met_p4).mass
        else:
            hmass = candidatejet.pt.zeros_like()

        # weights
        weights = processor.Weights(len(events), storeIndividual=True)
        if isRealData:
            genflavor = candidatejet.pt.zeros_like()
        else:
            try:
                weights.add('genweight', events.genWeight)
                add_pileup_weight(weights, events.Pileup.nPU, self._year)
                #print("Weight statistics: %r" % weights._weightStats)
            except:
                print('no gen weight')
            if 'TTTo' in dataset:
                genW, genW_idx = getParticles(
                    events, 24, ['fromHardProcess', 'isLastCopy'])
                genb, genb_idx = getParticles(
                    events, 5, ['fromHardProcess', 'isLastCopy'])
                genflavorW = matchedParticleFlavor(candidatelep, genW, 'child',
                                                   0.4)
                genflavorb = matchedParticleFlavor(candidatelep, genb, 'mom',
                                                   0.4)
                genflavor = getFlavor(genflavorW, genflavorb)
            elif (('hww_2017' in dataset) or ('GluGluHToWW' in dataset)):
                genH, genH_idx = getParticles(
                    events, 25, ['fromHardProcess', 'isLastCopy'])
                genW, genW_idx = getParticles(
                    events, 24, ['fromHardProcess', 'isLastCopy'])
                genE, genE_idx = getParticles(
                    events, 11, ['fromHardProcess', 'isFirstCopy'], 1)
                genM, genM_idx = getParticles(
                    events, 13, ['fromHardProcess', 'isFirstCopy'], 1)
                genT, genT_idx = getParticles(
                    events, 15, ['fromHardProcess', 'isFirstCopy'], 1)
                genQ, genQ_idx = getParticles(
                    events, [0, 5], ['fromHardProcess', 'isFirstCopy'])
                ishWW_qqelev = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 1) & (genM.counts == 0) & (genT.counts == 0)
                ishWW_qqmuv = (genH.counts == 1) & (genW.counts == 2) & (
                    genM.counts == 1) & (genE.counts == 0) & (genT.counts == 0)
                ishWW_qqtauv = (genH.counts == 1) & (genW.counts == 2) & (
                    genT.counts == 1) & (genM.counts == 0) & (genE.counts == 0)
                ishWW_qqqq = (genH.counts == 1) & (genW.counts == 2) & (
                    genQ.counts == 4) & (genM.counts == 0) & (genE.counts == 0)
                ishWW_muvelev = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 1) & (genM.counts == 1)
                ishWW_elevelev = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 2) & (genM.counts == 0)
                ishWW_tauvtauv = (genH.counts == 1) & (genW.counts == 2) & (
                    genT.counts == 2) & (genM.counts == 0) & (genE.counts == 0)
                ishWW_muvmuv = (genH.counts == 1) & (genW.counts == 2) & (
                    genE.counts == 0) & (genM.counts == 2)
                genflavor = ((ishWW_qqelev) * 8 + (ishWW_qqmuv) * 9)
            else:
                genflavor = candidatejet.pt.zeros_like()

        # fill cutflow
        cutflow = [
            'trigger', 'jetkin', 'jetmsd', 'lepnearjet', 'lepinjet',
            'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton',
            'LSF3medium', 'LSF3tight'
        ]
        allcuts = set()
        output['cutflow']['none'] += len(events)
        for cut in cutflow:
            allcuts.add(cut)
            output['cutflow'][cut] += selection.all(*allcuts).sum()

        regions = {}
        regions['presel'] = {'trigger', 'jetkin', 'jetmsd', 'lepinjet'}
        regions['antibtag'] = {
            'trigger', 'jetkin', 'jetmsd', 'antiak4btagMediumOppHem'
        }
        regions['noinjet'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepnearjet',
            'antiak4btagMediumOppHem'
        }
        regions['nolsf'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet',
            'antiak4btagMediumOppHem'
        }  #,'nootherlepton'}
        regions['lsf'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight'
        }
        regions['bopp'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
            'antiak4btagMediumOppHem'
        }
        regions['lep'] = {
            'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
            'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton'
        }

        for region in self._regions:
            selections = regions[region]
            cut = selection.all(*selections)
            weight = weights.weight()[cut]

            def normalize(val):
                try:
                    return val[cut].pad(1, clip=True).fillna(0).flatten()
                except:
                    try:
                        return val[cut].flatten()
                    except:
                        return val[cut]

            # output['%s_fjetprop'%region].fill(#fjet_pt = normalize(candidatejet.pt),
            #                                   fjet_msd = normalize(candidatejet.msdcorr),
            #                                   fjet_lsf3 = normalize(candidatejet.lsf3),
            #                                   #jet_oppbtag = normalize(ak4_opposite.btagDeepB.max()),
            #                                   genflavor = normalize(genflavor),
            #                                   dataset=dataset,
            #                                   weight=weight
            # )
            # output['%s_fjetextraprop'%region].fill(fjet_t41 = normalize(candidatejet.tau4/candidatejet.tau1),
            #                                        fjet_t42 = normalize(candidatejet.tau4/candidatejet.tau2),
            #                                        fjet_t31 = normalize(candidatejet.tau3/candidatejet.tau1),
            #                                        dataset=dataset,
            #                                        weight=weight
            #                                    )
            # output['%s_jetprop'%region].fill(jet_oppbtag = normalize(ak4_opposite.btagDeepB.max()),
            #                                  genflavor = normalize(genflavor),
            #                                  dataset=dataset,
            #                                  weight=weight
            #                                 )
            output['%s_fmmjetprop' % region].fill(
                fjet_pt=normalize(candidatejet.pt),
                #fjet_mmass = normalize(jmass),
                #fjet_hmass = normalize(hmass),
                lep_pt=normalize(candidatelep.pt),
                fjet_lsf3=normalize(candidatejet.lsf3),
                genflavor=normalize(genflavor),
                dataset=dataset,
                weight=weight)
            output['%s_fmmjetprop2' % region].fill(
                fjet_mmass=normalize(jmass),
                fjet_lsf3=normalize(candidatejet.lsf3),
                genflavor=normalize(genflavor),
                dataset=dataset,
                weight=weight)
            # output['%s_flsjetprop'%region].fill(#flsjet_pt = normalize(candidatejet.LSpt),
            #                                     flsjet_msd = normalize(candidatejet.LSmsoftdrop),
            #                                     #flsjet_n2b1 = normalize(candidatejet.LSn2b1),
            #                                     #flsjet_n3b1 = normalize(candidatejet.LSn3b1),
            #                                     #flsjet_t21 = normalize(candidatejet.LStau2/candidatejet.LStau1),
            #                                     #flsjet_t32 = normalize(candidatejet.LStau3/candidatejet.LStau2),
            #                                     genflavor = normalize(genflavor),
            #                                     dataset=dataset,
            #                                     weight=weight)
            #output['%s_metprop'%region].fill(met_pt = normalize(met.pt),
            #                                 met_phi = normalize(met.phi),
            #                                 dataset=dataset,
            #                                 weight=weight)
            # output['%s_weight'%region].fill(puweight=weights.partial_weight(include=["pileup_weight"])[cut],
            #                                 genweight=weights.partial_weight(include=["genweight"])[cut],
            #                                 dataset=dataset,
            #                                 )
            # if self._channel=='muon':
            #     output['%s_muonprop'%region].fill(muon_pt = normalize(candidatemuon.pt),
            #                                       muon_miso = normalize(candidatemuon.miniPFRelIso_all),
            #                                       muon_sip = normalize(candidatemuon.sip3d),
            #                                       dataset=dataset,
            #                                       weight=weight)
            #     output['%s_muonextraprop'%region].fill(nmuons = normalize(nmuons),
            #                                            nelectrons = normalize(nelectrons),
            #                                            muon_dz = normalize(candidatemuon.dz),
            #                                            muon_dxy = normalize(candidatemuon.dxy),
            #                                            dataset=dataset,
            #                                            weight=weight)

            # else:
            #     output['%s_electronprop'%region].fill(electron_pt = normalize(candidateelectron.pt),
            #                                           electron_miso = normalize(candidateelectron.miniPFRelIso_all),
            #                                           electron_sip = normalize(candidateelectron.sip3d),
            #                                           dataset=dataset,
            #                                           weight=weight)
            #     output['%s_electronextraprop'%region].fill(nmuons = normalize(nmuons),
            #                                                nelectrons = normalize(nelectrons),
            #                                                electron_dz = normalize(candidateelectron.dz),
            #                                                electron_dxy = normalize(candidateelectron.dxy),
            #                                                dataset=dataset,
            #                                                weight=weight)

        return output
コード例 #23
0
    def process(self, df):
        """Select muon + fat-jet events, derive jet/MET variables and fill histograms.

        The chunk is first reduced to events with at least one muon and one
        fat jet. Named boolean cuts are registered in a ``PackedSelection``,
        a cumulative cutflow is accumulated, derived columns are written back
        into ``df`` and every ``hist.Hist`` in the accumulator whose name
        contains exactly one region name is filled.

        Parameters
        ----------
        df : mapping-like chunk of columns; must provide ``'dataset'``, the
            configured trigger branches and whatever ``buildevents`` reads.

        Returns
        -------
        The accumulator produced by ``self.accumulator.identity()`` with the
        cutflow and histograms updated.

        Raises
        ------
        ValueError
            If a histogram name matches no region or more than one region.
        """
        dataset = df['dataset']
        isRealData = 'genWeight' not in df
        isSignal = 'htautau' in dataset
        output = self.accumulator.identity()

        # Pre-selection: at least one muon and one fat jet.
        events = buildevents(df, fatjet='CustomAK8Puppi')
        good = (
            (events.muons.counts >= 1)
            & (events.fatjets.counts >= 1)
            )
        events = events[good]

        selection = processor.PackedSelection()
        # Trigger: logical AND of every configured trigger branch, restricted
        # to the pre-selected events so it lines up with the other cuts.
        trigger = np.ones(df.size, dtype='bool')
        for t in self._triggers[self._year+'_'+self._trigger]:
            trigger &= df[t]
        selection.add('trigger', trigger[good])

        # muon selection
        goodmuon = (
            (events.muons.p4.pt > 10)
            & (np.abs(events.muons.p4.eta) < 2.4)
            & (events.muons.sip3d < 4)
            & (np.abs(events.muons.dz) < 0.1)
            & (np.abs(events.muons.dxy) < 0.05)
            & (events.muons.mvaId == 2)
        )
        nmuons = goodmuon.sum()

        # Fat jet closest to the lepton.
        # NOTE: the leading muon is taken from all muons of the event, not
        # only from those passing ``goodmuon``.
        leadingmuon = events.muons[:, 0]
        mujet_dR = leadingmuon.p4.delta_r(events.fatjets.p4)
        mu_in_cone = mujet_dR.min() < 0.8 # this I am not sure we have to put as a selection...
        mujet_bestidx = mujet_dR.argmin()
        leadingjet_mu = events.fatjets[mujet_bestidx]

        # Use |eta| so that jets at large negative eta are rejected as well.
        selection.add('jetkin', (
                (leadingjet_mu.p4.pt > 300)
                & (np.abs(leadingjet_mu.p4.eta) < 2.4)
                & (leadingjet_mu.msoftdrop > 10.)
                ).any())
        selection.add('jetid', (leadingjet_mu.jetId & 2).any())  # tight id

        # lepton inside jet?
        selection.add('muinside', mu_in_cone.astype(bool))
        # NOTE(review): this muon-channel cut reads ``electronIdx3SJ`` --
        # confirm that this is the intended branch.
        selection.add('LSF3muinside', (leadingjet_mu.electronIdx3SJ == 0).any())
        # NOTE(review): 'LSF3medium' is registered but is not part of the
        # cutflow or of any region below -- confirm whether it should be.
        selection.add('LSF3medium', (leadingjet_mu.lsf3>0.78).any())

        # veto b-tag in opposite side
        jets = events.jets[
            (events.jets.p4.pt > 30.)
            & (events.jets.jetId & 2)  # tight id
            ]
        ak4_ak8_pair = jets.cross(leadingjet_mu, nested=True)
        dphi = ak4_ak8_pair.i0.p4.delta_phi(ak4_ak8_pair.i1.p4)
        ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
        selection.add('antiak4btagMediumOppHem', ak4_opposite.deepcsvb.max() < self._btagWPs['med'][self._year])

        # b-tag in same side
        #subjets = events.subjets[:, leadingjet_mu.subJetIdx1]

        # final lepton selection
        nelectrons = (
            (events.electrons.p4.pt > 10)
            & (np.abs(events.electrons.p4.eta) < 2.5)
            & (events.electrons.cutBased & (1 << 2)).astype(bool)  # 2017V2 loose
        ).sum()
        selection.add('onemuon', (nmuons == 1) & (nelectrons == 0)) # should we veto taus?
        selection.add('muonkin', (
            (leadingmuon.p4.pt > 27.)
            & (np.abs(leadingmuon.p4.eta) < 2.4)
            ))

        # building variables
        leadingjet_mu = leadingjet_mu.flatten()
        mm = (leadingjet_mu.p4 - leadingmuon.p4).mass2
        # (jet - lep).M; falls back to the plain jet mass when the squared
        # mass of the difference is negative.
        jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*leadingjet_mu.p4.mass

        met = events.met
        joffshell = jmass < 62.5
        # 80 when (jet - lep) mass is below 62.5, otherwise 125 - 80.
        massassumption = 80.*joffshell + (125 - 80.)*~joffshell
        x = massassumption**2/(2*leadingmuon.p4.pt*met.rho) + np.cos(leadingmuon.p4.phi - met.phi)
        met_eta = (
            (x < 1)*np.arcsinh(x*np.sinh(leadingmuon.p4.eta))
            + (x >= 1)*(
                leadingmuon.p4.eta
                - np.sign(leadingmuon.p4.eta)*np.arccosh(np.maximum(1., x))
                )
            )
        met_p4 = TLorentzVectorArray.from_ptetaphim(met.rho, met_eta, met.phi, np.zeros(met.size))

        # filling missing columns
        df['jet_pt'] = leadingjet_mu.p4.pt
        df['jet_lsf3'] = leadingjet_mu.lsf3
        df['jet_mmass'] = jmass
        df['jet_hmass'] = (met_p4 + leadingjet_mu.p4).mass
        df['jet_oppbtag'] = ak4_opposite.deepcsvb.max()
        df['muon_pt'] = leadingmuon.p4.pt
        df['muon_miso'] = leadingmuon.miniPFRelIso_all
        df['met_pt'] = met.rho
        df['met_eta'] = met_eta

        # Cumulative cutflow. Each cut name must appear only once: a repeated
        # name would add the same yield to its counter a second time.
        cutflow = ['trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin', 'muinside', 'LSF3muinside']
        allcuts = set()
        output['cutflow']['none'] += len(events)
        for cut in cutflow:
            allcuts.add(cut)
            output['cutflow'][cut] += selection.all(*allcuts).sum()

        weights = processor.Weights(len(events))
        if not isRealData:
            weights.add('genweight', events.genWeight)

        regions = {}
        regions['presel'] = {'trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin'}
        regions['muinjet'] = {'trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin', 'muinside', 'LSF3muinside'}

        for histname, h in output.items():
            if not isinstance(h, hist.Hist):
                continue
            if not all(k in df or k == 'systematic' for k in h.fields):
                print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
                continue
            fields = {k: df[k] for k in h.fields if k in df}
            # The region is encoded as one of the '_'-separated name tokens.
            region = [r for r in regions.keys() if r in histname.split('_')]
            if len(region) == 1:
                region = region[0]
                cut = selection.all(*regions[region])
                # Apply the event weights; the boolean mask zeroes out the
                # events that fail the region selection.
                h.fill(**fields, weight=weights.weight() * cut)
            elif len(region) > 1:
                raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
            else:
                raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))

        return output
コード例 #24
0
    def process(self, events):
        """Build selections, cutflows and histograms for one chunk of events.

        Registers trigger, fat-jet, muon, AK4-jet and lepton-veto cuts in a
        ``PackedSelection``, sets up event weights for simulation, accumulates
        weighted cutflows for the 'signal', 'ttbar_muoncontrol' and
        'vselection' regions and fills the 'templates', 'event', 'muon',
        'deepAK8' and 'in_v3' histograms for every region.

        Parameters
        ----------
        events : event collection exposing ``metadata['dataset']``,
            ``columns``, ``size`` and the object branches used below.

        Returns
        -------
        The accumulator from ``self.accumulator.identity()``; returned
        untouched when the chunk is empty.
        """
        dataset = events.metadata['dataset']
        print('process dataset', dataset)
        isRealData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        weights = processor.Weights(len(events))
        output = self.accumulator.identity()
        if len(events) == 0:
            return output
        if not isRealData:
            output['sumw'][dataset] += events.genWeight.sum()

        # trigger paths
        if isRealData:
            # Logical OR of the configured fat-jet triggers; a trigger that
            # cannot be read from this chunk is reported and skipped.
            trigger_fatjet = np.zeros(events.size, dtype='bool')
            for t in self._triggers[self._year]:
                try:
                    trigger_fatjet = trigger_fatjet | events.HLT[t]
                except Exception:
                    # Not a bare ``except``: KeyboardInterrupt / SystemExit
                    # must still propagate.
                    print('trigger %s not available' % t)
                    continue

            trigger_muon = np.zeros(events.size, dtype='bool')
            for t in self._muontriggers[self._year]:
                trigger_muon = trigger_muon | events.HLT[t]

        else:
            # Simulation: both trigger decisions are set to pass.
            trigger_fatjet = np.ones(events.size, dtype='bool')
            trigger_muon = np.ones(events.size, dtype='bool')

        selection.add('fatjet_trigger', trigger_fatjet)
        selection.add('muon_trigger', trigger_muon)

        #jet corrected kinematics
        gru = events.GRU
        IN = events.IN
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['rhocorr'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['gruddt'] = gru.v25 - shift(
            fatjets, algo='gruddt', year=self._year)
        fatjets['gru'] = gru.v25
        fatjets['in_v3'] = IN.v3
        fatjets['in_v3_ddt'] = IN.v3 - shift(
            fatjets, algo='inddt', year=self._year)
        fatjets['in_v3_ddt_90pctl'] = IN.v3 - shift(
            fatjets, algo='inddt90pctl', year=self._year)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)

        fatjets["genMatchFull"] = genmatch(events, dataset)
        #else: fatjets["genMatchFull"] = fatjets.pt.zeros_like()  #np.zeros(events.size, dtype='bool')

        # First fat jet per event (kept jagged) and up to five muons.
        candidatejet = fatjets[:, :1]
        candidatemuon = events.Muon[:, :5]

        # run model on PFCands associated to FatJet (FatJetPFCands)
        #events.FatJet.array.content["PFCands"] = type(events.FatJetPFCands.array).fromcounts(events.FatJet.nPFConstituents.flatten(), events.FatJetPFCands.flatten())
        #events.FatJet.array.content["twoProngGru"] = run_model(events.FatJet.flatten())

        selection.add('pt', (candidatejet.pt > 525).any())
        selection.add('msdcorr', (candidatejet.msdcorr > 40).any())
        # basic jet selection; the gen-match requirement is only applied to
        # WJetsToQQ / ZJetsToQQ datasets.
        goodjet_sel = ((candidatejet.pt > 525)
                       & (abs(candidatejet.eta) < 2.5)
                       & (candidatejet.msoftdrop > 40.)
                       & (candidatejet.rhocorr > -5.5)
                       & (candidatejet.rhocorr < -2)
                       &
                       (candidatejet.genMatchFull if
                        ('WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset) else
                        True)).any()

        vselection_goodjet_sel = ((candidatejet.pt > 200)
                                  & (abs(candidatejet.eta) < 2.5)
                                  & (candidatejet.msoftdrop > 40.)).any()
        #& (candidatejet.genMatchFull if ('TTTo' in dataset) else (1==1))).any()
        #& (candidatejet.rhocorr > -5.5)
        #& (candidatejet.rhocorr < -2)).any()

        selection.add('vselection_jetkin', vselection_goodjet_sel)

        #goodmuon sel for muon CR (lep vetos below)
        goodmuon_sel = ((candidatemuon.pt > 55)
                        & (abs(candidatemuon.eta) < 2.1)
                        & (candidatemuon.looseId).astype(bool)
                        & (candidatemuon.pfRelIso04_all < 0.15)).any()
        vselection_goodmuon_sel = ((candidatemuon.pt > 53)
                                   & (abs(candidatemuon.eta) < 2.1)
                                   & (candidatemuon.tightId).astype(bool))

        #& (candidatemuon.pfRelIso04_all < 0.15))

        vselection_goodmuon_sel_loose = ((candidatemuon.pt > 20)
                                         & (candidatemuon.looseId).astype(bool)
                                         & (abs(candidatemuon.eta) < 2.4))

        selection.add('vselection_muonkin', vselection_goodmuon_sel.any())
        selection.add('vselection_onetightmuon',
                      vselection_goodmuon_sel.sum() == 1)
        selection.add('vselection_oneloosemuon',
                      vselection_goodmuon_sel_loose.sum() == 1)

        # From here on only the first muon is used.
        candidatemuon = candidatemuon[:, 0:1]

        selection.add('muonkin', goodmuon_sel)
        selection.add('jetkin', goodjet_sel)

        selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
        selection.add('jetid', candidatejet.isTight.any())
        selection.add('met', events.MET.pt > 40.)

        muon_ak8_pair = candidatemuon.cross(candidatejet, nested=True)

        selection.add('muonDphiAK8',
                      (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) >
                       2 * np.pi / 3).all().all())
        selection.add('vselection_muonDphiAK8', (abs(
            muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 1).all().all())

        #ak4 puppi jet for CR
        jets = events.Jet[((events.Jet.pt > 50.)
                           & (abs(events.Jet.eta) < 2.5))][:, :10]

        # Only the first 10 selected jets are considered (see the slice above).
        # NOTE(review): an earlier comment said "first 4 jets to be consistent
        # with old framework" -- confirm which limit is intended.
        ak4_ak8_pair = jets.cross(candidatejet, nested=True)
        dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
        dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))

        ak4_away = jets[(dr > 0.8).all()]
        selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)
        ak4_opposite = jets[(dphi > np.pi / 2).all()]
        selection.add('antiak4btagMediumOppHem',
                      ak4_opposite.btagCSVV2.max() < 0.8838)

        mu_p4 = TLorentzVectorArray.from_ptetaphim(
            candidatemuon.pt.fillna(0), candidatemuon.eta.fillna(0),
            candidatemuon.phi.fillna(0), candidatemuon.mass.fillna(0))
        # MET as a one-entry-per-event jagged four-vector (eta = 0, mass = 0)
        # so it can be paired with the muon.
        met_p4 = TLorentzVectorArray.from_ptetaphim(
            awkward.JaggedArray.fromiter([[v] for v in events.MET.pt]),
            awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]),
            awkward.JaggedArray.fromiter([[v] for v in events.MET.phi]),
            awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]))

        met_candidatemuon_pair = met_p4.cross(mu_p4)

        Wleptoniccandidate = met_candidatemuon_pair.i0 + met_candidatemuon_pair.i1

        selection.add('Wleptonic_candidate',
                      (Wleptoniccandidate.pt > 200).any())

        vselection_jets = events.Jet[((events.Jet.pt > 30.)
                                      & (abs(events.Jet.eta) < 2.4))]

        vselection_ak4_ak8_pair = vselection_jets.cross(candidatejet,
                                                        nested=True)
        muon_ak4_pair = vselection_jets.cross(candidatemuon, nested=True)
        dr_ak8 = abs(
            vselection_ak4_ak8_pair.i0.delta_r(vselection_ak4_ak8_pair.i1))
        dr_muon = abs(muon_ak4_pair.i0.delta_r(muon_ak4_pair.i1))
        ak4_away = vselection_jets[(dr_ak8 > 0.8).all()]
        selection.add('vselection_ak4btagMedium08',
                      ak4_away.btagCSVV2.max() > 0.8838)

        ak4_away = vselection_jets[(dr_muon > 0.3).all()]

        # NOTE(review): despite its name this cut is a b-tag requirement on
        # the jets with dR > 0.3 from the muon, not a delta-phi cut.
        selection.add('vselection_muonDphiAK4',
                      ak4_away.btagCSVV2.max() > 0.8838)

        nelectrons = ((
            (events.Electron.pt > 10.)
            & (abs(events.Electron.eta) < 2.5)
            #& (events.Electron.cutBased >= events.Electron.LOOSE))
            #& (events.Electron.cutBased_Fall17_V1 >= 1))
            & (events.Electron.cutBased >= 2))).sum()
        nmuons = (((events.Muon.pt > 10)
                   & (abs(events.Muon.eta) < 2.1)
                   #& (events.Muon.pfRelIso04_all < 0.4)
                   & (events.Muon.looseId).astype(bool))).sum()

        ntaus = (((events.Tau.pt > 20.)
                  #& (events.Tau.idMVAnewDM2017v2 >=4))
                  & (events.Tau.idDecayMode).astype(bool)
                  & (events.Tau.rawIso < 5)
                  & (abs(events.Tau.eta) < 2.3))).sum()
        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))
        #weights.add('metfilter', events.Flag.METFilters)
        if isRealData:
            # One flavor value per event: 0 where a candidate jet exists,
            # -1 where the event has none.
            genflavor = candidatejet.pt.zeros_like().pad(
                1, clip=True).fillna(-1).flatten()
        else:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            #add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) #signal region only
            #add_singleMuTriggerWeight(weights, abs(candidatemuon.eta), candidatemuon.pt, self._year)
            bosons = getBosons(events)
            genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
                1, clip=True).fillna(-1).flatten()

            #b-tag weights
        regions = {
            'signal': [
                'fatjet_trigger',
                'jetkin',
                'noleptons',
                'jetid',
                'antiak4btagMediumOppHem',
            ],
            'ttbar_muoncontrol': [
                'muon_trigger',
                'pt',
                'msdcorr',
                'jetid',
                'jetkin',
                'muonkin',
                'muonDphiAK8',
                'ak4btagMedium08',
                'noelectron_notau',
            ],
            'vselection': [
                'muon_trigger', 'vselection_jetkin', 'vselection_muonkin',
                'vselection_onetightmuon', 'vselection_oneloosemuon',
                'vselection_muonDphiAK8', 'vselection_ak4btagMedium08',
                'vselection_muonDphiAK4', 'Wleptonic_candidate', 'met'
            ],
            'noselection':
            [],  #'vselection_muoncontrol' : ['muon_trigger', 'v_selection_jetkin', 'genmatch', 'jetid', 'ak4btagMedium08', 'muonkin','met'],
        }

        # Weighted cumulative cutflow for each region that has a cutflow
        # accumulator ('noselection' has none).
        for cutflow_region in ('signal', 'ttbar_muoncontrol', 'vselection'):
            cutflow_key = 'cutflow_%s' % cutflow_region
            output[cutflow_key][dataset]['none'] += float(
                weights.weight().sum())
            applied_cuts = set()
            for cut in regions[cutflow_region]:
                applied_cuts.add(cut)
                output[cutflow_key][dataset][cut] += float(
                    weights.weight()[selection.all(*applied_cuts)].sum())

        def normalize(val, cut):
            """Apply ``cut`` and flatten to one value per event (0 if empty)."""
            return val[cut].pad(1, clip=True).fillna(0).flatten()

        def fill(region, systematic=None, wmod=None):
            """Fill every histogram for ``region`` with the selected events.

            ``systematic`` and ``wmod`` are accepted but not used.
            """
            print('filling %s' % region)
            selections = regions[region]
            cut = selection.all(*selections)
            weight = weights.weight()[cut]
            output['templates'].fill(
                dataset=dataset,
                region=region,
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                n2ddt=normalize(candidatejet.n2ddt, cut),
                gruddt=normalize(candidatejet.gruddt, cut),
                # The 'in_v3_ddt' axis is filled from the 90th-percentile
                # DDT variant.
                in_v3_ddt=normalize(candidatejet.in_v3_ddt_90pctl, cut),
                weight=weight,
            )
            output['event'].fill(
                dataset=dataset,
                region=region,
                MET=events.MET.pt[cut],
                nJet=fatjets.counts[cut],
                nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
                weight=weight,
            )
            output['muon'].fill(
                dataset=dataset,
                region=region,
                mu_pt=normalize(candidatemuon.pt, cut),
                mu_pfRelIso04_all=normalize(candidatemuon.pfRelIso04_all, cut),
                weight=weight,
            )
            output['deepAK8'].fill(
                dataset=dataset,
                region=region,
                deepTagMDWqq=normalize(candidatejet.deepTagMDWqq, cut),
                deepTagMDZqq=normalize(candidatejet.deepTagMDZqq, cut),
                msd=normalize(candidatejet.msdcorr, cut),
                genflavor=genflavor[cut],
                weight=weight,
            )
            output['in_v3'].fill(
                dataset=dataset,
                region=region,
                genflavor=genflavor[cut],
                in_v3=normalize(candidatejet.in_v3, cut),
                n2=normalize(candidatejet.n2b1, cut),
                gru=normalize(candidatejet.gru, cut),
                weight=weight,
            )

        for region in regions:
            fill(region)

        return output
コード例 #25
0
ファイル: darkhiggs.py プロジェクト: drberry85/decaf
    def process(self, df):

            dataset = df['dataset']

            selected_regions = {}
            if not dataset in selected_regions: selected_regions[dataset] = []
            for selection,v in self._samples.items():
                for i in range (0,len(v)):
                    if v[i] not in dataset: continue
                    selected_regions[dataset].append(selection)

            ###
            #Getting corrections, ids, triggers, ecc, from .coffea files
            ###

            met_trigger_paths       = self._triggers['met_trigger_paths']      
            singleele_trigger_paths = self._triggers['singleele_trigger_paths']
            singlepho_trigger_paths = self._triggers['singlepho_trigger_paths']

            get_msd_weight          = self._corrections['get_msd_weight']    
            get_ttbar_weight        = self._corrections['get_ttbar_weight']       
            get_nlo_weight          = self._corrections['get_nlo_weight']         
            get_adhoc_weight        = self._corrections['get_adhoc_weight']       
            get_pu_weight           = self._corrections['get_pu_weight']          
            get_met_trig_weight     = self._corrections['get_met_trig_weight']    
            get_met_zmm_trig_weight = self._corrections['get_met_zmm_trig_weight']
            get_ele_trig_weight     = self._corrections['get_ele_trig_weight']    
            get_pho_trig_weight     = self._corrections['get_pho_trig_weight']    
            get_ecal_bad_calib      = self._corrections['get_ecal_bad_calib']     

            isLooseElectron = self._ids['isLooseElectron'] 
            isTightElectron = self._ids['isTightElectron'] 
            isLooseMuon     = self._ids['isLooseMuon']     
            isTightMuon     = self._ids['isTightMuon']     
            isLooseTau      = self._ids['isLooseTau']      
            isLoosePhoton   = self._ids['isLoosePhoton']   
            isTightPhoton   = self._ids['isTightPhoton']   
            isGoodJet       = self._ids['isGoodJet']       
            isGoodFatJet    = self._ids['isGoodFatJet']    
            isHEMJet        = self._ids['isHEMJet']        

            met_filter_flags = self._metfilters['met_filter_flags']

            ###
            #Initialize global quantities (MET ecc.)
            ###

            met = Initialize({'pt':df['MET_pt'],
                              'eta':0,
                              'phi':df['MET_phi'],
                              'mass':0})

            calomet = Initialize({'pt':df['CaloMET_pt'],
                                  'eta':0,
                                  'phi':df['CaloMET_phi'],
                                  'mass':0})

            ###
            #Initialize physics objects
            ###

            #Define first and empty object that will use as protection against arrays with size 0
            #Will use MET to set the correct size for the arrays
            #Not used at the moment

            #empty_jagged = awkward.JaggedArray.fromcounts(np.ones_like(met.pt, dtype=int),np.zeros_like(met.pt))
            #empty_obj = Initialize({'pt':empty_jagged,
            #                        'eta':empty_jagged,
            #                        'phi':empty_jagged,
            #                        'mass':empty_jagged})

            e = Initialize({'pt':df['Electron_pt'],
                            'eta':df['Electron_eta'],
                            'phi':df['Electron_phi'],
                            'mass':df['Electron_mass']})

            for key in self._e_id[self._year]:
                if self._e_id[self._year][key] in df:
                    e[key] = df[self._e_id[self._year][key]]
                else:
                    e[key] = e.pt.zeros_like()

            e['isloose'] = isLooseElectron(e.pt,e.eta,e.dxy,e.dz,e.iso,e.loose_id,self._year)
            e['istight'] = isTightElectron(e.pt,e.eta,e.dxy,e.dz,e.iso,e.tight_id,self._year)

            leading_e = e[e.pt.argmax()]
            leading_e = leading_e[leading_e.istight.astype(np.bool)]

            e_loose = e[e.isloose.astype(np.bool)]
            e_tight = e[e.istight.astype(np.bool)]

            e_ntot = e.counts
            e_nloose = e_loose.counts
            e_ntight = e_tight.counts

            mu = Initialize({'pt':df['Muon_pt'],
                             'eta':df['Muon_eta'],
                             'phi':df['Muon_phi'],
                             'mass':df['Muon_mass']})

            for key in self._mu_id[self._year]:
                if self._mu_id[self._year][key] in df:
                    mu[key] = df[self._mu_id[self._year][key]]
                else:
                    mu[key] = mu.pt.zeros_like()

            mu['isloose'] = isLooseMuon(mu.pt,mu.eta,mu.dxy,mu.dz,mu.iso,mu.med_id,self._year)
            mu['istight'] = isTightMuon(mu.pt,mu.eta,mu.dxy,mu.dz,mu.iso,mu.tight_id,self._year)

            leading_mu = mu[mu.pt.argmax()]
            leading_mu = leading_mu[leading_mu.istight.astype(np.bool)]

            mu_loose=mu[mu.isloose.astype(np.bool)]
            mu_tight=mu[mu.istight.astype(np.bool)]

            mu_ntot = mu.counts
            mu_nloose = mu_loose.counts
            mu_ntight = mu_tight.counts

            tau = Initialize({'pt':df['Tau_pt'],
                              'eta':df['Tau_eta'],
                              'phi':df['Tau_phi'],
                              'mass':df['Tau_mass']})

            for key in self._tau_id[self._year]:
                if self._tau_id[self._year][key] in df:
                    tau[key] = df[self._tau_id[self._year][key]]
                else:
                    tau[key] = tau.pt.zeros_like()


            tau['isclean'] =~tau.match(mu_loose,0.3)&~tau.match(e_loose,0.3)
            tau['isloose']=isLooseTau(tau.pt,tau.eta,tau.decayMode,tau.id,self._year)&tau.isclean.astype(np.bool)
            tau_loose=tau[tau.isloose.astype(np.bool)]

            tau_ntot=tau.counts
            tau_nloose=tau_loose.counts

            pho = Initialize({'pt':df['Photon_pt'],
                              'eta':df['Photon_eta'],
                              'phi':df['Photon_phi'],
                              'mass':df['Photon_mass']})

            for key in self._pho_id[self._year]:
                if self._pho_id[self._year][key] in df:
                    pho[key] = df[self._pho_id[self._year][key]]
                else:
                    pho[key] = pho.pt.zeros_like()

            pho['isclean'] =~pho.match(e_loose,0.4)
            pho['isloose']=isLoosePhoton(pho.pt,pho.eta,pho.loose_id,pho.eleveto,self._year)&pho.isclean.astype(np.bool)
            pho['istight']=isTightPhoton(pho.pt,pho.eta,pho.tight_id,pho.eleveto,self._year)&pho.isclean.astype(np.bool)

            leading_pho = pho[pho.pt.argmax()]
            leading_pho = leading_pho[leading_pho.istight.astype(np.bool)]

            pho_loose=pho[pho.isloose.astype(np.bool)]
            pho_tight=pho[pho.istight.astype(np.bool)]

            pho_ntot=pho.counts
            pho_nloose=pho_loose.counts
            pho_ntight=pho_tight.counts

            fj = Initialize({'pt':df['AK15Puppi_pt'],
                             'eta':df['AK15Puppi_eta'],
                             'phi':df['AK15Puppi_phi'],
                             'mass':df['AK15Puppi_mass']})

            fj['msd'] = df['AK15Puppi_msoftdrop']

            for key in self._fj_id[self._year]:
                if self._fj_id[self._year][key] in df:
                    fj[key] = df[self._fj_id[self._year][key]]
                else:
                    fj[key] = fj.pt.zeros_like()

            fj['isgood'] = isGoodFatJet(fj.pt, fj.eta, fj.id)
            fj['isclean'] =~fj.match(pho_loose,1.5)&~fj.match(mu_loose,1.5)&~fj.match(e_loose,1.5)&fj.isgood.astype(np.bool)

            for key in self._deep[self._year]:
                if self._deep[self._year][key] in df:
                    fj[key] = df[self._deep[self._year][key]]
                else:
                    fj[key] = fj.pt.zeros_like()

            fj['probQCD'] = fj.probQCDbb+fj.probQCDcc+fj.probQCDb+fj.probQCDc+fj.probQCDothers
            fj['TvsQCD'] = (fj.probTbcq + fj.probTbqq) / (fj.probTbcq + fj.probTbqq + fj.probQCD)
            fj['ZHbbvsQCD'] = (fj.probZbb + fj.probHbb) / (fj.probZbb+ fj.probHbb+ fj.probQCD)
            fj['VvsQCD'] = (fj.probWcq+fj.probWqq+fj.probZcc+fj.probZqq+fj.probZbb) / (fj.probWcq+fj.probWqq+fj.probZcc+fj.probZqq+fj.probZbb+fj.probQCD)

            leading_fj = fj[fj.pt.argmax()]
            leading_fj = leading_fj[leading_fj.isclean.astype(np.bool)]
            leading_fj_msd_corr = leading_fj.msd.sum()*get_msd_weight(leading_fj.pt.sum(),leading_fj.eta.sum())

            fj_good = fj[fj.isgood.astype(np.bool)]
            fj_clean=fj[fj.isclean.astype(np.bool)]

            fj_ntot=fj.counts
            fj_ngood=fj_good.counts
            fj_nclean=fj_clean.counts

            j = Initialize({'pt':df['Jet_pt'],
                            'eta':df['Jet_eta'],
                            'phi':df['Jet_phi'],
                            'mass':df['Jet_mass']})

            #https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
            j['deepcsv'] = df['Jet_btagDeepB']
            j['deepflv'] = df['Jet_btagDeepFlavB']

            for key in self._j_id[self._year]:
                if self._j_id[self._year][key] in df:
                    j[key] = df[self._j_id[self._year][key]]
                else:
                    j[key] = j.pt.zeros_like()

            j['isgood'] = isGoodJet(j.pt, j.eta, j.id, j.nhf, j.nef, j.chf, j.cef)
            j['isHEM'] = isHEMJet(j.pt, j.eta, j.phi)
            j['isclean'] = ~j.match(e_loose,0.4)&~j.match(mu_loose,0.4)&~j.match(pho_loose,0.4)&j.isgood.astype(np.bool)
            #j['isclean'] = ~j.match(e_tight,0.4)&~j.match(mu_tight,0.4)&~j.match(pho_tight,0.4)&j.isgood
            j['isiso'] =  ~(j.match(fj_clean,1.5))&j.isclean.astype(np.bool)
            j['isdcsvL'] = (j.deepcsv>0.1241)&j.isiso.astype(np.bool)
            j['isdflvL'] = (j.deepflv>0.0494)&j.isiso.astype(np.bool)
            j['isdcsvM'] = (j.deepcsv>0.4184)&j.isiso.astype(np.bool)
            j['isdflvM'] = (j.deepflv>0.2770)&j.isiso.astype(np.bool)
            j['isdcsvT'] = (j.deepcsv>0.7527)&j.isiso.astype(np.bool)
            j['isdflvT'] = (j.deepflv>0.7264)&j.isiso.astype(np.bool)

            leading_j = j[j.pt.argmax()]
            leading_j = leading_j[leading_j.isclean.astype(np.bool)]

            j_good = j[j.isgood.astype(np.bool)]
            j_clean = j[j.isclean.astype(np.bool)]
            j_iso = j[j.isiso.astype(np.bool)]
            j_dcsvL = j[j.isdcsvL]
            j_dflvL = j[j.isdflvL]
            j_dcsvM = j[j.isdcsvM]
            j_dflvM = j[j.isdflvM]
            j_dcsvT = j[j.isdcsvT]
            j_dflvT = j[j.isdflvT]
            j_HEM = j[j.isHEM.astype(np.bool)]

            j_ntot=j.counts
            j_ngood=j_good.counts
            j_nclean=j_clean.counts
            j_niso=j_iso.counts
            j_ndcsvL=j_dcsvL.counts
            j_ndflvL=j_dflvL.counts
            j_ndcsvM=j_dcsvM.counts
            j_ndflvM=j_dflvM.counts
            j_ndcsvT=j_dcsvT.counts
            j_ndflvT=j_dflvT.counts
            j_nHEM = j_HEM.counts

            ###
            #Calculating derived quantities: dilepton candidates, recoil (u), lepton systems
            ###
            ele_pairs = e_loose.distincts()
            diele = leading_e
            leading_diele = leading_e
            if ele_pairs.i0.content.size>0:
                diele = ele_pairs.i0+ele_pairs.i1
                leading_diele = diele[diele.pt.argmax()]

            mu_pairs = mu_loose.distincts()
            dimu = leading_mu
            leading_dimu = leading_mu
            if mu_pairs.i0.content.size>0:
                dimu = mu_pairs.i0+mu_pairs.i1
                leading_dimu = dimu[dimu.pt.argmax()]

            u={}
            u["iszeroL"] = met
            u["isoneM"] = met+leading_mu.sum()
            u["isoneE"] = met+leading_e.sum()
            u["istwoM"] = met+leading_dimu.sum()
            u["istwoE"] = met+leading_diele.sum()
            u["isoneA"] = met+leading_pho.sum()

            lepSys={}
            lepSys["iszeroL"] = met
            lepSys["isoneM"] = leading_mu.sum()
            lepSys["isoneE"] = leading_e.sum()
            lepSys["istwoM"] = leading_dimu.sum()
            lepSys["istwoE"] = leading_diele.sum()
            lepSys["isoneA"] = leading_pho.sum()

            leadlepton={}
            leadlepton["iszeroL"] = met
            leadlepton["isoneM"] = leading_mu.sum()
            leadlepton["isoneE"] = leading_e.sum()
            leadlepton["istwoM"] = leading_mu.sum()
            leadlepton["istwoE"] = leading_e.sum()
            leadlepton["isoneA"] = leading_pho.sum()

            ###
            #Calculating weights
            ###

            ###
            # For MC, retrieve the generator weights (genWeight), to take into account NLO destructive interference, and their sum
            ###

            genw = np.ones_like(df['MET_pt'])
            sumw = 1.
            wnlo = np.ones_like(df['MET_pt'])
            adhocw = np.ones_like(df['MET_pt'])
            if self._xsec[dataset] != -1:
                genw = df['genWeight']
                sumw = genw.sum()

                if 'TTJets' in dataset or 'WJets' in dataset or 'DY' in dataset or 'ZJets' in dataset:
                    gen_flags = df['GenPart_statusFlags']
                    LastCopy = (gen_flags&(1 << 13))==0
                    #genLastCopy = Initialize({'pt':df['GenPart_pt'][LastCopy],
                    #                          'eta':df['GenPart_eta'][LastCopy],
                    #                          'phi':df['GenPart_phi'][LastCopy],
                    #                          'mass':df['GenPart_mass'][LastCopy],
                    #                          'pdgid':df['GenPart_pdgId'][LastCopy]})
                    gen_pt = df['GenPart_pt'][LastCopy]
                    gen_pdgid = df['GenPart_pdgId'][LastCopy]

                    #genTops = genLastCopy[abs(genLastCopy.pdgid)==6]
                    #genWs = genLastCopy[abs(genLastCopy.pdgid)==24]
                    #genZs = genLastCopy[abs(genLastCopy.pdgid)==23]
                    #genAs = genLastCopy[abs(genLastCopy.pdgid)==22]
                    #genHs = genLastCopy[abs(genLastCopy.pdgid)==25]
                    genTops = gen_pt[abs(gen_pdgid)==6]
                    genWs = gen_pt[abs(gen_pdgid)==24]
                    genZs = gen_pt[abs(gen_pdgid)==23]
                    genAs = gen_pt[abs(gen_pdgid)==22]
                    genHs = gen_pt[abs(gen_pdgid)==25]

                    isTT = (genTops.counts==2)
                    isW  = (genTops.counts==0)&(genWs.counts==1)&(genZs.counts==0)&(genAs.counts==0)&(genHs.counts==0)
                    isZ  = (genTops.counts==0)&(genWs.counts==0)&(genZs.counts==1)&(genAs.counts==0)&(genHs.counts==0)
                    isA  = (genTops.counts==0)&(genWs.counts==0)&(genZs.counts==0)&(genAs.counts==1)&(genHs.counts==0)
                    if('TTJets' in dataset): 
                        wnlo = np.sqrt(get_ttbar_weight(genTops[0].sum()) * get_ttbar_weight(genTops[1].sum()))
                    elif('WJets' in dataset): 
                        wnlo = get_nlo_weight[self._year]['w'](genWs[0].sum())
                        if self._year != '2016': adhocw = get_adhoc_weight['w'](genWs[0].sum())
                    elif('DY' in dataset or 'ZJets' in dataset): 
                        wnlo = get_nlo_weight[self._year]['z'](genZs[0].sum())
                        if self._year != '2016': adhocw = get_adhoc_weight['z'](genZs[0].sum())
                    elif('GJets' in dataset): wnlo = get_nlo_weight[self._year]['a'](genAs[0].sum())

            ###
            # Calculate PU weight and systematic variations
            ###

            nvtx = df['PV_npvs']
            pu = get_pu_weight[self._year]['cen'](nvtx)
            puUp = get_pu_weight[self._year]['up'](nvtx)
            puDown = get_pu_weight[self._year]['down'](nvtx)

            ###
            #Importing the MET filters per year from metfilters.py and constructing the filter boolean
            ###

            met_filters = {}
            for flag in met_filter_flags[self._year]:
                if flag in df:
                    met_filters[flag] = df[flag]

            ###
            #Importing the trigger paths per year from trigger.py and constructing the trigger boolean
            ###

            pass_trig = {}
            met_trigger = {}
            for path in met_trigger_paths[self._year]:
                if path in df:
                    met_trigger[path] = df[path]
            passMetTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
            for path in met_trigger:
                passMetTrig |= met_trigger[path]

            singleele_trigger = {}
            for path in singleele_trigger_paths[self._year]:
                if path in df:
                    singleele_trigger[path] = df[path]
            passSingleEleTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
            for path in singleele_trigger:
                passSingleEleTrig |= singleele_trigger[path]

            singlepho_trigger = {}
            for path in singlepho_trigger_paths[self._year]:
                if path in df:
                    singlepho_trigger[path] = df[path]
            passSinglePhoTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
            for path in singlepho_trigger:
                passSinglePhoTrig |= singlepho_trigger[path]

            pass_trig['iszeroL'] = passMetTrig
            pass_trig['isoneM'] = passMetTrig
            pass_trig['istwoM'] = passMetTrig
            pass_trig['isoneE'] = passSingleEleTrig
            pass_trig['istwoE'] = passSingleEleTrig
            pass_trig['isoneA'] =passSinglePhoTrig

            ###
            # Trigger efficiency weight
            ###

            trig = {}
            trig['iszeroL'] = get_met_trig_weight[self._year](u["iszeroL"].pt)
            trig['isoneM'] = get_met_trig_weight[self._year](u["isoneM"].pt)
            trig['istwoM'] = get_met_zmm_trig_weight[self._year](u["istwoM"].pt)
            trig['isoneE'] = get_ele_trig_weight[self._year](leading_e.eta.sum(), leading_e.pt.sum())
            trig['istwoE'] = trig['isoneE']
            if ele_pairs.i0.content.size>0:
                eff1 = get_ele_trig_weight[self._year](ele_pairs[diele.pt.argmax()].i0.eta.sum(),ele_pairs[diele.pt.argmax()].i0.pt.sum())
                eff2 = get_ele_trig_weight[self._year](ele_pairs[diele.pt.argmax()].i1.eta.sum(),ele_pairs[diele.pt.argmax()].i1.pt.sum())
                trig['istwoE'] = 1 - (1-eff1)*(1-eff2)
            trig['isoneA'] = get_pho_trig_weight[self._year](leading_pho.pt.sum())

            ###
            #Event selection
            ###

            selections = processor.PackedSelection()

            selections.add('iszeroL', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0))
            selections.add('isoneM', (e_nloose==0)&(mu_ntight==1)&(tau_nloose==0)&(pho_nloose==0))
            selections.add('isoneE', (e_ntight==1)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)&(met.pt>50))
            selections.add('istwoM', (e_nloose==0) & (mu_ntight>=1) & (mu_nloose==2) & (tau_nloose==0)&(pho_nloose==0)&(leading_dimu.mass.sum()>60) & (leading_dimu.mass.sum()<120))
            selections.add('istwoE', (e_ntight>=1) & (e_nloose==2)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)&(leading_diele.mass.sum()>60)&(leading_diele.mass.sum()<120))
            selections.add('isoneA', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_ntight==1))
            selections.add('noextrab', (j_ndflvL==0))
            selections.add('extrab', (j_ndflvL>0))
            selections.add('ismonohs', (leading_fj.ZHbbvsQCD.sum()>0.65))
            selections.add('ismonojet', ~(leading_fj.ZHbbvsQCD.sum()>0.65))
            selections.add('mass0', (leading_fj_msd_corr<30))
            selections.add('mass1', (leading_fj_msd_corr>=30)&(leading_fj_msd_corr<60))
            selections.add('mass2', (leading_fj_msd_corr>=60)&(leading_fj_msd_corr<80))
            selections.add('mass3', (leading_fj_msd_corr>=80)&(leading_fj_msd_corr<120))
            selections.add('mass4', (leading_fj_msd_corr>=120))
            selections.add('noHEMj', (j_nHEM==0))

            ###
            #Adding weights and selections
            ###

            weights = {}
            regions = {}
            for k in selected_regions[dataset]:
                weights[k] = processor.Weights(df.size)
                weights[k].add('nlo',wnlo)
                weights[k].add('adhoc',adhocw)
                weights[k].add('genw',genw)
                weights[k].add('pileup',pu,puUp,puDown)
                weights[k].add('passMetFilters',np.prod([met_filters[key] for key in met_filters], axis=0))
                weights[k].add('trig', trig[k])
                weights[k].add('pass_trig', pass_trig[k])


                selections.add(k+'baggy', (fj_nclean>0)&(fj_clean.pt.max()>160)&(abs(u[k].delta_phi(j_clean)).min()>0.8)&(u[k].pt>250))

                regions[k+'_baggy'] = {k,k+'baggy','noHEMj','noextrab'}
                regions[k+'_mass0'] = {k,k+'baggy','mass0','noHEMj','noextrab'}
                regions[k+'_mass1'] = {k,k+'baggy','mass1','noHEMj','noextrab'}
                regions[k+'_mass2'] = {k,k+'baggy','mass2','noHEMj','noextrab'}
                regions[k+'_mass3'] = {k,k+'baggy','mass3','noHEMj','noextrab'}
                regions[k+'_mass4'] = {k,k+'baggy','mass4','noHEMj','noextrab'}
                regions[k+'_baggy_extrab'] = {k,k+'baggy','noHEMj','extrab'}
                regions[k+'_mass0_extrab'] = {k,k+'baggy','mass0','noHEMj','extrab'}
                regions[k+'_mass1_extrab'] = {k,k+'baggy','mass1','noHEMj','extrab'}
                regions[k+'_mass2_extrab'] = {k,k+'baggy','mass2','noHEMj','extrab'}
                regions[k+'_mass3_extrab'] = {k,k+'baggy','mass3','noHEMj','extrab'}
                regions[k+'_mass4_extrab'] = {k,k+'baggy','mass4','noHEMj','extrab'}

                regions[k+'_baggy_ismonohs'] = {k,k+'baggy','noHEMj','noextrab','ismonohs'}
                regions[k+'_mass0_ismonohs'] = {k,k+'baggy','mass0','noHEMj','noextrab','ismonohs'}
                regions[k+'_mass1_ismonohs'] = {k,k+'baggy','mass1','noHEMj','noextrab','ismonohs'}
                regions[k+'_mass2_ismonohs'] = {k,k+'baggy','mass2','noHEMj','noextrab','ismonohs'}
                regions[k+'_mass3_ismonohs'] = {k,k+'baggy','mass3','noHEMj','noextrab','ismonohs'}
                regions[k+'_mass4_ismonohs'] = {k,k+'baggy','mass4','noHEMj','noextrab','ismonohs'}
                regions[k+'_baggy_extrab_ismonohs'] = {k,k+'baggy','noHEMj','extrab','ismonohs'}
                regions[k+'_mass0_extrab_ismonohs'] = {k,k+'baggy','mass0','noHEMj','extrab','ismonohs'}
                regions[k+'_mass1_extrab_ismonohs'] = {k,k+'baggy','mass1','noHEMj','extrab','ismonohs'}
                regions[k+'_mass2_extrab_ismonohs'] = {k,k+'baggy','mass2','noHEMj','extrab','ismonohs'}
                regions[k+'_mass3_extrab_ismonohs'] = {k,k+'baggy','mass3','noHEMj','extrab','ismonohs'}
                regions[k+'_mass4_extrab_ismonohs'] = {k,k+'baggy','mass4','noHEMj','extrab','ismonohs'}

                regions[k+'_baggy_ismonojet'] = {k,k+'baggy','noHEMj','noextrab','ismonojet'}
                regions[k+'_mass0_ismonojet'] = {k,k+'baggy','mass0','noHEMj','noextrab','ismonojet'}
                regions[k+'_mass1_ismonojet'] = {k,k+'baggy','mass1','noHEMj','noextrab','ismonojet'}
                regions[k+'_mass2_ismonojet'] = {k,k+'baggy','mass2','noHEMj','noextrab','ismonojet'}
                regions[k+'_mass3_ismonojet'] = {k,k+'baggy','mass3','noHEMj','noextrab','ismonojet'}
                regions[k+'_mass4_ismonojet'] = {k,k+'baggy','mass4','noHEMj','noextrab','ismonojet'}
                regions[k+'_baggy_extrab_ismonojet'] = {k,k+'baggy','noHEMj','extrab','ismonojet'}
                regions[k+'_mass0_extrab_ismonojet'] = {k,k+'baggy','mass0','noHEMj','extrab','ismonojet'}
                regions[k+'_mass1_extrab_ismonojet'] = {k,k+'baggy','mass1','noHEMj','extrab','ismonojet'}
                regions[k+'_mass2_extrab_ismonojet'] = {k,k+'baggy','mass2','noHEMj','extrab','ismonojet'}
                regions[k+'_mass3_extrab_ismonojet'] = {k,k+'baggy','mass3','noHEMj','extrab','ismonojet'}
                regions[k+'_mass4_extrab_ismonojet'] = {k,k+'baggy','mass4','noHEMj','extrab','ismonojet'}

            variables = {}
            variables['j1pt'] = leading_j.pt
            variables['j1eta'] = leading_j.eta
            variables['j1phi'] = leading_j.phi
            variables['fj1pt'] = leading_fj.pt
            variables['fj1eta'] = leading_fj.eta
            variables['fj1phi'] = leading_fj.phi
            variables['e1pt'] = leading_e.pt
            variables['e1phi'] = leading_e.phi
            variables['e1eta'] = leading_e.eta
            variables['dielemass'] = leading_diele.mass
            variables['mu1pt'] = leading_mu.pt
            variables['mu1phi'] = leading_mu.phi
            variables['mu1eta'] = leading_mu.eta
            variables['dimumass'] = leading_dimu.mass
            variables['njets'] = j_nclean
            variables['ndcsvL'] = j_ndcsvL
            variables['ndflvL'] = j_ndflvL
            variables['ndcsvM'] = j_ndcsvM
            variables['ndflvM'] = j_ndflvM
            variables['ndcsvT'] = j_ndcsvT
            variables['ndflvT'] = j_ndflvT
            variables['nfjtot'] = fj_ntot
            variables['nfjgood'] = fj_ngood
            variables['nfjclean'] = fj_nclean
            variables['TvsQCD'] = leading_fj.TvsQCD
            variables['ZHbbvsQCD'] = leading_fj.ZHbbvsQCD
            variables['VvsQCD'] = leading_fj.VvsQCD
            variables['probTbcq']      = leading_fj.probTbcq
            variables['probTbqq']      = leading_fj.probTbqq
            variables['probTbc']       = leading_fj.probTbc
            variables['probTbq']       = leading_fj.probTbq
            variables['probWcq']       = leading_fj.probWcq
            variables['probWqq']       = leading_fj.probWqq
            variables['probZbb']       = leading_fj.probZbb
            variables['probZcc']       = leading_fj.probZcc
            variables['probZqq']       = leading_fj.probZqq
            variables['probHbb']       = leading_fj.probHbb
            variables['probHcc']       = leading_fj.probHcc
            variables['probHqqqq']     = leading_fj.probHqqqq
            variables['probQCDbb']     = leading_fj.probQCDbb
            variables['probQCDcc']     = leading_fj.probQCDcc
            variables['probQCDb']      = leading_fj.probQCDb
            variables['probQCDc']      = leading_fj.probQCDc
            variables['probQCDothers'] = leading_fj.probQCDothers

            hout = self.accumulator.identity()
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=sumw)
            i = 0
            while i < len(selected_regions[dataset]):
                r = selected_regions[dataset][i]
                weight = weights[r].weight()
                for s in ['baggy','mass0','mass1','mass2','mass3','mass4',
                          'baggy_extrab','mass0_extrab','mass1_extrab','mass2_extrab','mass3_extrab','mass4_extrab',
                          'baggy_ismonohs','mass0_ismonohs','mass1_ismonohs','mass2_ismonohs','mass3_ismonohs','mass4_ismonohs',
                          'baggy_extrab_ismonohs','mass0_extrab_ismonohs','mass1_extrab_ismonohs','mass2_extrab_ismonohs','mass3_extrab_ismonohs','mass4_extrab_ismonohs',
                          'baggy_ismonojet','mass0_ismonojet','mass1_ismonojet','mass2_ismonojet','mass3_ismonojet','mass4_ismonojet',
                          'baggy_extrab_ismonojet','mass0_extrab_ismonojet','mass1_extrab_ismonojet','mass2_extrab_ismonojet','mass3_extrab_ismonojet','mass4_extrab_ismonojet']:
                    cut = selections.all(*regions[r+'_'+s])
                    flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
                    flat_weights = {k: (~np.isnan(v[cut])*weight[cut]).flatten() for k, v in variables.items()}
                    for histname, h in hout.items():
                        if not isinstance(h, hist.Hist):
                            continue
                        elif histname == 'sumw':
                            continue
                        elif histname == 'fjmass':
                            h.fill(dataset=dataset, region=r, jet_selection=s, fjmass=leading_fj_msd_corr, weight=weight*cut)
                        elif histname == 'recoil':
                            h.fill(dataset=dataset, region=r, jet_selection=s, recoil=u[r].pt, weight=weight*cut)
                        elif histname == 'CaloMinusPfOverRecoil':
                            h.fill(dataset=dataset, region=r, jet_selection=s, CaloMinusPfOverRecoil= abs(calomet.pt - met.pt) / u[r].pt, weight=weight*cut)
                        elif histname == 'mindphi':
                            h.fill(dataset=dataset, region=r, jet_selection=s, mindphi=abs(u[r].delta_phi(j_clean)).min(), weight=weight*cut)
                        elif histname == 'diledphi':
                            h.fill(dataset=dataset, region=r, jet_selection=s, diledphi=abs(lepSys[r].delta_phi(j_clean)).min(), weight=weight*cut)
                        elif histname == 'ledphi':
                            h.fill(dataset=dataset, region=r, jet_selection=s, ledphi=abs(leadlepton[r].delta_phi(j_clean)).min(), weight=weight*cut)
                        elif histname == 'recoilVSmindphi':
                            h.fill(dataset=dataset, region=r, jet_selection=s, recoil=u[r].pt, mindphi=abs(u[r].delta_phi(j_clean)).min(), weight=weight*cut)
                        else:
                            flat_variable = {histname: flat_variables[histname]}
                            h.fill(dataset=dataset, region=r, jet_selection=s, **flat_variable, weight=flat_weights[histname])
                i += 1
            return hout
コード例 #26
0
    def process(self, df):
        np.random.seed(
            10
        )  # sets seed so values from random distributions are reproducible (JER corrections)
        output = self.accumulator.identity()

        self.sample_name = df.dataset

        ## make event weights
        # data or MC distinction made internally
        evt_weights = MCWeights.get_event_weights(df,
                                                  year=args.year,
                                                  corrections=self.corrections,
                                                  BTagSFs=btaggers)

        ## initialize selections and regions
        selection = processor.PackedSelection()
        regions = {
            'Muon': {
                'Loose': {
                    'zero_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_0'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_0'},
                    },
                    'one_btag': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_1'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_1'},
                    },
                    'two_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_2'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_2'},
                    },
                    'threePlus_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_3p'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_3p'},
                    },
                },
                'Tight': {
                    'zero_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_0'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_0'},
                    },
                    'one_btag': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_1'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_1'},
                    },
                    'two_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_2'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_2'},
                    },
                    'threePlus_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_3p'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_3p'},
                    },
                },
            },
            'Electron': {
                'Loose': {
                    'zero_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_0'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_0'},
                    },
                    'one_btag': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_1'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_1'},
                    },
                    'two_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_2'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_2'},
                    },
                    'threePlus_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_3p'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_3p'},
                    },
                },
                'Tight': {
                    'zero_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_0'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_0'},
                    },
                    'one_btag': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_1'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_1'},
                    },
                    'two_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_2'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_2'},
                    },
                    'threePlus_btags': {
                        '3Jets':
                        {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_3p'},
                        '4PJets':
                        {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_3p'},
                    },
                },
            },
        }

        ## object selection
        objsel_evts = objsel.select(df,
                                    year=args.year,
                                    corrections=self.corrections,
                                    accumulator=output)
        output['cutflow'][
            'nEvts passing jet and lepton obj selection'] += objsel_evts.sum()
        selection.add('jets_3', df['Jet'].counts == 3)
        selection.add('jets_4p', df['Jet'].counts > 3)
        selection.add('objselection', objsel_evts)
        #selection.add('DeepJet_pass', df['Jet']['DeepJet'+wps_to_use[0]].sum() >= 2)
        #selection.add('DeepCSV_pass', df['Jet']['DeepCSV'+wps_to_use[0]].sum() >= 2)
        selection.add('DeepCSV_0',
                      df['Jet']['DeepCSV' + wps_to_use[0]].sum() == 0)
        selection.add('DeepCSV_1',
                      df['Jet']['DeepCSV' + wps_to_use[0]].sum() == 1)
        selection.add('DeepCSV_2',
                      df['Jet']['DeepCSV' + wps_to_use[0]].sum() == 2)
        selection.add('DeepCSV_3p',
                      df['Jet']['DeepCSV' + wps_to_use[0]].sum() >= 3)

        #set_trace()
        # sort jets by btag value, needed when making permutations
        df['Jet'] = df['Jet'][df['Jet']['btagDeepB'].argsort(
            ascending=False)] if btaggers[0] == 'DeepCSV' else df['Jet'][
                df['Jet']['btagDeepFlavB'].argsort(ascending=False)]

        self.isData = self.sample_name.startswith('data_Single')
        if self.isData:
            isSE_Data = self.sample_name.startswith('data_SingleElectron')
            isSM_Data = self.sample_name.startswith('data_SingleMuon')
            runs = df.run
            lumis = df.luminosityBlock
            Golden_Json_LumiMask = lumi_tools.LumiMask(
                '%s/inputs/data/LumiMasks/%s_GoldenJson.txt' %
                (proj_dir, args.year))
            LumiMask = Golden_Json_LumiMask.__call__(
                runs, lumis)  ## returns array of valid events
            selection.add('lumimask', LumiMask)

            ## object selection and add different selections
            if isSM_Data:
                del regions['Electron']
                ## muons
                selection.add('tight_MU', df['Muon']['TIGHTMU'].sum() ==
                              1)  # one muon passing TIGHT criteria
                selection.add('loose_MU', df['Muon']['LOOSEMU'].sum() ==
                              1)  # one muon passing LOOSE criteria
                #selection.add('loose_or_tight_MU', (df['Muon']['LOOSEMU'] | df['Muon']['TIGHTMU']).sum() == 1) # one muon passing LOOSE or TIGHT criteria
            if isSE_Data:
                del regions['Muon']
                ## electrons
                selection.add('tight_EL', df['Electron']['TIGHTEL'].sum() ==
                              1)  # one electron passing TIGHT criteria
                selection.add('loose_EL', df['Electron']['LOOSEEL'].sum() ==
                              1)  # one electron passing LOOSE criteria
                #selection.add('loose_or_tight_EL', (df['Electron']['LOOSEEL'] | df['Electron']['TIGHTEL']).sum() == 1) # one electron passing LOOSE or TIGHT criteria

            for lepton in regions.keys():
                for lepcat in regions[lepton].keys():
                    for btagregion in regions[lepton][lepcat].keys():
                        for jmult in regions[lepton][lepcat][btagregion].keys(
                        ):
                            regions[lepton][lepcat][btagregion][jmult].update(
                                {'lumimask'})

        if not self.isData:
            ## add different selections
            ## muons
            selection.add('tight_MU', df['Muon']['TIGHTMU'].sum() ==
                          1)  # one muon passing TIGHT criteria
            selection.add('loose_MU', df['Muon']['LOOSEMU'].sum() ==
                          1)  # one muon passing LOOSE criteria
            #selection.add('loose_or_tight_MU', (df['Muon']['LOOSEMU'] | df['Muon']['TIGHTMU']).sum() == 1) # one muon passing LOOSE or TIGHT criteria
            ## electrons
            selection.add('tight_EL', df['Electron']['TIGHTEL'].sum() ==
                          1)  # one electron passing TIGHT criteria
            selection.add('loose_EL', df['Electron']['LOOSEEL'].sum() ==
                          1)  # one electron passing LOOSE criteria
            #selection.add('loose_or_tight_EL', (df['Electron']['LOOSEEL'] | df['Electron']['TIGHTEL']).sum() == 1) # one electron passing LOOSE or TIGHT criteria

            #set_trace()
            ### apply lepton SFs to MC (only applicable to tight leptons)
            if 'LeptonSF' in corrections.keys():
                tight_mu_cut = selection.require(
                    objselection=True, tight_MU=True
                )  # find events passing muon object selection with one tight muon
                tight_muons = df['Muon'][tight_mu_cut][(
                    df['Muon'][tight_mu_cut]['TIGHTMU'] == True)]
                evt_weights._weights['Muon_SF'][
                    tight_mu_cut] = MCWeights.get_lepton_sf(
                        year=args.year,
                        lepton='Muons',
                        corrections=lepSF_correction,
                        pt=tight_muons.pt.flatten(),
                        eta=tight_muons.eta.flatten())
                tight_el_cut = selection.require(
                    objselection=True, tight_EL=True
                )  # find events passing electron object selection with one tight electron
                tight_electrons = df['Electron'][tight_el_cut][(
                    df['Electron'][tight_el_cut]['TIGHTEL'] == True)]
                evt_weights._weights['Electron_SF'][
                    tight_el_cut] = MCWeights.get_lepton_sf(
                        year=args.year,
                        lepton='Electrons',
                        corrections=lepSF_correction,
                        pt=tight_electrons.pt.flatten(),
                        eta=tight_electrons.etaSC.flatten())

                ## apply btagging SFs to MC
            if corrections['BTagSF'] == True:
                #set_trace()
                threeJets_cut = selection.require(objselection=True,
                                                  jets_3=True)
                #deepjet_3j_wts = self.corrections['BTag_Constructors']['DeepJet']['3Jets'].get_scale_factor(jets=df['Jet'][threeJets_cut], passing_cut='DeepJet'+wps_to_use[0])
                #evt_weights._weights['DeepJet'][threeJets_cut] = deepjet_3j_wts['central'].prod()
                deepcsv_3j_wts = self.corrections['BTag_Constructors'][
                    'DeepCSV']['3Jets'].get_scale_factor(
                        jets=df['Jet'][threeJets_cut],
                        passing_cut='DeepCSV' + wps_to_use[0])
                evt_weights._weights['DeepCSV'][
                    threeJets_cut] = deepcsv_3j_wts['central'].prod()

                fourplusJets_cut = selection.require(objselection=True,
                                                     jets_4p=True)
                #deepjet_4pj_wts = self.corrections['BTag_Constructors']['DeepJet']['4PJets'].get_scale_factor(jets=df['Jet'][fourplusJets_cut], passing_cut='DeepJet'+wps_to_use[0])
                #evt_weights._weights['DeepJet'][fourplusJets_cut] = deepjet_4pj_wts['central'].prod()
                deepcsv_4pj_wts = self.corrections['BTag_Constructors'][
                    'DeepCSV']['4PJets'].get_scale_factor(
                        jets=df['Jet'][fourplusJets_cut],
                        passing_cut='DeepCSV' + wps_to_use[0])
                evt_weights._weights['DeepCSV'][
                    fourplusJets_cut] = deepcsv_4pj_wts['central'].prod()

            # don't use ttbar events with indices % 10 == 0, 1, 2
            if self.sample_name in Nominal_ttJets:
                events = df.event
                selection.add(
                    'keep_ttbar',
                    ~np.stack([((events % 10) == idx) for idx in [0, 1, 2]],
                              axis=1).any(axis=1))
                for lepton in regions.keys():
                    for lepcat in regions[lepton].keys():
                        for btagregion in regions[lepton][lepcat].keys():
                            for jmult in regions[lepton][lepcat][
                                    btagregion].keys():
                                sel = regions[lepton][lepcat][btagregion][
                                    jmult]
                                sel.update({'keep_ttbar'})

        #set_trace()
        ## fill hists for each region
        for lepton in regions.keys():
            lepSF_to_exclude = 'Electron_SF' if lepton == 'Muon' else 'Muon_SF'
            btagSF_to_exclude = 'DeepCSV'
            for lepcat in regions[lepton].keys():
                for btagregion in regions[lepton][lepcat].keys():
                    for jmult in regions[lepton][lepcat][btagregion].keys():
                        cut = selection.all(
                            *regions[lepton][lepcat][btagregion][jmult])
                        #set_trace()

                        if cut.sum() > 0:
                            ltype = 'MU' if lepton == 'Muon' else 'EL'
                            if 'loose_or_tight_%s' % ltype in regions[lepton][
                                    lepcat][btagregion][jmult]:
                                lep_mask = ((df[lepton][cut]['TIGHT%s' % ltype]
                                             == True) |
                                            (df[lepton][cut]['LOOSE%s' % ltype]
                                             == True))
                            elif 'tight_%s' % ltype in regions[lepton][lepcat][
                                    btagregion][jmult]:
                                lep_mask = (df[lepton][cut]['TIGHT%s' %
                                                            ltype] == True)
                            elif 'loose_%s' % ltype in regions[lepton][lepcat][
                                    btagregion][jmult]:
                                lep_mask = (df[lepton][cut]['LOOSE%s' %
                                                            ltype] == True)
                            else:
                                raise ValueError(
                                    "Not sure what lepton type to choose for event"
                                )

                                ## calculate MT
                            MT = make_vars.MT(df[lepton][cut][lep_mask],
                                              df['MET'][cut])
                            MTHigh = (MT >= MTcut).flatten()

                            jets = df['Jet'][cut][MTHigh]
                            leptons = df[lepton][cut][lep_mask][MTHigh]

                            evt_weights_to_use = evt_weights.weight()

                            lepSF = np.ones(MTHigh.sum(
                            )) if self.isData else evt_weights._weights[
                                '%s_SF' % lepton][cut][MTHigh].flatten()
                            pu_weight = np.ones(MTHigh.sum(
                            )) if self.isData else evt_weights._weights[
                                'pileup_weight'][cut][MTHigh].flatten()
                            for btag_applied in [True, False]:
                                btagSF = np.ones(MTHigh.sum()) if (
                                    self.isData or btag_applied
                                    == False) else evt_weights._weights[
                                        btaggers[0]][cut][MTHigh].flatten()
                                if not self.isData:
                                    SFs_to_exclude = [
                                        lepSF_to_exclude
                                    ] if btag_applied else [
                                        lepSF_to_exclude, btagSF_to_exclude
                                    ]
                                    evt_weights_to_use = evt_weights.partial_weight(
                                        exclude=SFs_to_exclude)
                                tot_weight = evt_weights_to_use[cut][
                                    MTHigh].flatten()

                                output['BTagSF'].fill(
                                    dataset=self.sample_name,
                                    btagging=str(btag_applied),
                                    jmult=jmult,
                                    leptype=lepton,
                                    lepcat=lepcat,
                                    btag=btagregion,
                                    sf=btagSF)
                                output['EvtWeight'].fill(
                                    dataset=self.sample_name,
                                    btagging=str(btag_applied),
                                    jmult=jmult,
                                    leptype=lepton,
                                    lepcat=lepcat,
                                    btag=btagregion,
                                    sf=tot_weight)

                                output = self.fill_hists(
                                    acc=output,
                                    btagging_applied=str(btag_applied),
                                    jetmult=jmult,
                                    leptype=lepton,
                                    lepcat=lepcat,
                                    btag=btagregion,
                                    jets=jets,
                                    leptons=leptons,
                                    MT=MT[MTHigh].flatten(),
                                    evt_weights=tot_weight)

                            #set_trace()
                            if not self.isData:
                                output['nTrueInt_puweight'].fill(
                                    dataset=self.sample_name,
                                    jmult=jmult,
                                    leptype=lepton,
                                    lepcat=lepcat,
                                    btag=btagregion,
                                    pu=df['Pileup_nTrueInt'][cut][MTHigh],
                                    weight=pu_weight)
                                output['nTrueInt_noweight'].fill(
                                    dataset=self.sample_name,
                                    jmult=jmult,
                                    leptype=lepton,
                                    lepcat=lepcat,
                                    btag=btagregion,
                                    pu=df['Pileup_nTrueInt'][cut][MTHigh])
                            output['rho_puweight'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                rho=df['fixedGridRhoFastjetAll'][cut][MTHigh],
                                weight=pu_weight)
                            output['rho_noweight'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                rho=df['fixedGridRhoFastjetAll'][cut][MTHigh])
                            output['nvtx_puweight'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                vtx=df['PV_npvs'][cut][MTHigh],
                                weight=pu_weight)
                            output['nvtx_noweight'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                vtx=df['PV_npvs'][cut][MTHigh])
                            output['LepSF'].fill(dataset=self.sample_name,
                                                 jmult=jmult,
                                                 leptype=lepton,
                                                 lepcat=lepcat,
                                                 btag=btagregion,
                                                 sf=lepSF)
                            output['PileupWeight'].fill(
                                dataset=self.sample_name,
                                jmult=jmult,
                                leptype=lepton,
                                lepcat=lepcat,
                                btag=btagregion,
                                sf=pu_weight)

        return output
コード例 #27
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_znunu'] = is_lo_znunu(dataset)
        df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
        df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
            'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
                'is_lo_w_ewk'] | df['is_lo_z_ewk']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
                'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
            gen = setup_gen_candidates(df)
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            gen = setup_gen_candidates(df)
            all_gen_photons = gen[(gen.pdg == 22)]
            prompt_mask = (all_gen_photons.status
                           == 1) & (all_gen_photons.flag & 1 == 1)
            stat1_mask = (all_gen_photons.status == 1)
            gen_photons = all_gen_photons[prompt_mask |
                                          (~prompt_mask.any()) & stat1_mask]
            gen_photon = gen_photons[gen_photons.pt.argmax()]

            gen_v_pt = gen_photon.pt.max()

        # Generator-level leading dijet mass
        if df['has_lhe_v_pt']:
            genjets = setup_lhe_cleaned_genjets(df)
            digenjet = genjets[:, :2].distincts()
            df['mjj_gen'] = digenjet.mass.max()
            df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0)

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
            df, cfg)

        # Remove jets in accordance with the noise recipe
        if df['year'] == 2017:
            ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) |
                      (ak4.abseta > 3.139)]
            bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) |
                          (bjets.abseta > 3.139)]

        # Filtering ak4 jets according to pileup ID
        ak4 = ak4[ak4.puid]

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts == 1) *
                       mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) *
                       mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index = ak4.pt.argmax()

        elejet_pairs = ak4[:, :1].cross(electrons)
        df['dREleJet'] = np.hypot(
            elejet_pairs.i0.eta - elejet_pairs.i1.eta,
            dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:, :1].cross(muons)
        df['dRMuonJet'] = np.hypot(
            muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
            dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                                   muons, photons)

        df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt
        df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]

        df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt

        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30,
                                                  etamax=5.0)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               met_phi,
                                               njet=4,
                                               ptmin=30,
                                               etamax=5.0)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size) == 1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)

        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('at_least_one_tau', taus.counts > 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('mindphijm',
                      df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)

        selection.add('dpfcalo_sr',
                      np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('dpfcalo_cr',
                      np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)

        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)
        selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL)

        # AK4 dijet
        diak4 = ak4[:, :2].distincts()
        leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
            np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
        trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
            np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
        hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
        has_track0 = np.abs(diak4.i0.eta) <= 2.5
        has_track1 = np.abs(diak4.i1.eta) <= 2.5

        leadak4_id = diak4.i0.tightId & (has_track0 * (
            (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
            (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
        trailak4_id = has_track1 * (
            (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
            (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
        df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

        leading_jet_in_horn = ((diak4.i0.abseta < 3.2) &
                               (diak4.i0.abseta > 2.8)).any()
        trailing_jet_in_horn = ((diak4.i1.abseta < 3.2) &
                                (diak4.i1.abseta > 2.8)).any()

        selection.add('hornveto', (df['dPFTkSR'] < 0.8)
                      | ~(leading_jet_in_horn | trailing_jet_in_horn))

        if df['year'] == 2018:
            if df['is_data']:
                metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) &
                                   (df['run'] > 319077))
            else:
                metphihem_mask = pass_all
            selection.add("metphihemextveto", metphihem_mask)
            selection.add('no_el_in_hem',
                          electrons[electrons_in_hem(electrons)].counts == 0)
        else:
            selection.add("metphihemextveto", pass_all)
            selection.add('no_el_in_hem', pass_all)

        selection.add('two_jets', diak4.counts > 0)
        selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
        selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
        selection.add('hemisphere', hemisphere)
        selection.add('leadak4_id', leadak4_id.any())
        selection.add('trailak4_id', trailak4_id.any())
        selection.add('mjj',
                      df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
        selection.add(
            'dphijj',
            df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
        selection.add(
            'detajj',
            df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

        # Cleaning cuts for signal region
        max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef)
        selection.add('max_neEmEF', (max_neEmEF < 0.7).any())

        vec_b = calculate_vecB(ak4, met_pt, met_phi)
        vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])

        no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta >
                                                         2.5).any()
        no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta <
                                                        3.0).any()

        at_least_one_jet_in_hf = (diak4.i0.abseta >
                                  3.0).any() | (diak4.i1.abseta > 3.0).any()
        at_least_one_jet_in_trk = (diak4.i0.abseta <
                                   2.5).any() | (diak4.i1.abseta < 2.5).any()

        # Categorized cleaning cuts
        eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) &
                        (vec_dphi < 1.0)) | (
                            (no_jet_in_trk & at_least_one_jet_in_hf) &
                            (vec_b < 0.2))

        selection.add('eemitigation', eemitigation)

        # HF-HF veto in SR
        both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0)
        selection.add('veto_hfhf', ~both_jets_in_hf.any())

        # Divide into three categories for trigger study
        if cfg.RUN.TRIGGER_STUDY:
            two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
                diak4.i1.eta) <= 2.4)
            two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
                diak4.i1.eta) > 2.4)
            one_jet_forward_one_jet_central = (~two_central_jets) & (
                ~two_forward_jets)
            selection.add('two_central_jets', two_central_jets.any())
            selection.add('two_forward_jets', two_forward_jets.any())
            selection.add('one_jet_forward_one_jet_central',
                          one_jet_forward_one_jet_central.any())

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())


        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index = photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId & photons.barrel

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig',
                      photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if df['has_lhe_v_pt']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        if 'LHE_Njets' in df:
            output['lhe_njets'].fill(dataset=dataset,
                                     multiplicity=df['LHE_Njets'])
        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
        if 'LHE_HTIncoming' in df:
            output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons,
                                        electrons, photons, cfg)
            weights = pileup_weights(weights, df, evaluator, cfg)
            weights = ak4_em_frac_weights(weights, diak4, evaluator)
            if not (gen_v_pt is None):
                weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                             df['mjj_gen'])

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask]]
                output['kinematics']['met_phi'] += [met_phi[mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[df['is_tight_muon']].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[df['is_tight_electron']].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[df['is_tight_photon']].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
            output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

        regions = vbfhinv_regions(cfg)

        # Get veto weights (only for MC)
        if not df['is_data']:
            veto_weights = get_veto_weights(df, cfg, evaluator, electrons,
                                            muons, taus)

        for region, cuts in regions.items():
            exclude = [None]
            region_weights = copy.deepcopy(weights)

            if not df['is_data']:
                ### Trigger weights
                if re.match(r'cr_(\d+)e.*', region):
                    p_pass_data = 1 - (1 -
                                       evaluator["trigger_electron_eff_data"]
                                       (electrons.etasc, electrons.pt)).prod()
                    p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"]
                                     (electrons.etasc, electrons.pt)).prod()
                    trigger_weight = p_pass_data / p_pass_mc
                    trigger_weight[np.isnan(trigger_weight)] = 1
                    region_weights.add('trigger', trigger_weight)
                elif re.match(r'cr_(\d+)m.*', region) or re.match(
                        'sr_.*', region):
                    region_weights.add(
                        'trigger_met',
                        evaluator["trigger_met"](df['recoil_pt']))
                elif re.match(r'cr_g.*', region):
                    photon_trigger_sf(region_weights, photons, df)

                # Veto weights
                if re.match('.*no_veto.*', region):
                    exclude = [
                        "muon_id_iso_tight", "muon_id_tight", "muon_iso_tight",
                        "muon_id_loose", "muon_iso_loose", "ele_reco",
                        "ele_id_tight", "ele_id_loose", "tau_id"
                    ]
                    region_weights.add(
                        "veto",
                        veto_weights.partial_weight(include=["nominal"]))

                # HEM-veto weights for signal region MC
                if re.match('^sr_vbf.*', region) and df['year'] == 2018:
                    # Events that lie in the HEM-veto region
                    events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6)
                    # Weight is the "good lumi fraction" for 2018
                    weight = 21.1 / 59.7
                    hem_weight = np.where(events_to_weight_mask, weight, 1.0)

                    region_weights.add("hem_weight", hem_weight)

            # This is the default weight for this region
            rweight = region_weights.partial_weight(exclude=exclude)

            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region][dataset]['all'] += df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' +
                           region][dataset][cutname] += selection.all(
                               *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            if cfg.RUN.SAVE.TREE:
                if region in ['cr_1e_vbf', 'cr_1m_vbf']:
                    output['tree_int64'][region][
                        "event"] += processor.column_accumulator(
                            df["event"][mask])
                    output['tree_float16'][region][
                        "gen_v_pt"] += processor.column_accumulator(
                            np.float16(gen_v_pt[mask]))
                    output['tree_float16'][region][
                        "gen_mjj"] += processor.column_accumulator(
                            np.float16(df['mjj_gen'][mask]))
                    output['tree_float16'][region][
                        "recoil_pt"] += processor.column_accumulator(
                            np.float16(df["recoil_pt"][mask]))
                    output['tree_float16'][region][
                        "recoil_phi"] += processor.column_accumulator(
                            np.float16(df["recoil_phi"][mask]))
                    output['tree_float16'][region][
                        "mjj"] += processor.column_accumulator(
                            np.float16(df["mjj"][mask]))

                    output['tree_float16'][region][
                        "leadak4_pt"] += processor.column_accumulator(
                            np.float16(diak4.i0.pt[mask]))
                    output['tree_float16'][region][
                        "leadak4_eta"] += processor.column_accumulator(
                            np.float16(diak4.i0.eta[mask]))
                    output['tree_float16'][region][
                        "leadak4_phi"] += processor.column_accumulator(
                            np.float16(diak4.i0.phi[mask]))

                    output['tree_float16'][region][
                        "trailak4_pt"] += processor.column_accumulator(
                            np.float16(diak4.i1.pt[mask]))
                    output['tree_float16'][region][
                        "trailak4_eta"] += processor.column_accumulator(
                            np.float16(diak4.i1.eta[mask]))
                    output['tree_float16'][region][
                        "trailak4_phi"] += processor.column_accumulator(
                            np.float16(diak4.i1.phi[mask]))

                    output['tree_float16'][region][
                        "minDPhiJetRecoil"] += processor.column_accumulator(
                            np.float16(df["minDPhiJetRecoil"][mask]))
                    if '_1e_' in region:
                        output['tree_float16'][region][
                            "leadlep_pt"] += processor.column_accumulator(
                                np.float16(electrons.pt.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_eta"] += processor.column_accumulator(
                                np.float16(electrons[
                                    electrons.pt.argmax()].eta.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_phi"] += processor.column_accumulator(
                                np.float16(electrons[
                                    electrons.pt.argmax()].phi.max()[mask]))
                    elif '_1m_' in region:
                        output['tree_float16'][region][
                            "leadlep_pt"] += processor.column_accumulator(
                                np.float16(muons.pt.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_eta"] += processor.column_accumulator(
                                np.float16(
                                    muons[muons.pt.argmax()].eta.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_phi"] += processor.column_accumulator(
                                np.float16(
                                    muons[muons.pt.argmax()].phi.max()[mask]))

                    for name, w in region_weights._weights.items():
                        output['tree_float16'][region][
                            f"weight_{name}"] += processor.column_accumulator(
                                np.float16(w[mask]))
                    output['tree_float16'][region][
                        f"weight_total"] += processor.column_accumulator(
                            np.float16(rweight[mask]))
                if region == 'inclusive':
                    output['tree_int64'][region][
                        "event"] += processor.column_accumulator(
                            df["event"][mask])
                    for name in selection.names:
                        output['tree_bool'][region][
                            name] += processor.column_accumulator(
                                np.bool_(selection.all(*[name])[mask]))
            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                """Fill the multiplicity histogram ``name`` for the current region.

                ``candidates`` is indexed with the region's ``mask`` and the
                ``counts`` of the result are histogrammed, weighted by
                ``rweight[mask]``. ``output``, ``dataset``, ``region``, ``mask``
                and ``rweight`` are read from the enclosing scope at call time.
                """
                histogram = output[name]
                histogram.fill(
                    dataset=dataset,
                    region=region,
                    multiplicity=candidates[mask].counts,
                    weight=rweight[mask],
                )

            fill_mult('ak4_mult', ak4[ak4.pt > 30])
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Fill ``output[name]`` for the current dataset and region.

                All keyword arguments are forwarded unchanged to the histogram's
                ``fill`` call, next to the ``dataset`` and ``region`` values read
                from the enclosing scope at call time.
                """
                histogram = output[name]
                histogram.fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in region_weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
                ezfill("weights_wide",
                       weight_type=wname,
                       weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, rweight[mask])
            w_alljets_nopref = weight_shape(
                ak4[mask].eta,
                region_weights.partial_weight(exclude=exclude +
                                              ['prefire'])[mask])

            ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            ezfill('ak4_eta_nopref',
                   jeteta=ak4[mask].eta.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_phi_nopref',
                   jetphi=ak4[mask].phi.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_pt_nopref',
                   jetpt=ak4[mask].pt.flatten(),
                   weight=w_alljets_nopref)

            # Leading ak4
            w_diak4 = weight_shape(diak4.pt[mask], rweight[mask])
            ezfill('ak4_eta0',
                   jeteta=diak4.i0.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi0',
                   jetphi=diak4.i0.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt0',
                   jetpt=diak4.i0.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw0',
                   jetpt=diak4.i0.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf0',
                   frac=diak4.i0.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf0',
                   frac=diak4.i0.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst0',
                   nconst=diak4.i0.nconst[mask].flatten(),
                   weight=w_diak4)

            # Trailing ak4
            ezfill('ak4_eta1',
                   jeteta=diak4.i1.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi1',
                   jetphi=diak4.i1.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt1',
                   jetpt=diak4.i1.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw1',
                   jetpt=diak4.i1.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf1',
                   frac=diak4.i1.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf1',
                   frac=diak4.i1.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst1',
                   nconst=diak4.i1.nconst[mask].flatten(),
                   weight=w_diak4)

            # B tag discriminator
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], rweight[mask])
            ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo_cr',
                   dpfcalo=df["dPFCaloCR"][mask],
                   weight=rweight[mask])
            ezfill('dpfcalo_sr',
                   dpfcalo=df["dPFCaloSR"][mask],
                   weight=rweight[mask])
            ezfill('met', met=met_pt[mask], weight=rweight[mask])
            ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask])
            ezfill('recoil',
                   recoil=df["recoil_pt"][mask],
                   weight=rweight[mask])
            ezfill('recoil_phi',
                   phi=df["recoil_phi"][mask],
                   weight=rweight[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=rweight[mask])
            ezfill('dphijr',
                   dphi=df["minDPhiJetRecoil"][mask],
                   weight=rweight[mask])

            ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask])
            ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask])
            ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask])

            if gen_v_pt is not None:
                ezfill('gen_vpt',
                       vpt=gen_v_pt[mask],
                       weight=df['Generator_weight'][mask])
                ezfill('gen_mjj',
                       mjj=df['mjj_gen'][mask],
                       weight=df['Generator_weight'][mask])

            # Photon CR data-driven QCD estimate
            if df['is_data'] and re.match("cr_g.*", region) and re.match(
                    "(SinglePhoton|EGamma).*", dataset):
                w_imp = photon_impurity_weights(
                    photons[leadphoton_index].pt.max()[mask], df["year"])
                output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset),
                                   region=region,
                                   mjj=df["mjj"][mask],
                                   weight=rweight[mask] * w_imp)
                output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset),
                                      region=region,
                                      recoil=df["recoil_pt"][mask],
                                      weight=rweight[mask] * w_imp)

            # Uncertainty variations
            if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']:
                theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')]
                for unc in theory_uncs:
                    reweight = evaluator[unc](gen_v_pt)
                    w = (region_weights.weight() * reweight)[mask]
                    ezfill('mjj_unc',
                           mjj=df['mjj'][mask],
                           uncertainty=unc,
                           weight=w)

            # Two dimensional
            ezfill('recoil_mjj',
                   recoil=df["recoil_pt"][mask],
                   mjj=df["mjj"][mask],
                   weight=rweight[mask])

            # Muons
            if '_1m_' in region or '_2m_' in region or 'no_veto' in region:
                w_allmu = weight_shape(muons.pt[mask], rweight[mask])
                ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
                ezfill('muon_pt_abseta',
                       pt=muons.pt[mask].flatten(),
                       abseta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask])
                ezfill('muon_eta',
                       eta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_phi',
                       phi=muons.phi[mask].flatten(),
                       weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], rweight[mask])
                ezfill('muon_pt0',
                       pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1',
                       pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0',
                       eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1',
                       eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0',
                       phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1',
                       phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt',
                       pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta',
                       eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

            # Electrons
            if '_1e_' in region or '_2e_' in region or 'no_veto' in region:
                w_allel = weight_shape(electrons.pt[mask], rweight[mask])
                ezfill('electron_pt',
                       pt=electrons.pt[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_pt_eta',
                       pt=electrons.pt[mask].flatten(),
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_mt',
                       mt=df['MT_el'][mask],
                       weight=rweight[mask])
                ezfill('electron_eta',
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_phi',
                       phi=electrons.phi[mask].flatten(),
                       weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask], rweight[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(
                    photons[leadphoton_index].pt[mask], rweight[mask])
                ezfill('photon_pt0',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_eta0',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_phi0',
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_pt0_recoil',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       recoil=df['recoil_pt'][mask
                                              & (leadphoton_index.counts > 0)],
                       weight=w_leading_photon)
                ezfill('photon_eta_phi',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], rweight[mask])

            # Tau
            if 'no_veto' in region:
                w_all_taus = weight_shape(taus.pt[mask], rweight[mask])
                ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus)

            # PV
            ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask])
            ezfill('npvgood',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=rweight[mask])

            ezfill('npv_nopu',
                   nvtx=df['PV_npvs'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
            ezfill('npvgood_nopu',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])

            ezfill('rho_all',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=region_weights.partial_weight(exclude=exclude)[mask])
            ezfill('rho_central',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=region_weights.partial_weight(exclude=exclude)[mask])
            ezfill('rho_all_nopu',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
            ezfill('rho_central_nopu',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
        return output
コード例 #28
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
        df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
            'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
                'is_lo_w_ewk'] | df['is_lo_z_ewk']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
                'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
            gen = setup_gen_candidates(df)
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_dress']
        elif df['is_lo_g']:
            gen = setup_gen_candidates(df)
            gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

        # Generator-level leading dijet mass
        if df['has_lhe_v_pt']:
            genjets = setup_lhe_cleaned_genjets(df)
            digenjet = genjets[:, :2].distincts()
            df['mjj_gen'] = digenjet.mass.max()

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
            df, cfg)

        # Filtering ak4 jets according to pileup ID
        ak4 = ak4[ak4.puid]
        bjets = bjets[bjets.puid]

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts == 1) *
                       mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) *
                       mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index = ak4.pt.argmax()

        elejet_pairs = ak4[:, :1].cross(electrons)
        df['dREleJet'] = np.hypot(
            elejet_pairs.i0.eta - elejet_pairs.i1.eta,
            dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:, :1].cross(muons)
        df['dRMuonJet'] = np.hypot(
            muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
            dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                                   muons, photons)
        df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30,
                                                  etamax=4.7)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               met_phi,
                                               njet=4,
                                               ptmin=30,
                                               etamax=4.7)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size) == 1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)

        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('dpfcalo',
                      np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

        if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018
                and not cfg.RUN.SYNC):
            selection.add('hemveto', df['hemveto'])
        else:
            selection.add('hemveto', np.ones(df.size) == 1)

        # AK4 dijet
        diak4 = ak4[:, :2].distincts()
        leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
            np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
        trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
            np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
        hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
        has_track0 = np.abs(diak4.i0.eta) <= 2.5
        has_track1 = np.abs(diak4.i1.eta) <= 2.5

        leadak4_id = diak4.i0.tightId & (has_track0 * (
            (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
            (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
        trailak4_id = has_track1 * (
            (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
            (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
        df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

        selection.add('two_jets', diak4.counts > 0)
        selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
        selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
        selection.add('hemisphere', hemisphere)
        selection.add('leadak4_id', leadak4_id.any())
        selection.add('trailak4_id', trailak4_id.any())
        selection.add('mjj',
                      df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
        selection.add(
            'dphijj',
            df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
        selection.add(
            'detajj',
            df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

        # Divide into three categories for trigger study
        if cfg.RUN.TRIGGER_STUDY:
            two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
                diak4.i1.eta) <= 2.4)
            two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
                diak4.i1.eta) > 2.4)
            one_jet_forward_one_jet_central = (~two_central_jets) & (
                ~two_forward_jets)
            selection.add('two_central_jets', two_central_jets.any())
            selection.add('two_forward_jets', two_forward_jets.any())
            selection.add('one_jet_forward_one_jet_central',
                          one_jet_forward_one_jet_central.any())

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Diele CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())
        selection.add('two_electrons', electrons.counts == 2)

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index = photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId \
                         & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig',
                      photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if df['has_lhe_v_pt']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        if 'LHE_Njets' in df:
            output['lhe_njets'].fill(dataset=dataset,
                                     multiplicity=df['LHE_Njets'])
        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
        if 'LHE_HTIncoming' in df:
            output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons,
                                        electrons, photons)
            weights = pileup_weights(weights, df, evaluator, cfg)
            if not (gen_v_pt is None):
                weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                             df['mjj_gen'])

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask]]
                output['kinematics']['met_phi'] += [met_phi[mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[df['is_tight_muon']].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[df['is_tight_electron']].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[df['is_tight_photon']].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
            output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

        regions = vbfhinv_regions(cfg)
        for region, cuts in regions.items():
            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region]['all'] += df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' + region][cutname] += selection.all(
                        *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                """Fill the multiplicity histogram ``name`` for the current region.

                The per-event ``counts`` of ``candidates`` are taken after
                applying the enclosing loop's ``mask``; the same mask is
                applied to the total event weight so both arrays stay aligned.
                """
                fill = output[name].fill
                multiplicity = candidates[mask].counts
                event_weight = weights.weight()[mask]
                fill(dataset=dataset,
                     region=region,
                     multiplicity=multiplicity,
                     weight=event_weight)

            fill_mult('ak4_mult', ak4)
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Fill ``output[name]`` with the current dataset and region.

                All keyword arguments are forwarded unchanged to the
                histogram's ``fill`` call.
                """
                histogram = output[name]
                histogram.fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
                ezfill("weights_wide",
                       weight_type=wname,
                       weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, weights.weight()[mask])
            w_alljets_nopref = weight_shape(
                ak4[mask].eta,
                weights.partial_weight(exclude=['prefire'])[mask])

            ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            ezfill('ak4_eta_nopref',
                   jeteta=ak4[mask].eta.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_phi_nopref',
                   jetphi=ak4[mask].phi.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_pt_nopref',
                   jetpt=ak4[mask].pt.flatten(),
                   weight=w_alljets_nopref)

            # Leading ak4
            w_diak4 = weight_shape(diak4.pt[mask], weights.weight()[mask])
            ezfill('ak4_eta0',
                   jeteta=diak4.i0.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi0',
                   jetphi=diak4.i0.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt0',
                   jetpt=diak4.i0.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw0',
                   jetpt=diak4.i0.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf0',
                   frac=diak4.i0.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf0',
                   frac=diak4.i0.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst0',
                   nconst=diak4.i0.nconst[mask].flatten(),
                   weight=w_diak4)

            # Trailing ak4
            ezfill('ak4_eta1',
                   jeteta=diak4.i1.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi1',
                   jetphi=diak4.i1.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt1',
                   jetpt=diak4.i1.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw1',
                   jetpt=diak4.i1.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf1',
                   frac=diak4.i1.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf1',
                   frac=diak4.i1.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst1',
                   nconst=diak4.i1.nconst[mask].flatten(),
                   weight=w_diak4)

            # B tag discriminator
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], weights.weight()[mask])
            ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo',
                   dpfcalo=df["dPFCalo"][mask],
                   weight=weights.weight()[mask])
            ezfill('met', met=met_pt[mask], weight=weights.weight()[mask])
            ezfill('met_phi', phi=met_phi[mask], weight=weights.weight()[mask])
            ezfill('recoil',
                   recoil=df["recoil_pt"][mask],
                   weight=weights.weight()[mask])
            ezfill('recoil_phi',
                   phi=df["recoil_phi"][mask],
                   weight=weights.weight()[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=weights.weight()[mask])
            ezfill('dphijr',
                   dphi=df["minDPhiJetRecoil"][mask],
                   weight=weights.weight()[mask])

            ezfill('dphijj',
                   dphi=df["dphijj"][mask],
                   weight=weights.weight()[mask])
            ezfill('detajj',
                   deta=df["detajj"][mask],
                   weight=weights.weight()[mask])
            ezfill('mjj', mjj=df["mjj"][mask], weight=weights.weight()[mask])

            # Two dimensional
            ezfill('recoil_mjj',
                   recoil=df["recoil_pt"][mask],
                   mjj=df["mjj"][mask],
                   weight=weights.weight()[mask])

            # Muons
            if '_1m_' in region or '_2m_' in region:
                w_allmu = weight_shape(muons.pt[mask], weights.weight()[mask])
                ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
                ezfill('muon_mt',
                       mt=df['MT_mu'][mask],
                       weight=weights.weight()[mask])
                ezfill('muon_eta',
                       eta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_phi',
                       phi=muons.phi[mask].flatten(),
                       weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], weights.weight()[mask])
                ezfill('muon_pt0',
                       pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1',
                       pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0',
                       eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1',
                       eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0',
                       phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1',
                       phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt',
                       pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta',
                       eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

            # Electrons
            if '_1e_' in region or '_2e_' in region:
                w_allel = weight_shape(electrons.pt[mask],
                                       weights.weight()[mask])
                ezfill('electron_pt',
                       pt=electrons.pt[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_mt',
                       mt=df['MT_el'][mask],
                       weight=weights.weight()[mask])
                ezfill('electron_eta',
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_phi',
                       phi=electrons.phi[mask].flatten(),
                       weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask],
                                      weights.weight()[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(
                    photons[leadphoton_index].pt[mask],
                    weights.weight()[mask])
                ezfill('photon_pt0',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_eta0',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_phi0',
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_pt0_recoil',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       recoil=df['recoil_pt'][mask
                                              & (leadphoton_index.counts > 0)],
                       weight=w_leading_photon)
                ezfill('photon_eta_phi',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], weights.weight()[mask])

            # PV
            ezfill('npv',
                   nvtx=df['PV_npvs'][mask],
                   weight=weights.weight()[mask])
            ezfill('npvgood',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=weights.weight()[mask])

            ezfill('npv_nopu',
                   nvtx=df['PV_npvs'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('npvgood_nopu',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])

            ezfill('rho_all',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=weights.weight()[mask])
            ezfill('rho_central',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=weights.weight()[mask])
            ezfill('rho_all_nopu',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
            ezfill('rho_central_nopu',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=weights.partial_weight(exclude=['pileup'])[mask])
        return output
コード例 #29
0
    def process(self, df):
        # Dataset parameters
        dataset = df['dataset']
        year = self._samples[dataset]['year']
        xsec = self._samples[dataset]['xsec']
        sow = self._samples[dataset]['nSumOfWeights']
        isData = self._samples[dataset]['isData']
        datasets = [
            'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
            'DoubleElectron'
        ]
        for d in datasets:
            if d in dataset: dataset = dataset.split('_')[0]

        ### Recover objects, selection, functions and others...
        # Objects
        isTightMuon = self._objects['isTightMuonPOG']
        isTightElectron = self._objects['isTightElectronPOG']
        isGoodJet = self._objects['isGoodJet']
        isMuonMVA = self._objects[
            'isMuonMVA']  #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15)
        isElecMVA = self._objects[
            'isElecMVA']  #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)

        # Corrections
        GetMuonIsoSF = self._corrections['getMuonIso']
        GetMuonIDSF = self._corrections['getMuonID']

        # Selection
        passNJets = self._selection['passNJets']
        passMETcut = self._selection['passMETcut']
        passTrigger = self._selection['passTrigger']

        # Functions
        pow2 = self._functions['pow2']
        IsClosestToZ = self._functions['IsClosestToZ']
        GetGoodTriplets = self._functions['GetGoodTriplets']

        # Initialize objects
        met = Initialize({
            'pt': df['MET_pt'],
            'eta': 0,
            'phi': df['MET_phi'],
            'mass': 0
        })
        e = Initialize({
            'pt': df['Electron_pt'],
            'eta': df['Electron_eta'],
            'phi': df['Electron_phi'],
            'mass': df['Electron_mass']
        })
        mu = Initialize({
            'pt': df['Muon_pt'],
            'eta': df['Muon_eta'],
            'phi': df['Muon_phi'],
            'mass': df['Muon_mass']
        })
        j = Initialize({
            'pt': df['Jet_pt'],
            'eta': df['Jet_eta'],
            'phi': df['Jet_phi'],
            'mass': df['Jet_mass']
        })

        # Electron selection
        for key in self._e:
            e[key] = e.pt.zeros_like()
            if self._e[key] in df:
                e[key] = df[self._e[key]]
        #e['isGood'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, e.tightChrage, year)
        e['isGood'] = isElecMVA(e.pt,
                                e.eta,
                                e.dxy,
                                e.dz,
                                e.miniIso,
                                e.sip3d,
                                e.mvaTTH,
                                e.elecMVA,
                                e.lostHits,
                                e.convVeto,
                                e.tightCharge,
                                minpt=10)
        leading_e = e[e.pt.argmax()]
        leading_e = leading_e[leading_e.isGood.astype(np.bool)]

        # Muon selection
        for key in self._mu:
            mu[key] = mu.pt.zeros_like()
            if self._mu[key] in df:
                mu[key] = df[self._mu[key]]
        #mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.tight_id, mu.tightCharge, year)
        mu['isGood'] = isMuonMVA(mu.pt,
                                 mu.eta,
                                 mu.dxy,
                                 mu.dz,
                                 mu.miniIso,
                                 mu.sip3d,
                                 mu.mvaTTH,
                                 mu.mediumPrompt,
                                 mu.tightCharge,
                                 minpt=10)
        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)]

        e = e[e.isGood.astype(np.bool)]
        mu = mu[mu.isGood.astype(np.bool)]
        nElec = e.counts
        nMuon = mu.counts

        twoLeps = (nElec + nMuon) == 2
        threeLeps = (nElec + nMuon) == 3
        twoElec = (nElec == 2)
        twoMuon = (nMuon == 2)
        e0 = e[e.pt.argmax()]
        m0 = mu[mu.pt.argmax()]

        # Jet selection
        j['deepjet'] = df['Jet_btagDeepFlavB']
        for key in self._jet:
            j[key] = j.pt.zeros_like()
            if self._jet[key] in df:
                j[key] = df[self._jet[key]]

        j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
        j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(
            np.bool)
        #goodJets = j[(j['isgood'])&(j['isclean'])]
        #j0 = goodJets[goodJets.pt.argmax()]
        #nJets = goodJets.counts

        ##################################################################
        ### 2 same-sign leptons
        ##################################################################

        # emu
        singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
        singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
        em = singe.cross(singm)
        emSSmask = (em.i0.charge * em.i1.charge > 0)
        emSS = em[emSSmask]
        nemSS = len(emSS.flatten())

        # ee and mumu
        # pt>-1 to preserve jagged dimensions
        ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
        mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

        eepairs = ee.distincts()
        eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
        eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
        eeoffZmask = (eeonZmask == 0)

        mmpairs = mm.distincts()
        mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
        mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
        mmoffZmask = (mmonZmask == 0)

        eeSSonZ = eepairs[eeSSmask & eeonZmask]
        eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
        mmSSonZ = mmpairs[mmSSmask & mmonZmask]
        mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
        neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten())
        nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten())

        #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

        # Cuts
        eeSSmask = (eeSSmask[eeSSmask].counts > 0)
        mmSSmask = (mmSSmask[mmSSmask].counts > 0)
        eeonZmask = (eeonZmask[eeonZmask].counts > 0)
        eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
        mmonZmask = (mmonZmask[mmonZmask].counts > 0)
        mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
        emSSmask = (emSSmask[emSSmask].counts > 0)

        # njets
        goodJets = j[(j.isclean) & (j.isgood)]
        njets = goodJets.counts
        ht = goodJets.pt.sum()
        j0 = goodJets[goodJets.pt.argmax()]

        # nbtags
        nbtags = goodJets[goodJets.deepjet > 0.2770].counts

        ##################################################################
        ### 3 leptons
        ##################################################################

        # eem
        muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
        elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
        ee_eem = elec_eem.distincts()
        ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
        ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
            (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
        ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
        ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)

        eepair_eem = (ee_eem.i0 + ee_eem.i1)
        trilep_eem = eepair_eem.cross(muon_eem)
        trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

        # mme
        muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
        elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
        mm_mme = muon_mme.distincts()
        mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
        mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
            (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
        mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
        mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)

        mmpair_mme = (mm_mme.i0 + mm_mme.i1)
        trilep_mme = mmpair_mme.cross(elec_mme)
        trilep_mme = (trilep_mme.i0 + trilep_mme.i1)
        mZ_mme = mmpair_mme.mass
        mZ_eem = eepair_eem.mass
        m3l_eem = trilep_eem.mass
        m3l_mme = trilep_mme.mass

        ### eee and mmm
        eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
        mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]
        # Create pairs
        eee_groups = eee.distincts()
        mmm_groups = mmm.distincts()
        # Calculate the invariant mass of the pairs
        invMass_eee = ((eee_groups.i0 + eee_groups.i1).mass)
        invMass_mmm = ((mmm_groups.i0 + mmm_groups.i1).mass)
        # OS pairs
        isOSeee = ((eee_groups.i0.charge != eee_groups.i1.charge))
        isOSmmm = ((mmm_groups.i0.charge != mmm_groups.i1.charge))
        # Get the ones with a mass closest to the Z mass (and in a range of  thr)
        clos_eee = IsClosestToZ(invMass_eee, thr=15)
        clos_mmm = IsClosestToZ(invMass_mmm, thr=15)
        # Finally, the mask for eee/mmm with/without OS onZ pair
        eeeOnZmask = (clos_eee) & (isOSeee)
        eeeOffZmask = (eeeOnZmask == 0)
        mmmOnZmask = (clos_mmm) & (isOSmmm)
        mmmOffZmask = (mmmOnZmask == 0)
        eeeOnZmask = (eeeOnZmask[eeeOnZmask].counts > 0)
        eeeOffZmask = (eeeOffZmask[eeeOffZmask].counts > 0)
        mmmOnZmask = (mmmOnZmask[mmmOnZmask].counts > 0)
        mmmOffZmask = (mmmOffZmask[mmmOffZmask].counts > 0)

        # Get Z and W invariant masses
        goodPairs_eee = eee_groups[(clos_eee) & (isOSeee)]
        eZ0 = goodPairs_eee.i0[goodPairs_eee.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()
        eZ1 = goodPairs_eee.i1[goodPairs_eee.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()
        goodPairs_mmm = mmm_groups[(clos_mmm) & (isOSmmm)]
        mZ0 = goodPairs_mmm.i0[goodPairs_mmm.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()
        mZ1 = goodPairs_mmm.i1[goodPairs_mmm.counts > 0].regular(
        )  #[(goodPairs_eee.counts>0)].regular()

        eee_reg = eee[(eeeOnZmask)].regular()
        eW = np.append(eee_reg, eZ0, axis=1)
        eW = np.append(eW, eZ1, axis=1)
        eWmask = np.apply_along_axis(
            lambda a: [list(a).count(x) == 1 for x in a], 1, eW)
        eW = eW[eWmask]
        mmm_reg = mmm[(mmmOnZmask)].regular()
        mW = np.append(mmm_reg, mZ0, axis=1)
        mW = np.append(mW, mZ1, axis=1)
        mWmask = np.apply_along_axis(
            lambda a: [list(a).count(x) == 1 for x in a], 1, mW)
        mW = mW[mWmask]

        eZ = [x + y for x, y in zip(eZ0, eZ1)]
        triElec = [x + y for x, y in zip(eZ, eW)]
        mZ_eee = [t[0].mass for t in eZ]
        m3l_eee = [t[0].mass for t in triElec]
        mZ = [x + y for x, y in zip(mZ0, mZ1)]
        triMuon = [x + y for x, y in zip(mZ, mW)]
        mZ_mmm = [t[0].mass for t in mZ]
        m3l_mmm = [t[0].mass for t in triMuon]

        # Triggers
        #passTrigger = lambda df, n, m, o : np.ones_like(df['MET_pt'], dtype=np.bool) # XXX
        trig_eeSS = passTrigger(df, 'ee', isData, dataset)
        trig_mmSS = passTrigger(df, 'mm', isData, dataset)
        trig_emSS = passTrigger(df, 'em', isData, dataset)
        trig_eee = passTrigger(df, 'eee', isData, dataset)
        trig_mmm = passTrigger(df, 'mmm', isData, dataset)
        trig_eem = passTrigger(df, 'eem', isData, dataset)
        trig_mme = passTrigger(df, 'mme', isData, dataset)

        # MET filters

        # Weights
        genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
        weights = processor.Weights(df.size)
        weights.add('norm', genw if isData else (xsec / sow) * genw)

        # Selections and cuts
        selections = processor.PackedSelection()
        channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
        selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
        selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
        selections.add('emSS', (emSSmask) & (trig_emSS))

        channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
        selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
        selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
        selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
        selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

        channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
        selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
        selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
        selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
        selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

        levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
        selections.add('base', (nElec + nMuon >= 2))
        selections.add('2jets', (njets >= 2))
        selections.add('4jets', (njets >= 4))
        selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
        selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

        # Variables
        invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
        invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
        invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
        invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
        invMass_emSS = (emSS.i0 + emSS.i1).mass

        varnames = {}
        varnames['met'] = met.pt
        varnames['ht'] = ht
        varnames['njets'] = njets
        varnames['nbtags'] = nbtags
        varnames['invmass'] = {
            'eeSSonZ': invMass_eeSSonZ,
            'eeSSoffZ': invMass_eeSSoffZ,
            'mmSSonZ': invMass_mmSSonZ,
            'mmSSoffZ': invMass_mmSSoffZ,
            'emSS': invMass_emSS,
            'eemSSonZ': mZ_eem,
            'eemSSoffZ': mZ_eem,
            'mmeSSonZ': mZ_mme,
            'mmeSSoffZ': mZ_mme,
            'eeeSSonZ': mZ_eee,
            'eeeSSoffZ': mZ_eee,
            'mmmSSonZ': mZ_mmm,
            'mmmSSoffZ': mZ_mmm,
        }
        varnames['m3l'] = {
            'eemSSonZ': m3l_eem,
            'eemSSoffZ': m3l_eem,
            'mmeSSonZ': m3l_mme,
            'mmeSSoffZ': m3l_mme,
            'eeeSSonZ': m3l_eee,
            'eeeSSoffZ': m3l_eee,
            'mmmSSonZ': m3l_mmm,
            'mmmSSoffZ': m3l_mmm,
        }
        varnames['e0pt'] = e0.pt
        varnames['e0eta'] = e0.eta
        varnames['m0pt'] = m0.pt
        varnames['m0eta'] = m0.eta
        varnames['j0pt'] = j0.pt
        varnames['j0eta'] = j0.eta
        varnames['counts'] = np.ones_like(df['MET_pt'], dtype=np.int)

        # Fill Histos
        hout = self.accumulator.identity()
        hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)

        for var, v in varnames.items():
            for ch in channels2LSS + channels3L:
                for lev in levels:
                    weight = weights.weight()
                    cuts = [ch] + [lev]
                    cut = selections.all(*cuts)
                    weights_flat = weight[cut].flatten()
                    weights_ones = np.ones_like(weights_flat, dtype=np.int)
                    if var == 'invmass':
                        if ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue
                        elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                            continue  #values = v[ch]
                        else:
                            values = v[ch][cut].flatten()
                        hout['invmass'].fill(sample=dataset,
                                             channel=ch,
                                             cut=lev,
                                             invmass=values,
                                             weight=weights_flat)
                    elif var == 'm3l':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                                'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        values = v[ch][cut].flatten()
                        hout['m3l'].fill(sample=dataset,
                                         channel=ch,
                                         cut=lev,
                                         m3l=values,
                                         weight=weights_flat)
                    else:
                        values = v[cut].flatten()
                        if var == 'ht':
                            hout[var].fill(ht=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'met':
                            hout[var].fill(met=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'njets':
                            hout[var].fill(njets=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'nbtags':
                            hout[var].fill(nbtags=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'counts':
                            hout[var].fill(counts=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_ones)
                        elif var == 'e0pt':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            hout[var].fill(e0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0pt':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            hout[var].fill(m0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'e0eta':
                            if ch in [
                                    'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                    'mmmSSonZ'
                            ]:
                                continue
                            hout[var].fill(e0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'm0eta':
                            if ch in [
                                    'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                    'eeeSSonZ'
                            ]:
                                continue
                            hout[var].fill(m0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0pt':
                            if lev == 'base': continue
                            hout[var].fill(j0pt=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)
                        elif var == 'j0eta':
                            if lev == 'base': continue
                            hout[var].fill(j0eta=values,
                                           sample=dataset,
                                           channel=ch,
                                           cut=lev,
                                           weight=weights_flat)

        return hout
コード例 #30
0
    def process(self, events):

        dataset = events.metadata['dataset']

        selected_regions = []
        for region, samples in self._samples.items():
            for sample in samples:
                if sample not in dataset: continue
                selected_regions.append(region)

        isData = 'genWeight' not in events.columns
        selection = processor.PackedSelection()
        weights = {}
        hout = self.accumulator.identity()

        ###
        #Getting corrections, ids from .coffea files
        ###   TODO(Sunil): check why these corrections are needed

        #get_msd_weight          = self._corrections['get_msd_weight']
        get_ttbar_weight        = self._corrections['get_ttbar_weight']
        get_nlo_weight          = self._corrections['get_nlo_weight'][self._year]         
        get_nnlo_weight         = self._corrections['get_nnlo_weight']
        get_nnlo_nlo_weight     = self._corrections['get_nnlo_nlo_weight']
        get_adhoc_weight        = self._corrections['get_adhoc_weight']
        get_pu_weight           = self._corrections['get_pu_weight'][self._year]          
        get_met_trig_weight     = self._corrections['get_met_trig_weight'][self._year]    
        get_met_zmm_trig_weight = self._corrections['get_met_zmm_trig_weight'][self._year]
        get_ele_trig_weight     = self._corrections['get_ele_trig_weight'][self._year]    
        get_pho_trig_weight     = self._corrections['get_pho_trig_weight'][self._year]    
        get_ele_loose_id_sf     = self._corrections['get_ele_loose_id_sf'][self._year]
        get_ele_tight_id_sf     = self._corrections['get_ele_tight_id_sf'][self._year]
        get_ele_loose_id_eff    = self._corrections['get_ele_loose_id_eff'][self._year]
        get_ele_tight_id_eff    = self._corrections['get_ele_tight_id_eff'][self._year]
        get_pho_tight_id_sf     = self._corrections['get_pho_tight_id_sf'][self._year]
        get_mu_tight_id_sf      = self._corrections['get_mu_tight_id_sf'][self._year]
        get_mu_loose_id_sf      = self._corrections['get_mu_loose_id_sf'][self._year]
        get_ele_reco_sf         = self._corrections['get_ele_reco_sf'][self._year]
        get_mu_tight_iso_sf     = self._corrections['get_mu_tight_iso_sf'][self._year]
        get_mu_loose_iso_sf     = self._corrections['get_mu_loose_iso_sf'][self._year]
        get_ecal_bad_calib      = self._corrections['get_ecal_bad_calib']
        get_deepflav_weight     = self._corrections['get_btag_weight']['deepflav'][self._year]
        Jetevaluator            = self._corrections['Jetevaluator']
        
        isLooseElectron = self._ids['isLooseElectron'] 
        isTightElectron = self._ids['isTightElectron'] 
        isLooseMuon     = self._ids['isLooseMuon']     
        isTightMuon     = self._ids['isTightMuon']     
        isLooseTau      = self._ids['isLooseTau']      
        isLoosePhoton   = self._ids['isLoosePhoton']   
        isTightPhoton   = self._ids['isTightPhoton']   
        isGoodJet       = self._ids['isGoodJet']       
        #isGoodFatJet    = self._ids['isGoodFatJet']    
        isHEMJet        = self._ids['isHEMJet']        
        
        match = self._common['match']
        deepflavWPs = self._common['btagWPs']['deepflav'][self._year]
        deepcsvWPs = self._common['btagWPs']['deepcsv'][self._year]

        ###
        # Derive jet corrector for JEC/JER
        ###
        
        JECcorrector = FactorizedJetCorrector(**{name: Jetevaluator[name] for name in self._jec[self._year]})
        JECuncertainties = JetCorrectionUncertainty(**{name:Jetevaluator[name] for name in self._junc[self._year]})
        JER = JetResolution(**{name:Jetevaluator[name] for name in self._jr[self._year]})
        JERsf = JetResolutionScaleFactor(**{name:Jetevaluator[name] for name in self._jersf[self._year]})
        Jet_transformer = JetTransformer(jec=JECcorrector,junc=JECuncertainties, jer = JER, jersf = JERsf)
        
        ###
        #Initialize global quantities (MET etc.)
        ###

        met = events.MET
        met['T']  = TVector2Array.from_polar(met.pt, met.phi)
        met['p4'] = TLorentzVectorArray.from_ptetaphim(met.pt, 0., met.phi, 0.)
        calomet = events.CaloMET

        ###
        #Initialize physics objects
        ###

        e = events.Electron
        e['isloose'] = isLooseElectron(e.pt,e.eta,e.dxy,e.dz,e.cutBased,self._year)
        e['istight'] = isTightElectron(e.pt,e.eta,e.dxy,e.dz,e.cutBased,self._year)
        e['T'] = TVector2Array.from_polar(e.pt, e.phi)
        #e['p4'] = TLorentzVectorArray.from_ptetaphim(e.pt, e.eta, e.phi, e.mass)
        e_loose = e[e.isloose.astype(np.bool)]
        e_tight = e[e.istight.astype(np.bool)]
        e_ntot = e.counts
        e_nloose = e_loose.counts
        e_ntight = e_tight.counts
        leading_e = e[e.pt.argmax()]
        leading_e = leading_e[leading_e.istight.astype(np.bool)]

        mu = events.Muon
        mu['isloose'] = isLooseMuon(mu.pt,mu.eta,mu.pfRelIso04_all,mu.looseId,self._year)
        mu['istight'] = isTightMuon(mu.pt,mu.eta,mu.pfRelIso04_all,mu.tightId,self._year)
        mu['T'] = TVector2Array.from_polar(mu.pt, mu.phi)
        #mu['p4'] = TLorentzVectorArray.from_ptetaphim(mu.pt, mu.eta, mu.phi, mu.mass)
        mu_loose=mu[mu.isloose.astype(np.bool)]
        mu_tight=mu[mu.istight.astype(np.bool)]
        mu_ntot = mu.counts
        mu_nloose = mu_loose.counts
        mu_ntight = mu_tight.counts
        leading_mu = mu[mu.pt.argmax()]
        leading_mu = leading_mu[leading_mu.istight.astype(np.bool)]

        tau = events.Tau
        tau['isclean']=~match(tau,mu_loose,0.5)&~match(tau,e_loose,0.5)
        tau['isloose']=isLooseTau(tau.pt,tau.eta,tau.idDecayMode,tau.idMVAoldDM2017v2,self._year)
        tau_clean=tau[tau.isclean.astype(np.bool)]
        tau_loose=tau_clean[tau_clean.isloose.astype(np.bool)]
        tau_ntot=tau.counts
        tau_nloose=tau_loose.counts

        pho = events.Photon
        pho['isclean']=~match(pho,mu_loose,0.5)&~match(pho,e_loose,0.5)
        _id = 'cutBasedBitmap'
        if self._year=='2016': _id = 'cutBased'
        pho['isloose']=isLoosePhoton(pho.pt,pho.eta,pho[_id],self._year)
        pho['istight']=isTightPhoton(pho.pt,pho.eta,pho[_id],self._year)
        pho['T'] = TVector2Array.from_polar(pho.pt, pho.phi)
        #pho['p4'] = TLorentzVectorArray.from_ptetaphim(pho.pt, pho.eta, pho.phi, pho.mass)
        pho_clean=pho[pho.isclean.astype(np.bool)]
        pho_loose=pho_clean[pho_clean.isloose.astype(np.bool)]
        pho_tight=pho_clean[pho_clean.istight.astype(np.bool)]
        pho_ntot=pho.counts
        pho_nloose=pho_loose.counts
        pho_ntight=pho_tight.counts
        leading_pho = pho[pho.pt.argmax()]
        leading_pho = leading_pho[leading_pho.isclean.astype(np.bool)]
        leading_pho = leading_pho[leading_pho.istight.astype(np.bool)]

        j = events.Jet
        j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId, j.neHEF, j.neEmEF, j.chHEF, j.chEmEF)
        j['isHEM'] = isHEMJet(j.pt, j.eta, j.phi)
        j['isclean'] = ~match(j,e_loose,0.4)&~match(j,mu_loose,0.4)&~match(j,pho_loose,0.4)
        #j['isiso'] = ~match(j,fj_clean,1.5)   # What is this ?????
        j['isdcsvL'] = (j.btagDeepB>deepcsvWPs['loose'])
        j['isdflvL'] = (j.btagDeepFlavB>deepflavWPs['loose'])
        j['T'] = TVector2Array.from_polar(j.pt, j.phi)
        j['p4'] = TLorentzVectorArray.from_ptetaphim(j.pt, j.eta, j.phi, j.mass)
        j['ptRaw'] =j.pt * (1-j.rawFactor)
        j['massRaw'] = j.mass * (1-j.rawFactor)
        j['rho'] = j.pt.ones_like()*events.fixedGridRhoFastjetAll.array
        j_good = j[j.isgood.astype(np.bool)]
        j_clean = j_good[j_good.isclean.astype(np.bool)]  # USe this instead of j_iso Sunil
        #j_iso = j_clean[j_clean.isiso.astype(np.bool)]
        j_iso = j_clean[j_clean.astype(np.bool)]    #Sunil changed  
        j_dcsvL = j_iso[j_iso.isdcsvL.astype(np.bool)]
        j_dflvL = j_iso[j_iso.isdflvL.astype(np.bool)]
        j_HEM = j[j.isHEM.astype(np.bool)]
        j_ntot=j.counts
        j_ngood=j_good.counts
        j_nclean=j_clean.counts
        j_niso=j_iso.counts
        j_ndcsvL=j_dcsvL.counts
        j_ndflvL=j_dflvL.counts
        j_nHEM = j_HEM.counts
        leading_j = j[j.pt.argmax()]
        leading_j = leading_j[leading_j.isgood.astype(np.bool)]
        leading_j = leading_j[leading_j.isclean.astype(np.bool)]

        ###
        #Calculating derived quantities (dilepton candidates)
        ###

        ele_pairs = e_loose.distincts()
        diele = ele_pairs.i0+ele_pairs.i1
        diele['T'] = TVector2Array.from_polar(diele.pt, diele.phi)
        leading_ele_pair = ele_pairs[diele.pt.argmax()]
        leading_diele = diele[diele.pt.argmax()]

        mu_pairs = mu_loose.distincts()
        dimu = mu_pairs.i0+mu_pairs.i1
        dimu['T'] = TVector2Array.from_polar(dimu.pt, dimu.phi)
        leading_mu_pair = mu_pairs[dimu.pt.argmax()]
        leading_dimu = dimu[dimu.pt.argmax()]

        ###
        # Calculate recoil
        ###   HT,  LT, dPhi,  mT_{W}, MT_misET

        um = met.T+leading_mu.T.sum()
        ue = met.T+leading_e.T.sum()
        umm = met.T+leading_dimu.T.sum()
        uee = met.T+leading_diele.T.sum()
        ua = met.T+leading_pho.T.sum()
        #Need  help from Matteo
        u = {}
        u['sr']=met.T
        u['wecr']=ue
        u['tecr']=ue
        u['wmcr']=um
        u['tmcr']=um
        u['zecr']=uee
        u['zmcr']=umm
        u['gcr']=ua

        ###
        #Calculating weights
        ###
        if not isData:
            
            ###
            # JEC/JER
            ###

            #j['ptGenJet'] = j.matched_gen.pt
            #Jet_transformer.transform(j)

            gen = events.GenPart
            
            #Need to understand this part Sunil
            gen['isb'] = (abs(gen.pdgId)==5)&gen.hasFlags(['fromHardProcess', 'isLastCopy'])
            gen['isc'] = (abs(gen.pdgId)==4)&gen.hasFlags(['fromHardProcess', 'isLastCopy'])

            gen['isTop'] = (abs(gen.pdgId)==6)&gen.hasFlags(['fromHardProcess', 'isLastCopy'])
            gen['isW'] = (abs(gen.pdgId)==24)&gen.hasFlags(['fromHardProcess', 'isLastCopy'])
            gen['isZ'] = (abs(gen.pdgId)==23)&gen.hasFlags(['fromHardProcess', 'isLastCopy'])
            gen['isA'] = (abs(gen.pdgId)==22)&gen.hasFlags(['fromHardProcess', 'isLastCopy'])

            genTops = gen[gen.isTop]
            genWs = gen[gen.isW]
            genZs = gen[gen.isZ]
            genAs = gen[gen.isA]

            nlo  = np.ones(events.size)
            nnlo = np.ones(events.size)
            nnlo_nlo = np.ones(events.size)
            adhoc = np.ones(events.size)
            if('TTJets' in dataset): 
                nlo = np.sqrt(get_ttbar_weight(genTops[:,0].pt.sum()) * get_ttbar_weight(genTops[:,1].pt.sum()))
            #elif('GJets' in dataset): 
            #    nlo = get_nlo_weight['a'](genAs.pt.max())
            elif('WJets' in dataset): 
                #nlo = get_nlo_weight['w'](genWs.pt.max())
                #if self._year != '2016': adhoc = get_adhoc_weight['w'](genWs.pt.max())
                #nnlo = get_nnlo_weight['w'](genWs.pt.max())
                nnlo_nlo = get_nnlo_nlo_weight['w'](genWs.pt.max())*(genWs.pt.max()>100).astype(np.int) + (genWs.pt.max()<=100).astype(np.int)
            elif('DY' in dataset): 
                #nlo = get_nlo_weight['z'](genZs.pt.max())
                #if self._year != '2016': adhoc = get_adhoc_weight['z'](genZs.pt.max())
                #nnlo = get_nnlo_weight['dy'](genZs.pt.max())
                nnlo_nlo = get_nnlo_nlo_weight['dy'](genZs.pt.max())*(genZs.pt.max()>100).astype(np.int) + (genZs.pt.max()<=100).astype(np.int)
            elif('ZJets' in dataset): 
                #nlo = get_nlo_weight['z'](genZs.pt.max())
                #if self._year != '2016': adhoc = get_adhoc_weight['z'](genZs.pt.max())
                #nnlo = get_nnlo_weight['z'](genZs.pt.max())
                nnlo_nlo = get_nnlo_nlo_weight['z'](genZs.pt.max())*(genZs.pt.max()>100).astype(np.int) + (genZs.pt.max()<=100).astype(np.int)

            ###
            # Calculate PU weight and systematic variations
            ###

            pu = get_pu_weight['cen'](events.PV.npvs)
            #puUp = get_pu_weight['up'](events.PV.npvs)
            #puDown = get_pu_weight['down'](events.PV.npvs)

            ###
            # Trigger efficiency weight
            ###
            
            ele1_trig_weight = get_ele_trig_weight(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum())
            ele2_trig_weight = get_ele_trig_weight(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum())

            # Need Help from Matteo
            trig = {}

            trig['sre'] = get_ele_trig_weight(leading_e.eta.sum(), leading_e.pt.sum()) 
            trig['srm'] = #Need  be fixed  in Util first 
            trig['ttbare'] = get_ele_trig_weight(leading_e.eta.sum(), leading_e.pt.sum())
            trig['ttbarm'] = #Need  be fixed  in Util first 
            trig['wjete'] = get_ele_trig_weight(leading_e.eta.sum(), leading_e.pt.sum())
            trig['wjetm'] = #Need  be fixed  in Util first 
            trig['dilepe'] = 1 - (1-ele1_trig_weight)*(1-ele2_trig_weight)  
            #trig['dilepm'] =  Need  be fixed  in Util first 

            # For muon ID weights, SFs are given as a function of abs(eta), but in 2016 the signed eta is used instead
            ##

            mueta = abs(leading_mu.eta.sum())
            mu1eta=abs(leading_mu_pair.i0.eta.sum())
            mu2eta=abs(leading_mu_pair.i1.eta.sum())
            if self._year=='2016':
                mueta=leading_mu.eta.sum()
                mu1eta=leading_mu_pair.i0.eta.sum()
                mu2eta=leading_mu_pair.i1.eta.sum()

            ### 
            # Calculating electron and muon ID SF and efficiencies (when provided)
            ###

            mu1Tsf = get_mu_tight_id_sf(mu1eta,leading_mu_pair.i0.pt.sum())
            mu2Tsf = get_mu_tight_id_sf(mu2eta,leading_mu_pair.i1.pt.sum())
            mu1Lsf = get_mu_loose_id_sf(mu1eta,leading_mu_pair.i0.pt.sum())
            mu2Lsf = get_mu_loose_id_sf(mu2eta,leading_mu_pair.i1.pt.sum())
    
            e1Tsf  = get_ele_tight_id_sf(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum())
            e2Tsf  = get_ele_tight_id_sf(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum())
            e1Lsf  = get_ele_loose_id_sf(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum())
            e2Lsf  = get_ele_loose_id_sf(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum())

            e1Teff= get_ele_tight_id_eff(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum())
            e2Teff= get_ele_tight_id_eff(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum())
            e1Leff= get_ele_loose_id_eff(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum())
            e2Leff= get_ele_loose_id_eff(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum())

            # Need Help from  Matteo
            ids={}
            ids['sre'] = get_ele_tight_id_sf(leading_e.eta.sum(),leading_e.pt.sum())
            ids['srm'] = get_mu_tight_id_sf(mueta,leading_mu.pt.sum())
            ids['ttbare'] = get_ele_tight_id_sf(leading_e.eta.sum(),leading_e.pt.sum())
            ids['ttbarm'] = get_mu_tight_id_sf(mueta,leading_mu.pt.sum())
            ids['wjete'] = get_ele_tight_id_sf(leading_e.eta.sum(),leading_e.pt.sum())
            ids['wjetm'] = get_mu_tight_id_sf(mueta,leading_mu.pt.sum())
            ids['dilepe'] = e1Lsf*e2Lsf
            ids['dilepm'] = mu1Lsf*mu2Lsf


            ###
            # Reconstruction weights for electrons
            ###
            
            e1sf_reco = get_ele_reco_sf(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum())
            e2sf_reco = get_ele_reco_sf(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum())
            
            # Need Help from  Matteo 

            reco = {}
            reco['sre'] = get_ele_reco_sf(leading_e.eta.sum(),leading_e.pt.sum())
            reco['srm'] = np.ones(events.size)
            reco['ttbare'] = get_ele_reco_sf(leading_e.eta.sum(),leading_e.pt.sum())
            reco['ttbarm'] = np.ones(events.size)
            reco['wjete'] = get_ele_reco_sf(leading_e.eta.sum(),leading_e.pt.sum())
            reco['wjetm'] = np.ones(events.size)
            reco['dilepe'] = e1sf_reco * e2sf_reco
            reco['dilepm'] = np.ones(events.size)

            ###
            # Isolation weights for muons
            ###

            mu1Tsf_iso = get_mu_tight_iso_sf(mu1eta,leading_mu_pair.i0.pt.sum())
            mu2Tsf_iso = get_mu_tight_iso_sf(mu2eta,leading_mu_pair.i1.pt.sum())
            mu1Lsf_iso = get_mu_loose_iso_sf(mu1eta,leading_mu_pair.i0.pt.sum())
            mu2Lsf_iso = get_mu_loose_iso_sf(mu2eta,leading_mu_pair.i1.pt.sum())

            # Need Help from  Matteo 

            isolation = {}
            isolation['sre'] = np.ones(events.size)
            isolation['srm'] = get_mu_tight_iso_sf(mueta,leading_mu.pt.sum())
            isolation['ttbare'] = np.ones(events.size)
            isolation['ttbarm'] = get_mu_tight_iso_sf(mueta,leading_mu.pt.sum())
            isolation['wjete'] = np.ones(events.size)
            isolation['wjetm'] = get_mu_tight_iso_sf(mueta,leading_mu.pt.sum())
            isolation['dilepe'] = np.ones(events.size)
            isolation['dilepm'] = mu1Lsf_iso*mu2Lsf_iso


            ###
            # AK4 b-tagging weights
            ###

            btag = {}
            btagUp = {}
            btagDown = {}
            # Need Help from  Matteo  
            btag['sr'],   btagUp['sr'],   btagDown['sr']   = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0')
            btag['wmcr'], btagUp['wmcr'], btagDown['wmcr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0')
            btag['tmcr'], btagUp['tmcr'], btagDown['tmcr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'-1')
            btag['wecr'], btagUp['wecr'], btagDown['wecr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0')
            btag['tecr'], btagUp['tecr'], btagDown['tecr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'-1')
            btag['zmcr'], btagUp['zmcr'], btagDown['zmcr'] = np.ones(events.size), np.ones(events.size), np.ones(events.size)#get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0')
            btag['zecr'], btagUp['zecr'], btagDown['zecr'] = np.ones(events.size), np.ones(events.size), np.ones(events.size)#get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0')
            btag['gcr'],  btagUp['gcr'],  btagDown['gcr']  = np.ones(events.size), np.ones(events.size), np.ones(events.size)#get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0')
            
            for r in selected_regions:
                weights[r] = processor.Weights(len(events))
                weights[r].add('genw',events.genWeight)
                weights[r].add('nlo',nlo)
                #weights[r].add('adhoc',adhoc)
                #weights[r].add('nnlo',nnlo)
                weights[r].add('nnlo_nlo',nnlo_nlo)
                weights[r].add('pileup',pu)#,puUp,puDown)
                weights[r].add('trig', trig[r])
                weights[r].add('ids', ids[r])
                weights[r].add('reco', reco[r])
                weights[r].add('isolation', isolation[r])
                weights[r].add('btag',btag[r], btagUp[r], btagDown[r])
                
        #leading_fj = fj[fj.pt.argmax()]
        #leading_fj = leading_fj[leading_fj.isgood.astype(np.bool)]
        #leading_fj = leading_fj[leading_fj.isclean.astype(np.bool)]
        
        ###
        #Importing the MET filters per year from metfilters.py and constructing the filter boolean
        ###

        met_filters =  np.ones(events.size, dtype=np.bool)
        for flag in AnalysisProcessor.met_filter_flags[self._year]:
            met_filters = met_filters & events.Flag[flag]
        selection.add('met_filters',met_filters)

        triggers = np.zeros(events.size, dtype=np.bool)
        for path in self._met_triggers[self._year]:
            if path not in events.HLT.columns: continue
            triggers = triggers | events.HLT[path]
        selection.add('met_triggers', triggers)

        triggers = np.zeros(events.size, dtype=np.bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.columns: continue
            triggers = triggers | events.HLT[path]
        selection.add('singleelectron_triggers', triggers)

        triggers = np.zeros(events.size, dtype=np.bool)
        for path in self._singlemuon_triggers[self._year]:
            if path not in events.HLT.columns: continue
            triggers = triggers | events.HLT[path]
        selection.add('singlemuon_triggers', triggers)

        triggers = np.zeros(events.size, dtype=np.bool)
        for path in self._singlephoton_triggers[self._year]:
            if path not in events.HLT.columns: continue
            triggers = triggers | events.HLT[path]
        selection.add('singlephoton_triggers', triggers)

        noHEMj = np.ones(events.size, dtype=np.bool)
        if self._year=='2018': noHEMj = (j_nHEM==0)

        selection.add('iszeroL',
                      (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)