Python PackedSelection.require Examples

Programming Language: Python

Namespace/Package Name: coffea.analysis_tools

Class/Type: PackedSelection

Method/Function: require

Examples at hotexamples.com: 20

Python PackedSelection.require - 20 examples found. These are the top-rated real-world Python examples of coffea.analysis_tools.PackedSelection.require, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

PackedSelection(29)

add(27)

require(20)

all(9)

any(1)

Example #1

Show file

def test_packed_selection():
    """Check ``PackedSelection`` add/require/all/any and its error paths.

    Four boolean masks of length 10 are registered and the combined masks
    returned by ``require``, ``all`` and ``any`` are compared with the
    expected arrays.  The test then checks that ``add`` rejects a mask of
    the wrong shape and a mask of non-boolean dtype with ``ValueError``,
    and that adding 65 selections raises ``RuntimeError``.
    """
    from coffea.analysis_tools import PackedSelection

    sel = PackedSelection()

    shape = (10, )
    # Use the builtin ``bool``: the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    all_true = np.full(shape=shape, fill_value=True, dtype=bool)
    all_false = np.full(shape=shape, fill_value=False, dtype=bool)
    fizz = np.arange(shape[0]) % 3 == 0
    buzz = np.arange(shape[0]) % 5 == 0
    # Correct shape, non-boolean dtype.
    ones = np.ones(shape=shape, dtype=np.uint64)
    # Boolean dtype, wrong shape.  This must NOT rebind ``ones`` (the
    # original chained assignment did), otherwise the dtype check below
    # would silently re-test the shape check instead.
    wrong_shape = np.ones(shape=(shape[0] - 5, ), dtype=bool)

    sel.add("all_true", all_true)
    sel.add("all_false", all_false)
    sel.add("fizz", fizz)
    sel.add("buzz", buzz)

    assert np.all(sel.require(all_true=True, all_false=False) == all_true)
    # allow truthy values
    assert np.all(sel.require(all_true=1, all_false=0) == all_true)
    assert np.all(sel.all("all_true", "all_false") == all_false)
    assert np.all(sel.any("all_true", "all_false") == all_true)
    # Only index 0 is a multiple of both 3 and 5 within 0..9.
    assert np.all(
        sel.all("fizz", "buzz") == np.array([
            True, False, False, False, False, False, False, False, False, False
        ]))
    # Multiples of 3 or 5 within 0..9: 0, 3, 5, 6, 9.
    assert np.all(
        sel.any("fizz", "buzz") == np.array(
            [True, False, False, True, False, True, True, False, False, True]))

    with pytest.raises(ValueError):
        sel.add("wrong_shape", wrong_shape)

    with pytest.raises(ValueError):
        sel.add("ones", ones)

    overpack = PackedSelection()
    with pytest.raises(RuntimeError):
        for i in range(65):
            # Format the index into the name so every selection is distinct
            # (the original passed the literal "sel_%d" 65 times).
            overpack.add("sel_%d" % i, all_true)

Example #2

Show file

File: test_analysis_tools.py Project: yihui-lai/coffea

def test_packed_selection():
    """Smoke-test ``PackedSelection`` with masks shaped like the dummy jagged counts.

    Two boolean masks are registered and the results of ``require`` and
    ``all`` are asserted.  The remaining calls only *tolerate* the listed
    exception (``try``/``except``/``pass``); they do not fail the test when
    no exception is raised.
    """
    from coffea.analysis_tools import PackedSelection

    sel = PackedSelection()

    # Only ``counts`` is used here; it supplies the per-event shape.
    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # Use the builtin ``bool``: the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    all_true = np.full(shape=counts.shape, fill_value=True, dtype=bool)
    all_false = np.full(shape=counts.shape, fill_value=False, dtype=bool)
    # Correct shape, non-boolean dtype.
    ones = np.ones(shape=counts.shape, dtype=np.uint64)
    # Boolean dtype, wrong shape.  Must not rebind ``ones`` (the original
    # chained assignment did), or the dtype case below is never exercised.
    wrong_shape = np.ones(shape=(counts.shape[0] - 5, ), dtype=bool)

    sel.add("all_true", all_true)
    sel.add("all_false", all_false)

    assert np.all(sel.require(all_true=True, all_false=False) == all_true)
    assert np.all(sel.all("all_true", "all_false") == all_false)

    # NOTE(review): whether truthy non-bool requirements are accepted looks
    # version-dependent, so a ValueError is tolerated but not required.
    try:
        sel.require(all_true=1, all_false=0)
    except ValueError:
        pass

    try:
        sel.add("wrong_shape", wrong_shape)
    except ValueError:
        pass

    try:
        sel.add("ones", ones)
    except ValueError:
        pass

    try:
        overpack = PackedSelection()
        for i in range(65):
            # Format the index into the name so every selection is distinct
            # (the original passed the literal "sel_%d" 65 times).
            overpack.add("sel_%d" % i, all_true)
    except RuntimeError:
        pass

Example #3

Show file

File: SS_selection.py Project: cjmcmahon1/tW_scattering

def SS_selection(lep1, lep2):
    """Return a per-event mask that is True when a same-sign lepton pair exists.

    Pairs are formed within ``lep2``, within ``lep1`` and across the two
    collections; an event passes when any pair has a positive charge
    product.

    Args:
        lep1: Lepton collection exposing a ``charge`` field.
        lep2: Lepton collection exposing a ``charge`` field.
            NOTE(review): presumably jagged awkward arrays with one list of
            leptons per event -- confirm against the callers.

    Returns:
        The result of ``PackedSelection.require(SS=True)``.
    """
    selection = PackedSelection()

    # The original also computed ``is_dilep``, ``pos_charge`` and
    # ``neg_charge`` here, but none of them was ever used; they were removed.

    # presumably ``choose(x, 2)`` yields pairs within one collection and
    # ``cross(a, b)`` pairs across two collections -- TODO confirm
    dilep2 = choose(lep2, 2)
    dilep1 = choose(lep1, 2)
    dilep = cross(lep2, lep1)

    # A positive charge product means both leptons carry the same sign.
    is_SS = (ak.any((dilep2['0'].charge * dilep2['1'].charge) > 0, axis=1)
             | ak.any((dilep1['0'].charge * dilep1['1'].charge) > 0, axis=1)
             | ak.any((dilep['0'].charge * dilep['1'].charge) > 0, axis=1))

    selection.add('SS', is_SS)
    ss_reqs = ['SS']

    ss_reqs_d = {sel: True for sel in ss_reqs}
    return selection.require(**ss_reqs_d)

Example #4

Show file

File: systematic_processor.py Project: JacksonWallace/tW_scattering

    def process(self, events):
        """Fill the nominal and systematically varied histograms for one batch of events.

        Events are first restricted to those with more than two entries in
        ``events.Jet``.  Lepton and jet collections are built, an event
        weight and a ``PackedSelection`` are assembled, a cutflow is
        recorded and the nominal histograms are filled.  For every dataset
        other than ``'MuonEG'`` the jet-multiplicity and MET requirements
        are re-evaluated once per entry of ``self.variations`` and the
        corresponding ``*_<var>`` histograms are filled.

        Args:
            events: Event array exposing ``Jet``, ``MET``, ``PV`` and
                ``metadata['dataset']`` (and ``weight`` for non-``MuonEG``
                datasets), as read below.

        Returns:
            The accumulator created by ``self.accumulator.identity()``,
            after filling.
        """

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # (still read below for cfg['lumi'] when building the lumi weight)
        cfg = loadConfig()

        # Bookkeeping: event counts before and after the preselection.
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tight").get()
        vetomuon = Collections(ev, "Muon", "veto").get()
        dimuon = choose(muon, 2)
        # Charge product > 0: same-sign pair; < 0: opposite-sign pair.
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        vetoelectron = Collections(ev, "Electron", "veto").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        # Of the same-sign/opposite-sign flags above, only OSlepton is used
        # in the selection below.
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7)
        jet = jet[(jet.pt > 25) & (jet.jetId > 1)]
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        # Exactly one tight electron and one tight muon.
        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        # At least one lepton above 25 and at least two leptons above 20 in pt.
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        # Exactly two veto-level leptons in total.
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        # No lumi weight or lepton scale factors are added for 'MuonEG'.
        if not dataset == 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            ## PU weight - not in the babies...
            #weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            #weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('MET>30', (ev.MET.pt > 30))

        # Opposite-sign dilepton requirements; the baseline adds jet
        # multiplicity and MET on top of them.
        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + ['N_jet>2', 'MET>30']

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        # Cumulative cutflow: each row requires all cuts up to and including
        # the current one.
        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight.weight()[os_selection])
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight.weight()[os_selection])
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight.weight()[os_selection])

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight.weight()[os_selection])

        # The 0:1 slice keeps at most the first jet of each event.
        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=weight.weight()[BL])

        # Now, take care of systematic uncertainties
        if not dataset == 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet_var = getPtEtaPhi(alljets, pt_var=var)
                jet_var = jet_var[(jet_var.pt > 25)]
                jet_var = jet_var[~match(
                    jet_var, muon,
                    deltaRCut=0.4)]  # remove jets that overlap with muons
                jet_var = jet_var[~match(
                    jet_var, electron,
                    deltaRCut=0.4)]  # remove jets that overlap with electrons

                # get the modified selection -> more difficult
                # NOTE(review): the nominal 'N_jet>2' cut is ak.num(jet) >= 3,
                # but this varied cut is "> 3" (i.e. >= 4) -- confirm whether
                # that difference is intended.
                # NOTE(review): every variation adds two more masks to
                # `selection`; confirm the total stays within the capacity
                # of PackedSelection.
                selection.add(
                    'N_jet>2_' + var, (ak.num(jet_var.pt) > 3)
                )  # something needs to be improved with getPtEtaPhi function
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                # Rebinds bl_reqs / BL to the varied baseline for this variation.
                bl_reqs = os_reqs + ['N_jet>2_' + var, 'MET>30_' + var]
                bl_reqs_d = {sel: True for sel in bl_reqs}
                BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet_var)[os_selection],
                    weight=weight.weight()[os_selection])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet_var.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet_var.eta[:,
                                                                    0:1][BL]),
                                         phi=ak.flatten(jet_var.phi[:,
                                                                    0:1][BL]),
                                         weight=weight.weight()[BL])

        return output

Example #5

Show file

    def process(self, events):
        """Fill electron charge-flip histograms for one batch of events.

        Tight electrons passing isolation, pt and eta cuts are selected;
        those matched to a generated electron are checked for a
        reconstructed ``pdgId`` opposite to the generated one ("flipped").
        Two weight sets are built: ``weight`` (generator weight only) and
        ``weight2`` (generator weight plus a charge-flip weight).  The
        ``*2`` histograms use the baseline selection with ``weight2``, the
        others use the flip selection with ``weight``.

        Args:
            events: Event array exposing ``Jet``, ``MET`` and
                ``metadata['dataset']`` (and ``genWeight`` for
                non-``MuonEG`` datasets), as read below.

        Returns:
            The accumulator created by ``self.accumulator.identity()``,
            after filling.
        """

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # (cfg is not read again in this method)
        cfg = loadConfig()

        # Bookkeeping: event counts before and after the preselection.
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        electron = electron[(electron.miniPFRelIso_all < 0.12)
                            & (electron.pt > 20) & (abs(electron.eta) < 2.4)]

        # Electrons with a valid gen-particle index whose matched gen
        # particle has |pdgId| == 11.
        gen_matched_electron = electron[((electron.genPartIdx >= 0) & (abs(
            electron.matched_gen.pdgId) == 11))]
        n_gen = ak.num(gen_matched_electron)

        # Flipped: the matched gen pdgId is exactly the negative of the
        # reconstructed pdgId.
        is_flipped = ((gen_matched_electron.matched_gen.pdgId *
                       (-1) == gen_matched_electron.pdgId) &
                      (abs(gen_matched_electron.pdgId) == 11))

        #is_flipped = (abs(ev.GenPart[gen_matched_electron.genPartIdx].pdgId) == abs(gen_matched_electron.pdgId))&(ev.GenPart[gen_matched_electron.genPartIdx].pdgId/abs(ev.GenPart[gen_matched_electron.genPartIdx].pdgId) != gen_matched_electron.pdgId/abs(gen_matched_electron.pdgId))
        flipped_electron = gen_matched_electron[is_flipped]
        n_flips = ak.num(flipped_electron)

        # Feeds the charge-flip weight below via sielectron['0'].
        sielectron = choose(electron, 1)

        dielectron = choose(electron, 2)
        # Charge product > 0: the pair carries the same sign.
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)

        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## MET -> can switch to puppi MET
        # (met_pt / met_phi are not read again in this method)
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # setting up the various weights
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if not dataset == 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)
            weight2.add("weight", ev.genWeight)

        # Only weight2 carries the charge-flip weight; it is added for every
        # dataset, including 'MuonEG'.
        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_ratio(sielectron['0']))

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        electr = ((ak.num(electron) == 2))
        ss = (SSelectron)
        gen = (n_gen >= 1)
        flip = (n_flips >= 1)

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('electr', electr)
        selection.add('ss', ss)
        selection.add('flip', flip)
        selection.add('gen', gen)

        # Baseline: event filters plus exactly two selected electrons.
        bl_reqs = ['filter', 'electr']

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        # Same-sign selection (ss_sel is not used by the fills below).
        s_reqs = bl_reqs + ['ss']
        s_reqs_d = {sel: True for sel in s_reqs}
        ss_sel = selection.require(**s_reqs_d)

        # Flip selection: baseline plus at least one gen-matched and at
        # least one flipped electron.
        f_reqs = bl_reqs + ['gen', 'flip']
        f_reqs_d = {sel: True for sel in f_reqs}
        flip_sel = selection.require(**f_reqs_d)

        #outputs
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[flip_sel],
                             weight=weight.weight()[flip_sel])
        output['electron_flips'].fill(dataset=dataset,
                                      multiplicity=n_flips[flip_sel],
                                      weight=weight.weight()[flip_sel])

        output['N_ele2'].fill(dataset=dataset,
                              multiplicity=ak.num(electron)[baseline],
                              weight=weight2.weight()[baseline])
        output['electron_flips2'].fill(dataset=dataset,
                                       multiplicity=n_flips[baseline],
                                       weight=weight2.weight()[baseline])

        # NOTE(review): the flattened per-electron values are paired with a
        # per-event weight array -- confirm the lengths agree when an event
        # contributes a number of entries other than one.
        output["electron"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(flipped_electron[flip_sel].pt)),
            eta=ak.to_numpy(ak.flatten(abs(flipped_electron[flip_sel].eta))),
            #phi = ak.to_numpy(ak.flatten(leading_electron[baseline].phi)),
            weight=weight.weight()[flip_sel])

        output["electron2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[baseline].pt)),
            eta=ak.to_numpy(ak.flatten(abs(leading_electron[baseline].eta))),
            #phi = ak.to_numpy(ak.flatten(leading_electron[baseline].phi)),
            weight=weight2.weight()[baseline])

        return output

Example #6

Show file

    def process(self, events):
        """Fill unweighted histograms of gen-matched and charge-flipped electrons.

        Electrons are restricted step by step to pt > 15, |eta| < 2.4, a
        valid gen match with |pdgId| == 11 and ``genPartFlav`` of 1 or 15.
        An electron counts as flipped when its ``pdgId`` is the negative of
        either the matched gen particle's ``pdgId`` or the value returned
        by ``find_first_parent``.  No event weights are applied to any
        histogram.

        Args:
            events: Event array exposing ``Jet`` and
                ``metadata['dataset']``, as read below.

        Returns:
            The accumulator created by ``self.accumulator.identity()``,
            after filling.
        """

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # (cfg is not read again in this method)
        cfg = loadConfig()

        # Bookkeeping: event counts before and after the preselection.
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 15) & (np.abs(electron.eta) < 2.4)]

        electron = electron[(electron.genPartIdx >= 0)]
        electron = electron[(np.abs(electron.matched_gen.pdgId) == 11
                             )]  #from here on all leptons are gen-matched
        electron = electron[((electron.genPartFlav == 1) |
                             (electron.genPartFlav
                              == 15))]  #and now they are all prompt

        # Flipped: reconstructed pdgId is the negative of the matched gen
        # pdgId, or the negative of what find_first_parent returns.
        is_flipped = (((electron.matched_gen.pdgId * (-1) == electron.pdgId) |
                       (find_first_parent(electron.matched_gen) *
                        (-1) == electron.pdgId)) &
                      (np.abs(electron.pdgId) == 11))

        flipped_electron = electron[is_flipped]
        n_flips = ak.num(flipped_electron)

        # (leading_electron is not read again in this method)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        # NOTE(review): the index is computed on flipped_electron but applied
        # to electron; this looks like it should index flipped_electron.
        # The result is not read again in this method -- confirm intent.
        leading_flipped_electron_idx = ak.singletons(
            ak.argmax(flipped_electron.pt, axis=1))
        leading_flipped_electron = electron[leading_flipped_electron_idx]

        def getMVAscore(electron):
            # Return the electron MVA ID variable appropriate for self.year.
            # For 2018 the raw value is clipped just inside (-1, 1) and then
            # transformed with -0.5 * log(2 / (MVA + 1) - 1).
            # NOTE(review): falls through and returns None for any year
            # other than 2016, 2017 or 2018.
            if self.year == 2016:
                MVA = electron.mvaSpring16GP
                return MVA
            elif self.year == 2017:
                MVA = electron.mvaFall17V2noIso
                return MVA
            elif self.year == 2018:
                MVA = np.minimum(
                    np.maximum(electron.mvaFall17V2noIso, -1.0 + 1.e-6),
                    1.0 - 1.e-6)
                return -0.5 * np.log(2 / (MVA + 1) - 1)

        # setting up the various weights
        #weight = Weights( len(ev) )

        #if not dataset=='MuonEG':
        # generator weight
        # weight.add("weight", ev.genWeight)

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        electr = ((ak.num(electron) >= 1))
        flip = (n_flips >= 1)

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('electr', electr)
        selection.add('flip', flip)

        # Baseline: event filters plus at least one selected electron.
        bl_reqs = ['filter', 'electr']

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        # Flip selection: baseline plus at least one flipped electron.
        f_reqs = bl_reqs + ['flip']
        f_reqs_d = {sel: True for sel in f_reqs}
        flip_sel = selection.require(**f_reqs_d)

        #adjust weights to prevent length mismatch
        #ak_weight_gen = ak.ones_like(electron[baseline].pt) * weight.weight()[baseline]
        #ak_weight_flip = ak.ones_like(flipped_electron[flip_sel].pt) * weight.weight()[flip_sel]

        #output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[baseline], weight=weight.weight()[baseline])
        #output['electron_flips'].fill(dataset=dataset, multiplicity=n_flips[baseline], weight=weight.weight()[baseline])

        # "electron" / "flipped_electron" are filled with |eta|; the "...2"
        # variants are filled with signed eta.
        output["electron"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(electron[baseline].pt)),
            eta=abs(ak.to_numpy(ak.flatten(electron[baseline].eta))),
        )

        output["electron2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(electron[baseline].pt)),
            eta=ak.to_numpy(ak.flatten(electron[baseline].eta)),
        )

        output["flipped_electron"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(flipped_electron[flip_sel].pt)),
            eta=abs(ak.to_numpy(ak.flatten(flipped_electron[flip_sel].eta))),
        )

        output["flipped_electron2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(flipped_electron[flip_sel].pt)),
            eta=ak.to_numpy(ak.flatten(flipped_electron[flip_sel].eta)),
        )

        # MVA score versus |etaSC| and versus pt for baseline electrons.
        output["mva_id"].fill(
            dataset=dataset,
            mva_id=ak.to_numpy(ak.flatten(getMVAscore(electron)[baseline])),
            eta=np.abs(ak.to_numpy(ak.flatten(electron.etaSC[baseline]))),
        )

        output["mva_id2"].fill(
            dataset=dataset,
            mva_id=ak.to_numpy(ak.flatten(getMVAscore(electron)[baseline])),
            pt=ak.to_numpy(ak.flatten(electron.pt[baseline])),
        )

        output["isolation"].fill(
            dataset=dataset,
            isolation1=ak.to_numpy(ak.flatten(electron.jetRelIso[baseline])),
            isolation2=ak.to_numpy(ak.flatten(electron.jetPtRelv2[baseline])),
        )

        return output

Example #7

Show file

File: SS_analysis.py Project: JacksonWallace/tW_scattering

    def process(self, events):
        """Fill the same-sign dilepton analysis histograms for one batch of events.

        Events are first restricted to those with more than two entries in
        ``events.Jet``.  Lepton, jet, b-tag and forward-jet collections are
        built, an event weight and a ``PackedSelection`` are assembled, and
        three nested selections are derived: same-sign (``ss_selection``),
        baseline (``BL``) and signal region (``SR``).  A cumulative cutflow
        over the signal-region requirements is recorded and the histograms
        are filled.

        Args:
            events: Event array exposing ``Jet``, ``GenL``, ``GenPart``,
                ``MET``, ``PV``, the ``nLepFrom*`` / ``nGenTau`` counters
                and ``metadata['dataset']`` (plus ``weight`` and the
                ``puWeight*`` fields for non-``MuonEG`` datasets), as read
                below.

        Returns:
            The accumulator created by ``self.accumulator.identity()``,
            after filling.
        """

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # (still read below for cfg['lumi'] when building the lumi weight)
        cfg = loadConfig()

        # Bookkeeping: event counts before and after the preselection.
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        # Charge product > 0: the pair carries the same sign.
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
        n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        # ht: scalar sum of jet pt; st adds MET and the lepton pt sums.
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

        ## event selectors
        filters   = getFilters(ev, year=self.year, dataset=dataset)

        # Exactly two tight leptons in total.
        dilep     = ((ak.num(electron) + ak.num(muon))==2)
        # Summed pdgId < 0 is labelled positive charge, > 0 negative charge.
        pos_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))<0)
        neg_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))>0)
        # lep0pt*: at least one lepton above the threshold;
        # lep1pt*: at least two leptons above the threshold.
        lep0pt    = ((ak.num(electron[(electron.pt>25)]) + ak.num(muon[(muon.pt>25)]))>0)
        lep0pt_40 = ((ak.num(electron[(electron.pt>40)]) + ak.num(muon[(muon.pt>40)]))>0)
        lep0pt_100 = ((ak.num(electron[(electron.pt>100)]) + ak.num(muon[(muon.pt>100)]))>0)
        lep1pt    = ((ak.num(electron[(electron.pt>20)]) + ak.num(muon[(muon.pt>20)]))>1)
        lep1pt_30 = ((ak.num(electron[(electron.pt>30)]) + ak.num(muon[(muon.pt>30)]))>1)
        # Exactly two veto-level leptons in total.
        lepveto   = ((ak.num(vetoelectron) + ak.num(vetomuon))==2)

        # define the weight
        weight = Weights( len(ev) )

        #mult = 1
        #if dataset=='inclusive': mult = 0.0478/47.448
        #if dataset=='plus': mult = 0.0036/7.205

        # No weights or scale factors are added for 'MuonEG'.
        if not dataset=='MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto',       lepveto)
        selection.add('dilep',         dilep )
        selection.add('filter',        (filters) )
        selection.add('p_T(lep0)>25',  lep0pt )
        selection.add('p_T(lep0)>40',  lep0pt_40 )
        selection.add('p_T(lep1)>20',  lep1pt )
        selection.add('p_T(lep1)>30',  lep1pt_30 )
        selection.add('SS',            ( SSlepton | SSelectron | SSmuon) )
        selection.add('pos',           ( pos_charge ) )
        selection.add('neg',           ( neg_charge ) )
        selection.add('N_jet>3',       (ak.num(jet)>=4) )
        selection.add('N_jet>4',       (ak.num(jet)>=5) )
        selection.add('N_central>2',   (ak.num(central)>=3) )
        selection.add('N_central>3',   (ak.num(central)>=4) )
        selection.add('N_btag>0',      (ak.num(btag)>=1) )
        selection.add('MET>50',        (ev.MET.pt>50) )
        selection.add('ST',            (st>600) )
        selection.add('N_fwd>0',       (ak.num(fwd)>=1 ))
        selection.add('delta_eta',     (ak.any(delta_eta>2, axis=1) ) )
        selection.add('fwd_p>500',     (ak.any(j_fwd.p>500, axis=1) ) )

        # Nested requirement lists: same-sign < baseline < signal region.
        ss_reqs = ['lepveto', 'dilep', 'SS', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'N_jet>3', 'N_central>2', 'N_btag>0']
        bl_reqs = ss_reqs + ['N_fwd>0', 'N_jet>4', 'N_central>3', 'ST', 'MET>50', 'delta_eta']
        sr_reqs = bl_reqs + ['fwd_p>500', 'p_T(lep0)>40', 'p_T(lep1)>30']

        ss_reqs_d = { sel: True for sel in ss_reqs }
        ss_selection = selection.require(**ss_reqs_d)
        bl_reqs_d = { sel: True for sel in bl_reqs }
        BL = selection.require(**bl_reqs_d)
        sr_reqs_d = { sel: True for sel in sr_reqs }
        SR = selection.require(**sr_reqs_d)

        # Cumulative cutflow: each row requires all cuts up to and including
        # the current one.
        cutflow     = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in sr_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow( req, selection.require(**cutflow_reqs_d) )

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvs, weight=weight.weight()[ss_selection])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvsGood, weight=weight.weight()[ss_selection])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        # Muon multiplicity must count muons; the original filled this
        # histogram with ak.num(electron), duplicating 'N_ele'.
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[ss_selection], weight=weight.weight()[ss_selection])
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight.weight()[BL])
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight.weight()[BL])
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight.weight()[BL])
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight.weight()[BL])
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight.weight()[BL])
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight.weight()[BL])
        output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[ss_selection], n2=n_nonprompt[ss_selection], n_ele=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[ss_selection].pt,
            phi  = ev.MET[ss_selection].phi,
            weight = weight.weight()[ss_selection]
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        # The k:k+1 slices keep at most the (k+1)-th jet of each event
        # (jets were sorted by descending pt_nom above).
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )


        return output

Example #8

Show file

File: ttbar_alpha_reco.py Project: jdulemba/NanoAOD_Analyses

    def process(self, events):
        """Fill the 3-jet permutation categories for one chunk of events.

        Builds separate muon- and electron-channel event weights, packs the
        event-level cuts into a ``PackedSelection`` and, for each of the
        'Muon' and 'Electron' regions, compares the gen-matched permutation
        with the best reconstructed permutation before passing the events
        that survive the valid-permutation and ``MTcut`` requirements to
        ``self.make_3j_categories``.

        Parameters
        ----------
        events
            Event record for one chunk. ``events.metadata['dataset']`` and the
            'Muon' / 'Electron' fields are read directly; 'Jet' and 'MET' are
            overwritten with the output of ``IDJet.process_jets``.
            NOTE(review): 'SelectedJets', 'SelectedMET' and 'SL' are presumably
            attached by ``objsel.jets_selection`` / ``genpsel.select`` --
            confirm against those helpers.

        Returns
        -------
        The accumulator created by ``self.accumulator.identity()`` after all
        regions have been filled.

        Notes
        -----
        Relies on names defined outside this method (``args``, ``btag_wps``,
        ``btaggers``, ``wps_to_use``, ``MTcut``, ``MCWeights``, ``objsel``,
        ``genpsel``, ``ttmatcher``, ``ttpermutator``, ``make_vars``,
        ``compare_matched_best_perms``).
        """
        # sets seed so values from random distributions are reproducible
        # (JER corrections)
        np.random.seed(10)
        output = self.accumulator.identity()

        self.sample_name = events.metadata['dataset']

        ## make event weights
        # data or MC distinction made internally
        mu_evt_weights = MCWeights.get_event_weights(
            events, year=args.year, corrections=self.corrections)
        el_evt_weights = MCWeights.get_event_weights(
            events, year=args.year, corrections=self.corrections)

        ## initialize selections and regions
        selection = PackedSelection()
        regions = {
            'Muon': {
                'lep_and_filter_pass', 'passing_jets', 'jets_3', 'tight_MU',
                'btag_pass', 'semilep'
            },
            'Electron': {
                'lep_and_filter_pass', 'passing_jets', 'jets_3', 'tight_EL',
                'btag_pass', 'semilep'
            },
        }

        # get all passing leptons
        lep_and_filter_pass = objsel.select_leptons(events, year=args.year)
        # add passing leptons requirement to all systematics
        selection.add('lep_and_filter_pass', lep_and_filter_pass)

        ## add different selections
        ## muons
        tight_mu_sel = ak.sum(events['Muon']['TIGHTMU'], axis=1) == 1
        selection.add('tight_MU',
                      tight_mu_sel)  # one muon passing TIGHT criteria
        ## electrons
        tight_el_sel = ak.sum(events['Electron']['TIGHTEL'], axis=1) == 1
        selection.add('tight_EL',
                      tight_el_sel)  # one electron passing TIGHT criteria

        ### apply lepton SFs to MC (only applicable to tight leptons)
        if 'LeptonSF' in self.corrections:
            tight_muons = events['Muon'][tight_mu_sel][(
                events['Muon'][tight_mu_sel]['TIGHTMU'] == True)]
            muSFs_dict = MCWeights.get_lepton_sf(
                year=args.year,
                lepton='Muons',
                corrections=self.corrections['LeptonSF'],
                pt=ak.flatten(tight_muons['pt']),
                eta=ak.flatten(tight_muons['eta']))
            # events without exactly one tight muon keep SF = 1 and error = 0
            mu_reco_cen = np.ones(len(events))
            mu_reco_err = np.zeros(len(events))
            mu_trig_cen = np.ones(len(events))
            mu_trig_err = np.zeros(len(events))
            mu_reco_cen[tight_mu_sel] = muSFs_dict['RECO_CEN']
            mu_reco_err[tight_mu_sel] = muSFs_dict['RECO_ERR']
            mu_trig_cen[tight_mu_sel] = muSFs_dict['TRIG_CEN']
            mu_trig_err[tight_mu_sel] = muSFs_dict['TRIG_ERR']
            mu_evt_weights.add('Lep_RECO',
                               mu_reco_cen,
                               mu_reco_err,
                               mu_reco_err,
                               shift=True)
            mu_evt_weights.add('Lep_TRIG',
                               mu_trig_cen,
                               mu_trig_err,
                               mu_trig_err,
                               shift=True)

            tight_electrons = events['Electron'][tight_el_sel][(
                events['Electron'][tight_el_sel]['TIGHTEL'] == True)]
            elSFs_dict = MCWeights.get_lepton_sf(
                year=args.year,
                lepton='Electrons',
                corrections=self.corrections['LeptonSF'],
                pt=ak.flatten(tight_electrons['pt']),
                eta=ak.flatten(tight_electrons['etaSC']))
            # events without exactly one tight electron keep SF = 1, error = 0
            el_reco_cen = np.ones(len(events))
            el_reco_err = np.zeros(len(events))
            el_trig_cen = np.ones(len(events))
            el_trig_err = np.zeros(len(events))
            el_reco_cen[tight_el_sel] = elSFs_dict['RECO_CEN']
            el_reco_err[tight_el_sel] = elSFs_dict['RECO_ERR']
            el_trig_cen[tight_el_sel] = elSFs_dict['TRIG_CEN']
            el_trig_err[tight_el_sel] = elSFs_dict['TRIG_ERR']
            el_evt_weights.add('Lep_RECO',
                               el_reco_cen,
                               el_reco_err,
                               el_reco_err,
                               shift=True)
            el_evt_weights.add('Lep_TRIG',
                               el_trig_cen,
                               el_trig_err,
                               el_trig_err,
                               shift=True)

        ## build corrected jets and MET
        events['Jet'], events['MET'] = IDJet.process_jets(
            events, args.year, self.corrections['JetCor'])

        # jet selection
        passing_jets = objsel.jets_selection(events, year=args.year)
        selection.add('passing_jets', passing_jets)
        selection.add('jets_3', ak.num(events['SelectedJets']) == 3)
        selection.add('btag_pass',
                      ak.sum(events['SelectedJets'][btag_wps[0]], axis=1) >= 2)

        # order the selected jets by decreasing b-tag discriminant
        events['SelectedJets'] = events['SelectedJets'][ak.argsort(
            events['SelectedJets']['btagDeepB'], ascending=False
        )] if btaggers[0] == 'DeepCSV' else events['SelectedJets'][ak.argsort(
            events['SelectedJets']['btagDeepFlavB'], ascending=False)]

        if self.corrections['BTagSF'] == True:
            deepcsv_cen = np.ones(len(events))
            threeJets_cut = selection.require(lep_and_filter_pass=True,
                                              passing_jets=True,
                                              jets_3=True)
            deepcsv_3j_wts = self.corrections['BTag_Constructors']['DeepCSV'][
                '3Jets'].get_scale_factor(
                    jets=events['SelectedJets'][threeJets_cut],
                    passing_cut='DeepCSV' + wps_to_use[0])
            # per-event weight is the product of the per-jet scale factors
            deepcsv_cen[threeJets_cut] = ak.prod(deepcsv_3j_wts['central'],
                                                 axis=1)

            # make dict of btag weights
            btag_weights = {
                'DeepCSV_CEN': deepcsv_cen,
            }
        else:
            # b-tag scale factors are switched off: fall back to unit weights
            # so the per-region lookup below does not hit an undefined name.
            btag_weights = {
                '%s_CEN' % btaggers[0]: np.ones(len(events)),
            }

        # find gen level particles for ttbar system
        genpsel.select(events, mode='NORMAL')
        selection.add('semilep', ak.num(events['SL']) > 0)
        if 'NNLO_Rewt' in self.corrections:
            nnlo_wts = MCWeights.get_nnlo_weights(
                self.corrections['NNLO_Rewt'], events)
            mu_evt_weights.add(
                '%s_reweighting' % self.corrections['NNLO_Rewt']['Var'],
                nnlo_wts)
            el_evt_weights.add(
                '%s_reweighting' % self.corrections['NNLO_Rewt']['Var'],
                nnlo_wts)

        ## fill hists for each region
        for lepton in regions:
            evt_weights = mu_evt_weights if lepton == 'Muon' else el_evt_weights
            cut = selection.all(*regions[lepton])

            if cut.sum() > 0:
                leptype = 'MU' if lepton == 'Muon' else 'EL'
                if 'loose_or_tight_%s' % leptype in regions[lepton]:
                    leptons = events[lepton][cut][(
                        (events[lepton][cut]['TIGHT%s' % leptype] == True) |
                        (events[lepton][cut]['LOOSE%s' % leptype] == True))]
                elif 'tight_%s' % leptype in regions[lepton]:
                    leptons = events[lepton][cut][(
                        events[lepton][cut]['TIGHT%s' % leptype] == True)]
                elif 'loose_%s' % leptype in regions[lepton]:
                    leptons = events[lepton][cut][(
                        events[lepton][cut]['LOOSE%s' % leptype] == True)]
                else:
                    raise ValueError(
                        "Not sure what lepton type to choose for event")

                # get jets and MET
                jets, met = events['SelectedJets'][cut], events['SelectedMET'][
                    cut]

                # find matched permutations
                mp = ttmatcher.best_match(gen_hyp=events['SL'][cut],
                                          jets=jets,
                                          leptons=leptons,
                                          met=met)

                # find best permutations
                best_perms = ttpermutator.find_best_permutations(
                    jets=jets, leptons=leptons, MET=met, btagWP=btag_wps[0])
                valid_perms = ak.num(best_perms['TTbar'].pt) > 0

                # compare matched perm to best perm
                # 0 == '' (no gen matching), 1 == 'right', 2 == 'matchable',
                # 3 == 'unmatchable', 4 == 'sl_tau', 5 == 'noslep'
                bp_status = np.zeros(cut.size, dtype=int)
                perm_cat_array = compare_matched_best_perms(mp,
                                                            best_perms,
                                                            njets='3Jets')
                bp_status[cut] = perm_cat_array
                if ak.any(ak.num(events['SL']['Lepton'].pdgId) != 1):
                    raise ValueError(
                        "Number of leptons is incorrect for classifying tau+jets events"
                    )
                # events whose gen lepton is a tau override the category above
                sl_tau_evts = ak.where(
                    np.abs(events['SL']['Lepton'].pdgId) == 15)[0]
                bp_status[sl_tau_evts] = 4

                ## create MT regions
                MT = make_vars.MT(leptons, met)
                MTHigh = ak.flatten(MT[valid_perms] >= MTcut)

                wts = (evt_weights.weight() *
                       btag_weights['%s_CEN' %
                                    btaggers[0]])[cut][valid_perms][MTHigh]
                output = self.make_3j_categories(
                    acc=output,
                    leptype=lepton,
                    permarray=bp_status[cut][valid_perms][MTHigh],
                    genttbar=events['SL'][cut][valid_perms][MTHigh],
                    bp=best_perms[valid_perms][MTHigh],
                    evt_wts=wts)

        return output

Example #9

Show file

    def process(self, events):
        """Fill the single-lepton tight / fakeable histograms for one chunk.

        Events without any jet are dropped, the tight and fakeable FCNC
        electron and muon collections are built, and events that pass the
        'MET<20' and 'mt<20' cuts, contain exactly one fakeable lepton of the
        given flavour and keep at least one jet after the ``match`` veto
        (deltaRCut=1.0) are histogrammed in conePt and |eta|.  When
        ``self.debug`` is set, a table describing the selected tight-muon
        events is appended to ``output['muons_df']``.

        Returns the accumulator created by ``self.accumulator.identity()``.
        """
        # corrects for rare case where there isn't a single jet in event
        events = events[ak.num(events.Jet) > 0]
        output = self.accumulator.identity()

        # very loose preselection; nothing else is done with it
        presel = ak.num(events.Jet) >= 0
        ev = events[presel]
        dataset = ev.metadata['dataset']

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # tight and fakeable lepton collections
        tight_ele = Collections(ev, "Electron", "tightFCNC").get()
        loose_ele = Collections(ev, "Electron", "fakeableFCNC").get()
        tight_mu = Collections(ev, "Muon", "tightFCNC").get()
        loose_mu = Collections(ev, "Muon", "fakeableFCNC").get()

        all_jets = events.Jet  # not used further down

        # MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # only the fakeable muons enter the transverse-mass requirement
        mt_lep_met = mt(loose_mu.pt, loose_mu.phi, met_pt, met_phi)
        min_mt = ak.min(mt_lep_met, axis=1)

        selection = PackedSelection()
        selection.add('MET<20', met_pt < 20)
        selection.add('mt<20', min_mt < 20)
        required_cuts = dict.fromkeys(['MET<20', 'mt<20'], True)
        fcnc_selection = selection.require(**required_cuts)

        # define the weight (generator weight for everything but MuonEG)
        weight = Weights(len(ev))
        if dataset != 'MuonEG':
            weight.add("weight", ev.genWeight)

        jets = getJets(ev, maxEta=2.4, minPt=25, pt_var='pt')

        # per-event lepton multiplicities, reused by every selection below
        n_tight_mu = ak.num(tight_mu)
        n_loose_mu = ak.num(loose_mu)
        n_tight_ele = ak.num(tight_ele)
        n_loose_ele = ak.num(loose_ele)

        single_muon_sel = ((n_tight_mu == 1) & (n_loose_mu == 1)) | (
            (n_tight_mu == 0) & (n_loose_mu == 1))
        single_electron_sel = ((n_tight_ele == 1) & (n_loose_ele == 1)) | (
            (n_tight_ele == 0) & (n_loose_ele == 1))

        jets_after_mu_veto = jets[~match(jets, loose_mu, deltaRCut=1.0)]
        fcnc_muon_sel = (ak.num(jets_after_mu_veto) >=
                         1) & fcnc_selection & single_muon_sel
        jets_after_ele_veto = jets[~match(jets, loose_ele, deltaRCut=1.0)]
        fcnc_electron_sel = (ak.num(jets_after_ele_veto) >=
                             1) & fcnc_selection & single_electron_sel

        tight_muon_sel = (n_tight_mu == 1) & fcnc_muon_sel
        loose_muon_sel = (n_loose_mu == 1) & fcnc_muon_sel
        tight_electron_sel = (n_tight_ele == 1) & fcnc_electron_sel
        loose_electron_sel = (n_loose_ele == 1) & fcnc_electron_sel

        def flat_np(values):
            # flatten the per-event lists and hand back a numpy array
            return ak.to_numpy(ak.flatten(values))

        # (histogram name, lepton collection, event mask), in fill order
        fills = (
            ('single_mu_fakeable', loose_mu, loose_muon_sel),
            ('single_mu', tight_mu, tight_muon_sel),
            ('single_e_fakeable', loose_ele, loose_electron_sel),
            ('single_e', tight_ele, tight_electron_sel),
        )
        for hist_name, leptons, mask in fills:
            chosen = leptons[mask]
            output[hist_name].fill(dataset=dataset,
                                   pt=flat_np(chosen.conePt),
                                   eta=np.abs(flat_np(chosen.eta)))

        if self.debug:
            # create pandas dataframe for debugging
            muon_columns = [
                "pt", "conePt", "eta", "dz", "dxy", "ptErrRel",
                "miniPFRelIso_all", "jetRelIsoV2", "jetRelIso", "jetPtRelv2"
            ]

            passed_events = ev[tight_muon_sel]
            passed_muons = tight_mu[tight_muon_sel]
            event_p = ak.to_pandas(passed_events[["event"]])
            event_p["MET_PT"] = passed_events["MET"]["pt"]
            event_p["mt"] = min_mt[tight_muon_sel]
            event_p["num_tight_mu"] = ak.to_numpy(n_tight_mu[tight_muon_sel])
            event_p["num_loose_mu"] = n_loose_mu[tight_muon_sel]
            muon_p = ak.to_pandas(ak.flatten(passed_muons)[muon_columns])
            # convert to numpy array for the output
            events_array = pd.concat([muon_p, event_p], axis=1)

            # hand-picked event numbers that are always appended to the table
            for event_number in [6886009]:
                is_target = ev.event == event_number
                tmp_event = ev[is_target]
                added_event = ak.to_pandas(tmp_event[["event"]])
                added_event["MET_PT"] = tmp_event["MET"]["pt"]
                added_event["mt"] = min_mt[is_target]
                added_event["num_tight_mu"] = ak.to_numpy(
                    n_tight_mu[is_target])
                added_event["num_loose_mu"] = ak.to_numpy(
                    n_loose_mu[is_target])
                add_muon = ak.to_pandas(
                    ak.flatten(tight_mu[is_target])[muon_columns])
                add_concat = pd.concat([add_muon, added_event], axis=1)
                events_array = pd.concat([events_array, add_concat], axis=0)

            output['muons_df'] += processor.column_accumulator(
                events_array.to_numpy())

        return output

Example #10

Show file

    def process(self, events):
        """Fill the electron charge-flip histograms for one chunk of events.

        Keeps events with at least two jets, builds gen-matched prompt
        electrons and muons, counts electrons whose reconstructed pdgId is the
        negative of the matched gen particle (or of what ``find_first_parent``
        returns for it), and fills multiplicity and leading-electron
        pt / |eta| histograms for several same-sign / opposite-sign selections.
        Two weight sets are used: ``weight`` (generator weight only) and
        ``weight2`` (generator weight times the charge-flip weight).

        Returns the accumulator created by ``self.accumulator.identity()``.
        """
        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events
        presel = ak.num(events.Jet) >= 2
        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore (result is unused)
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons: acceptance, then gen matching, then promptness
        ele = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        ele = ele[(ele.pt > 15) & (np.abs(ele.eta) < 2.4)]
        ele = ele[ele.genPartIdx >= 0]
        # from here on all electrons are gen-matched
        ele = ele[np.abs(ele.matched_gen.pdgId) == 11]
        # and now they are all prompt
        ele = ele[(ele.genPartFlav == 1) | (ele.genPartFlav == 15)]

        leading_electron = ele[ak.singletons(ak.argmax(ele.pt, axis=1))]
        trailing_electron = ele[ak.singletons(ak.argmin(ele.pt, axis=1))]

        # only referenced by the disabled lepton_parent fills at the end
        leading_parent = find_first_parent(leading_electron.matched_gen)
        trailing_parent = find_first_parent(trailing_electron.matched_gen)

        gen_is_opposite = ele.matched_gen.pdgId * (-1) == ele.pdgId
        parent_is_opposite = (
            find_first_parent(ele.matched_gen) * (-1) == ele.pdgId)
        is_flipped = (gen_is_opposite | parent_is_opposite) & (
            np.abs(ele.pdgId) == 11)

        flipped = ele[is_flipped]
        flipped = flipped[ak.fill_none(flipped.pt, 0) > 0]
        flipped = flipped[~ak.is_none(flipped)]
        n_flips = ak.num(flipped)

        ## Muons: same chain of requirements as for the electrons
        mu = Collections(ev, "Muon", "tightFCNC").get()
        mu = mu[(mu.pt > 15) & (np.abs(mu.eta) < 2.4)]
        mu = mu[mu.genPartIdx >= 0]
        # from here, all muons are gen-matched
        mu = mu[np.abs(mu.matched_gen.pdgId) == 13]
        # and now they are all prompt
        mu = mu[(mu.genPartFlav == 1) | (mu.genPartFlav == 15)]

        ## Leptons
        lepton = ak.concatenate([mu, ele], axis=1)
        n_lepton = ak.num(lepton)
        charge_sum = ak.sum(lepton.charge, axis=1)
        is_ss = (charge_sum != 0) & (n_lepton == 2)
        is_os = (charge_sum == 0) & (n_lepton == 2)

        is_emu = (ak.num(ele) == 1) & (ak.num(mu) == 1)
        no_mumu = ak.num(mu) <= 1

        # leading / trailing lepton (not used by any fill below)
        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        ## Jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        # need to sort wrt smeared and recorrected jet pt
        jet = jet[ak.argsort(jet.pt, ascending=False)]
        # remove jets that overlap with muons, then with electrons
        jet = jet[~match(jet, mu, deltaRCut=0.4)]
        jet = jet[~match(jet, ele, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET (values are not used below)
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # setting up the various weights
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)
            weight2.add("weight", ev.genWeight)

        weight2.add("charge flip", self.charge_flip_ratio.flip_weight(ele))

        ## selections
        filters = getFilters(ev, year=self.year, dataset=dataset)

        selection = PackedSelection()
        selection.add('filter', filters)
        selection.add('ss', is_ss)
        selection.add('os', is_os)
        selection.add('jet', ak.num(jet) >= 2)
        selection.add('ee', ak.num(ele) == 2)
        selection.add('emu', is_emu)
        selection.add('flip', n_flips == 1)
        selection.add('nflip', n_flips == 0)
        selection.add('no_mumu', no_mumu)

        def require_all(*names):
            # mask of events passing every named cut
            return selection.require(**{name: True for name in names})

        bl_reqs = ('filter', 'jet')
        baseline = require_all(*bl_reqs)

        flip_sel = require_all(*bl_reqs, 'flip', 'ss', 'ee')
        flip_sel2 = require_all(*bl_reqs, 'flip', 'ss', 'emu')
        flip_sel3 = require_all(*bl_reqs, 'flip', 'ss', 'no_mumu')

        n_flip_sel = require_all(*bl_reqs, 'nflip', 'os', 'ee')
        n_flip_sel2 = require_all(*bl_reqs, 'nflip', 'os', 'emu')
        n_flip_sel3 = require_all(*bl_reqs, 'nflip', 'os', 'no_mumu')

        ss_sel = require_all(*bl_reqs, 'ss', 'no_mumu')
        os_sel = require_all(*bl_reqs, 'os', 'no_mumu')

        # the four masks below are built but not used by any histogram
        eess_sel = require_all(*bl_reqs, 'ss', 'ee')
        eeos_sel = require_all(*bl_reqs, 'os', 'ee')
        emss_sel = require_all(*bl_reqs, 'ss', 'emu')
        emos_sel = require_all(*bl_reqs, 'os', 'emu')

        ## outputs
        w_nominal = weight.weight()
        w_flip = weight2.weight()

        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[baseline],
                             weight=w_nominal[baseline])
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=n_lepton[ss_sel],
                             weight=w_nominal[ss_sel])
        output['N_ele2'].fill(dataset=dataset,
                              multiplicity=n_lepton[os_sel],
                              weight=w_flip[os_sel])

        # (histogram name, event mask, per-event weights), in fill order
        flip_count_fills = (
            ('electron_flips', flip_sel, w_nominal),
            ('electron_flips2', n_flip_sel, w_flip),
            ('electron_flips3', flip_sel2, w_nominal),
            ('electron_flips4', n_flip_sel2, w_flip),
        )
        for hist_name, mask, event_weights in flip_count_fills:
            output[hist_name].fill(dataset=dataset,
                                   multiplicity=n_flips[mask],
                                   weight=event_weights[mask])

        leading_electron_fills = (
            ("electron", flip_sel3, w_nominal),
            ("electron2", n_flip_sel3, w_flip),
            ("flipped_electron", flip_sel, w_nominal),
            ("flipped_electron2", n_flip_sel, w_flip),
            ("flipped_electron3", flip_sel2, w_nominal),
            ("flipped_electron4", n_flip_sel2, w_flip),
        )
        for hist_name, mask, event_weights in leading_electron_fills:
            chosen = leading_electron[mask]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(chosen.pt)),
                eta=np.abs(ak.to_numpy(ak.flatten(chosen.eta))),
                weight=event_weights[mask],
            )

        # Disabled fills kept for reference:
        #   output["lepton_parent"]  <- |leading_parent[ss_sel]|,  weight[ss_sel]
        #   output["lepton_parent2"] <- |trailing_parent[ss_sel]|, weight[ss_sel]

        return output

Example #11

Show file

class Selection:
    '''
    Event-level baseline selections (dilepton / trilepton) built on a
    coffea PackedSelection.

    The object is a plain attribute bag: everything passed to the
    constructor becomes an attribute, and the baseline methods read the
    attributes they need.
    '''

    def __init__(self, **kwargs):
        '''
        Store every keyword argument as an instance attribute.

        Attributes read by the baseline methods:
        ele, ele_veto            electron collections (tight and veto)
        mu, mu_veto              muon collections (tight and veto)
        jet_all, jet_central,
        jet_fwd, jet_btag        jet collections
        met                      MET object (only .pt is used)
        events, year, dataset    forwarded to getTriggers
        filters                  event filter mask, needed by trilep_baseline
                                 only; it is NOT set here and must be passed in
        '''
        self.__dict__.update(kwargs)

        ## not yet sure whether this should go here, or later
        #self.filters   = getFilters(self.events, year=self.year, dataset=self.dataset)

    def _n_leptons_above(self, pt_min):
        '''Per-event number of electrons plus muons with pt > pt_min.'''
        n_ele = ak.num(self.ele[(self.ele.pt > pt_min)])
        n_mu = ak.num(self.mu[(self.mu.pt > pt_min)])
        return n_ele + n_mu

    def _lepton_triggers(self):
        '''Trigger mask from getTriggers, keyed on the pdgIds of the two leading-pt leptons.'''
        lepton = ak.concatenate([self.ele, self.mu], axis=1)
        pt_ordered = lepton[ak.argsort(lepton.pt, ascending=False)]
        # Pad/clip to exactly two entries per event; missing leptons get pdgId 0.
        pdgId_pt_ordered = ak.fill_none(
            ak.pad_none(pt_ordered.pdgId, 2, clip=True),
            0,
        )
        return getTriggers(
            self.events,
            ak.flatten(pdgId_pt_ordered[:, 0:1]),
            ak.flatten(pdgId_pt_ordered[:, 1:2]),
            year=self.year,
            dataset=self.dataset,
        )

    def _st(self):
        '''Per-event sum of MET pt, all-jet pt (HT), muon pt and electron pt.'''
        ht = ak.sum(self.jet_all.pt, axis=1)
        return self.met.pt + ht + ak.sum(self.mu.pt, axis=1) + ak.sum(self.ele.pt, axis=1)

    def _add_cutflow_rows(self, cutflow, reqs):
        '''Add one row per requirement to cutflow, each with the cumulative mask up to that cut.'''
        applied = {}
        for req in reqs:
            applied[req] = True
            cutflow.addRow(req, self.selection.require(**applied))

    def dilep_baseline(self, omit=(), cutflow=None, tight=False, SS=True):
        '''
        Build the dilepton baseline selection and return its event mask.

        omit:    names of cuts that are left out of the returned mask
        cutflow: give it a cutflow object if you want it to be filled
        tight:   additionally require N_jet>4, N_central>3, ST>600, MET>50
        SS:      require a same-sign lepton pair if True, an opposite-sign
                 pair otherwise

        The PackedSelection holding all individual cuts is kept in
        self.selection.
        '''
        self.selection = PackedSelection()

        is_dilep = ((ak.num(self.ele) + ak.num(self.mu)) == 2)
        lep0pt = (self._n_leptons_above(25) > 0)
        lep1pt = (self._n_leptons_above(20) > 1)
        lepveto = ((ak.num(self.ele_veto) + ak.num(self.mu_veto)) == 2)

        # Charge product of every mu-mu, e-e and mu-e pair in the event.
        dimu = choose(self.mu, 2)
        diele = choose(self.ele, 2)
        dilep = cross(self.mu, self.ele)
        q_dimu = dimu['0'].charge * dimu['1'].charge
        q_diele = diele['0'].charge * diele['1'].charge
        q_dilep = dilep['0'].charge * dilep['1'].charge

        if SS:
            sign_name = 'SS'
            sign_mask = (ak.any(q_dimu > 0, axis=1)
                         | ak.any(q_diele > 0, axis=1)
                         | ak.any(q_dilep > 0, axis=1))
        else:
            sign_name = 'OS'
            sign_mask = (ak.any(q_dimu < 0, axis=1)
                         | ak.any(q_diele < 0, axis=1)
                         | ak.any(q_dilep < 0, axis=1))

        triggers = self._lepton_triggers()
        st = self._st()

        self.selection.add('lepveto',       lepveto)
        self.selection.add('dilep',         is_dilep)
        #self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        self.selection.add(sign_name,       sign_mask)
        self.selection.add('N_jet>3',       (ak.num(self.jet_all) > 3))
        self.selection.add('N_jet>4',       (ak.num(self.jet_all) > 4))
        self.selection.add('N_central>2',   (ak.num(self.jet_central) > 2))
        self.selection.add('N_central>3',   (ak.num(self.jet_central) > 3))
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag) > 0))
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd) > 0))
        self.selection.add('MET>30',        (self.met.pt > 30))
        self.selection.add('MET>50',        (self.met.pt > 50))
        self.selection.add('ST>600',        (st > 600))

        ss_reqs = [
        #    'filter',
            'lepveto',
            'dilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            sign_name,
            'N_jet>3',
            'N_central>2',
            'N_btag>0',
            'MET>30',
            'N_fwd>0',
        ]

        if tight:
            ss_reqs += [
                'N_jet>4',
                'N_central>3',
                'ST>600',
                'MET>50',
                #'delta_eta',
            ]

        ss_reqs_d = {sel: True for sel in ss_reqs if sel not in omit}
        ss_selection = self.selection.require(**ss_reqs_d)

        if cutflow:
            # NOTE(review): the cutflow walks the full cut list, so cuts named
            # in `omit` still show up (and are applied) in the cutflow rows.
            self._add_cutflow_rows(cutflow, ss_reqs)

        return ss_selection

    def trilep_baseline(self, omit=(), cutflow=None, tight=False):
        '''
        Build the trilepton baseline selection and return its event mask.

        omit:    names of cuts that are left out of the returned mask
        cutflow: give it a cutflow object if you want it to be filled
        tight:   additionally require N_jet>3, N_central>2, ST>600

        Reads self.filters, which the constructor does not create; it has to
        be supplied as a keyword argument when building the object.

        The PackedSelection holding all individual cuts is kept in
        self.selection, and the list of applied cut names in self.reqs.
        '''
        self.selection = PackedSelection()

        is_trilep = ((ak.num(self.ele) + ak.num(self.mu)) == 3)
        lep0pt = (self._n_leptons_above(25) > 0)
        lep1pt = (self._n_leptons_above(20) > 1)
        lepveto = ((ak.num(self.ele_veto) + ak.num(self.mu_veto)) == 3)

        dimu = choose(self.mu, 2)
        diele = choose(self.ele, 2)

        # Opposite-sign same-flavour pairs; every one of them has to be more
        # than 10 away from 91.2 in mass for the event to count as off-Z.
        OS_dimu = dimu[(dimu['0'].charge * dimu['1'].charge < 0)]
        OS_diele = diele[(diele['0'].charge * diele['1'].charge < 0)]
        offZ = (ak.all(abs(OS_dimu.mass - 91.2) > 10, axis=1)
                & ak.all(abs(OS_diele.mass - 91.2) > 10, axis=1))

        triggers = self._lepton_triggers()
        st = self._st()

        self.selection.add('lepveto',       lepveto)
        self.selection.add('trilep',        is_trilep)
        self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        self.selection.add('N_jet>2',       (ak.num(self.jet_all) > 2))
        self.selection.add('N_jet>3',       (ak.num(self.jet_all) > 3))
        self.selection.add('N_central>1',   (ak.num(self.jet_central) > 1))
        self.selection.add('N_central>2',   (ak.num(self.jet_central) > 2))
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag) > 0))
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd) > 0))
        self.selection.add('MET>50',        (self.met.pt > 50))
        self.selection.add('ST>600',        (st > 600))
        self.selection.add('offZ',          offZ)

        reqs = [
            'filter',
            'lepveto',
            'trilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            'offZ',
            'MET>50',
            'N_jet>2',
            'N_central>1',
            'N_btag>0',
            'N_fwd>0',
        ]

        if tight:
            reqs += [
                'N_jet>3',
                'N_central>2',
                'ST>600',
                #'MET>50',
                #'delta_eta',
            ]

        self.reqs = [sel for sel in reqs if sel not in omit]
        selection = self.selection.require(**{sel: True for sel in self.reqs})

        if cutflow:
            # NOTE(review): the cutflow walks the full cut list, so cuts named
            # in `omit` still show up (and are applied) in the cutflow rows.
            self._add_cutflow_rows(cutflow, reqs)

        return selection

Example #12

Show file

File: Histograms.py Project: XL-Seb-Yan/toffea

    def process(self, events):
        """Fill the trijet kinematic histograms for one chunk of events.

        Events are kept when they pass the HLT requirement chosen from
        ``year`` and the dataset name and contain at least three jets with
        pt > 30, |eta| < 2.5 and ``isTight``. The first three such jets of
        each event form the trijet candidate whose pair-wise and
        one-versus-two quantities are histogrammed.

        Parameters
        ----------
        events
            Event array exposing ``metadata['dataset']``, ``HLT`` and ``Jet``.

        Returns
        -------
        The accumulator obtained from ``self._accumulator.identity()`` after
        filling.

        Notes
        -----
        ``year`` is not defined in this method; presumably it is a
        module-level name -- TODO confirm.
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # HLT selection
        # NOTE(review): if `year` matches none of the branches, HLT_mask stays
        # an empty list and the `&` with event_mask below is not meaningful.
        hlt = events.HLT
        HLT_mask = []
        if year == "2016":
            # NOTE (from the original author): the "SingleMuon" test does not
            # work, as the name of the file under processing is unknown.
            if "SingleMuon" in dataset_name:
                if "2016B2" in dataset_name:
                    HLT_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
                else:
                    HLT_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50 | hlt.TkMu50
            else:  # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
                elif "2016H" in dataset_name:
                    HLT_mask = (hlt.PFHT900 | hlt.AK8PFJet450 | hlt.AK8PFJet500
                                | hlt.PFJet500 | hlt.CaloJet500_NoJetID)
                else:
                    HLT_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
        elif year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = hlt.IsoMu27 | hlt.Mu50
                else:
                    HLT_mask = hlt.IsoMu27 | hlt.Mu50 | hlt.OldMu100 | hlt.TkMu100
            else:
                HLT_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID | hlt.CaloJet550_NoJetID
                            | hlt.PFJet500)
        elif year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = hlt.IsoMu24 | hlt.Mu50 | hlt.OldMu100 | hlt.TkMu100
            else:
                HLT_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID | hlt.CaloJet550_NoJetID
                            | hlt.PFJet500)

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns (0, 1, 2) , (1, 2, 0)

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)

        # Jet i against the system of the two remaining jets (j, k).
        jet_k = [2, 0, 1]
        system_jk = selected_jets[:, jet_j] + selected_jets[:, jet_k]
        dR_i_jk = selected_jets[:, jet_i].delta_r(system_jk)
        dEta_i_jk = abs(selected_jets[:, jet_i].eta - system_jk.eta)
        # NOTE(review): plain difference of phi values, not wrapped into
        # [-pi, pi] -- confirm this is intended.
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi - system_jk.phi)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt -
                            (selected_jets[:, 1] + selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt -
                            (selected_jets[:, 2] + selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt -
                            (selected_jets[:, 0] + selected_jets[:, 1]).pt) / m3j

        # Event selection masks
        selection_masks = {}
        # Pre-selection
        packed = PackedSelection()
        packed.add("Dummy", m3j > 0)
        selection_masks["Pre-selection"] = packed.require(
            **{name: True for name in packed.names})

        # The HLT requirement is already part of event_mask above, so no
        # separate "JetHLT" selection mask is defined here.

        def fill(hist_name, label, **axes):
            # Fill one histogram for the current dataset / selection label.
            output[hist_name].fill(dataset=dataset_name, selection=label, **axes)

        def extreme(reducer, values):
            # Per-event min/max along axis 1; events with no entries get -99.
            return awk.fill_none(reducer(values, axis=1), -99)

        # Fill histograms
        for label, mask in selection_masks.items():
            fill("mjjj", label, mjjj=m3j[mask])

            fill("m_ij", label,
                 m_01=m_ij[:, 0][mask],
                 m_12=m_ij[:, 1][mask],
                 m_20=m_ij[:, 2][mask])

            fill("dR_ij", label,
                 dR_01=dR_ij[:, 0][mask],
                 dR_12=dR_ij[:, 1][mask],
                 dR_20=dR_ij[:, 2][mask])

            fill("dEta_ij", label,
                 dEta_01=dEta_ij[:, 0][mask],
                 dEta_12=dEta_ij[:, 1][mask],
                 dEta_20=dEta_ij[:, 2][mask])

            fill("moverM_ij", label,
                 moverM_01=m_01_overM[mask],
                 moverM_12=m_12_overM[mask],
                 moverM_20=m_20_overM[mask])

            fill("pt_i", label,
                 pt_0=selected_jets[:, 0][mask].pt,
                 pt_1=selected_jets[:, 1][mask].pt,
                 pt_2=selected_jets[:, 2][mask].pt)

            fill("eta_i", label,
                 eta_0=selected_jets[:, 0][mask].eta,
                 eta_1=selected_jets[:, 1][mask].eta,
                 eta_2=selected_jets[:, 2][mask].eta)

            fill("ptoverM_i", label,
                 ptoverM_0=pt_i_overM[:, 0][mask],
                 ptoverM_1=pt_i_overM[:, 1][mask],
                 ptoverM_2=pt_i_overM[:, 2][mask])

            fill("dR_i_jk", label,
                 dR_0_12=dR_i_jk[:, 0][mask],
                 dR_1_20=dR_i_jk[:, 1][mask],
                 dR_2_01=dR_i_jk[:, 2][mask])

            fill("dEta_i_jk", label,
                 dEta_0_12=dEta_i_jk[:, 0][mask],
                 dEta_1_20=dEta_i_jk[:, 1][mask],
                 dEta_2_01=dEta_i_jk[:, 2][mask])

            fill("dPhi_i_jk", label,
                 dPhi_0_12=dPhi_i_jk[:, 0][mask],
                 dPhi_1_20=dPhi_i_jk[:, 1][mask],
                 dPhi_2_01=dPhi_i_jk[:, 2][mask])

            # The three dPt/M arrays are reused below for the per-event extremes.
            dPt_0_12 = dPtoverM_0_12[mask]
            dPt_1_20 = dPtoverM_1_20[mask]
            dPt_2_01 = dPtoverM_2_01[mask]
            fill("dPtoverM_i_jk", label,
                 dPtoverM_0_12=dPt_0_12,
                 dPtoverM_1_20=dPt_1_20,
                 dPtoverM_2_01=dPt_2_01)

            jets_sel = selected_jets[mask]
            pt_overM_sel = pt_i_overM[mask]
            dR_sel = dR_ij[mask]
            dEta_sel = dEta_ij[mask]
            dR_i_jk_sel = dR_i_jk[mask]
            dEta_i_jk_sel = dEta_i_jk[mask]
            dPhi_i_jk_sel = dPhi_i_jk[mask]

            # Element-wise extremes over the three one-versus-two combinations;
            # this replaces a per-event Python loop over zip(...).
            max_dPtoverM = np.asarray(
                np.maximum(np.maximum(dPt_0_12, dPt_1_20), dPt_2_01))
            min_dPtoverM = np.asarray(
                np.minimum(np.minimum(dPt_0_12, dPt_1_20), dPt_2_01))

            fill("max_dR", label, max_dR=extreme(awk.max, dR_sel))
            fill("max_dEta", label, max_dEta=extreme(awk.max, dEta_sel))
            fill("min_dR", label, min_dR=extreme(awk.min, dR_sel))
            fill("min_dEta", label, min_dEta=extreme(awk.min, dEta_sel))
            fill("min_pt", label, min_pt=extreme(awk.min, jets_sel.pt))
            fill("max_eta", label, max_eta=extreme(awk.max, abs(jets_sel.eta)))
            fill("max_ptoverM", label, max_ptoverM=extreme(awk.max, pt_overM_sel))
            fill("min_ptoverM", label, min_ptoverM=extreme(awk.min, pt_overM_sel))

            fill("max_dR_j_jj", label, max_dR_j_jj=extreme(awk.max, dR_i_jk_sel))
            fill("max_dEta_j_jj", label, max_dEta_j_jj=extreme(awk.max, dEta_i_jk_sel))
            fill("max_dPhi_j_jj", label, max_dPhi_j_jj=extreme(awk.max, dPhi_i_jk_sel))
            fill("max_dPtoverM_j_jj", label, max_dPtoverM_j_jj=max_dPtoverM)

            fill("min_dR_j_jj", label, min_dR_j_jj=extreme(awk.min, dR_i_jk_sel))
            fill("min_dEta_j_jj", label, min_dEta_j_jj=extreme(awk.min, dEta_i_jk_sel))
            fill("min_dPhi_j_jj", label, min_dPhi_j_jj=extreme(awk.min, dPhi_i_jk_sel))
            fill("min_dPtoverM_j_jj", label, min_dPtoverM_j_jj=min_dPtoverM)

        return output

Example #13

Show file

File: ML.py Project: XL-Seb-Yan/toffea

    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()
        
        # Initialize dict accumulators, if have not been initialized
        for jet in [0, 1, 2]:
            if dataset_name not in output[f"eta_{jet}_final"].keys():
                output[f"eta_{jet}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"ptoverM_{jet}_final"].keys():
                output[f"ptoverM_{jet}_final"][dataset_name] = processor.column_accumulator(np.array([]))
        
        for pair in [(0, 1), (1, 2), (2, 0)]:
            if dataset_name not in output[f"dEta_{pair[0]}{pair[1]}_final"].keys():
                output[f"dEta_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dR_{pair[0]}{pair[1]}_final"].keys():
                output[f"dR_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"moverM_{pair[0]}{pair[1]}_final"].keys():
                output[f"moverM_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            
        for pair in [(0, 1, 2), (1, 2, 0), (2, 0, 1)]:
            if dataset_name not in output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dPtoverM_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dPtoverM_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"ptoverM_max_final"].keys():
            output[f"ptoverM_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"ptoverM_min_final"].keys():
            output[f"ptoverM_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"eta_max_final"].keys():
            output[f"eta_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_max_final"].keys():
            output[f"dR_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_min_final"].keys():
            output[f"dR_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_max_final"].keys():
            output[f"dEta_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_min_final"].keys():
            output[f"dEta_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_j_jj_max_final"].keys():
            output[f"dR_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_j_jj_min_final"].keys():
            output[f"dR_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_j_jj_max_final"].keys():
            output[f"dEta_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_j_jj_min_final"].keys():
            output[f"dEta_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPhi_j_jj_max_final"].keys():
            output[f"dPhi_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPhi_j_jj_min_final"].keys():
            output[f"dPhi_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPtoverM_j_jj_max_final"].keys():
            output[f"dPtoverM_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPtoverM_j_jj_min_final"].keys():
            output[f"dPtoverM_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        
        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name: #this does not work, as the name of file which is under processing is unknown
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else: #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        
        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]
        
        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        #pairs = awk.argcombinations(selected_jets, 2)
        #jet_i, jet_j = awk.unzip(pairs)
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs) # Returns [0, 1, 2] , [1, 2, 0]
        
        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)
        
        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] + selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).phi)

        m3j = selected_jets.sum().mass
        
        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:,0] / m3j
        m_12_overM = m_ij[:,1] / m3j
        m_20_overM = m_ij[:,2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt - (selected_jets[:, 1] + selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt - (selected_jets[:, 2] + selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt - (selected_jets[:, 0] + selected_jets[:, 1]).pt) / m3j
        
        # Event selection masks
        # selection_masks = {}
        # Pre-selection
        selection = PackedSelection()
        selection.add("Dummy", m3j > 000)
        sel_mask = selection.require(**{name: True for name in selection.names})
        # selection_masks["Pre-selection"] = sel_mask
        
        output["selected_events"][dataset_name] += events_3j[sel_mask].__len__()
        
        for jet in [0, 1, 2]:
            output[f"eta_{jet}_final"][dataset_name] += processor.column_accumulator(np.array(selected_jets[:, jet][sel_mask].eta))
            output[f"ptoverM_{jet}_final"][dataset_name] += processor.column_accumulator(np.array(pt_i_overM[:, jet][sel_mask]))
        
        for pair in [(0, 1), (1, 2), (2, 0)]:
            output[f"dEta_{pair[0]}{pair[1]}_final"][dataset_name] += processor.column_accumulator(np.array(dEta_ij[:, pair[0]][sel_mask]))
            output[f"dR_{pair[0]}{pair[1]}_final"][dataset_name] += processor.column_accumulator(np.array(dR_ij[:, pair[0]][sel_mask]))
        
        output[f"moverM_01_final"][dataset_name] += processor.column_accumulator(np.array(m_01_overM[sel_mask]))
        output[f"moverM_12_final"][dataset_name] += processor.column_accumulator(np.array(m_12_overM[sel_mask]))
        output[f"moverM_20_final"][dataset_name] += processor.column_accumulator(np.array(m_20_overM[sel_mask]))
            
        for pair in [(0, 1, 2), (1, 2, 0), (2, 0, 1)]:
            output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dR_i_jk[:, pair[0]][sel_mask]))
            output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dEta_i_jk[:, pair[0]][sel_mask]))
            output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dPhi_i_jk[:, pair[0]][sel_mask]))
        
        output[f"dPtoverM_0_12_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_0_12[sel_mask]))
        output[f"dPtoverM_1_20_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_1_20[sel_mask]))
        output[f"dPtoverM_2_01_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_2_01[sel_mask]))
        
        max_pt_overM_2fill = awk.max(pt_i_overM[sel_mask], axis=1)
        min_pt_overM_2fill = awk.min(pt_i_overM[sel_mask], axis=1)
        max_dR_2fill   = awk.max(dR_ij[sel_mask], axis=1)
        max_dEta_2fill = awk.max(dEta_ij[sel_mask], axis=1)
        min_dR_2fill   = awk.min(dR_ij[sel_mask], axis=1)
        min_dEta_2fill = awk.min(dEta_ij[sel_mask], axis=1)
        min_pt_2fill   = awk.min(selected_jets[sel_mask].pt, axis=1)
        max_eta_2fill  = awk.max(abs(selected_jets[sel_mask].eta), axis=1)
        max_dR_i_jk_2fill = awk.max(dR_i_jk[sel_mask], axis=1)
        min_dR_i_jk_2fill = awk.min(dR_i_jk[sel_mask], axis=1)
        max_dEta_i_jk_2fill = awk.max(dEta_i_jk[sel_mask], axis=1)
        min_dEta_i_jk_2fill = awk.min(dEta_i_jk[sel_mask], axis=1)
        max_dPhi_i_jk_2fill = awk.max(dPhi_i_jk[sel_mask], axis=1)
        min_dPhi_i_jk_2fill = awk.min(dPhi_i_jk[sel_mask], axis=1)
        max_dPtoverM_i_jk_2fill = []
        min_dPtoverM_i_jk_2fill = []
        dPtoverM_0_12_2fill = dPtoverM_0_12[sel_mask]
        dPtoverM_1_20_2fill = dPtoverM_1_20[sel_mask]
        dPtoverM_2_01_2fill = dPtoverM_2_01[sel_mask]
        for pair in zip(dPtoverM_0_12_2fill, dPtoverM_1_20_2fill, dPtoverM_2_01_2fill):
            max_dPtoverM_i_jk_2fill.append(max(pair))
            min_dPtoverM_i_jk_2fill.append(min(pair))
        max_pt_overM_2fill = awk.fill_none(max_pt_overM_2fill, -99)
        min_pt_overM_2fill = awk.fill_none(min_pt_overM_2fill, -99)
        max_dR_2fill = awk.fill_none(max_dR_2fill, -99)
        max_dEta_2fill = awk.fill_none(max_dEta_2fill, -99)
        min_dR_2fill = awk.fill_none(min_dR_2fill, -99)
        min_dEta_2fill = awk.fill_none(min_dEta_2fill, -99)
        min_pt_2fill = awk.fill_none(min_pt_2fill, -99)
        max_eta_2fill = awk.fill_none(max_eta_2fill, -99)
        max_dR_i_jk_2fill = awk.fill_none(max_dR_i_jk_2fill, -99)
        min_dR_i_jk_2fill = awk.fill_none(min_dR_i_jk_2fill, -99)
        max_dEta_i_jk_2fill = awk.fill_none(max_dEta_i_jk_2fill, -99)
        min_dEta_i_jk_2fill = awk.fill_none(min_dEta_i_jk_2fill, -99)
        max_dPhi_i_jk_2fill = awk.fill_none(max_dPhi_i_jk_2fill, -99)
        min_dPhi_i_jk_2fill = awk.fill_none(min_dPhi_i_jk_2fill, -99)
        
        output[f"ptoverM_max_final"][dataset_name] += processor.column_accumulator(np.array(max_pt_overM_2fill))
        output[f"ptoverM_min_final"][dataset_name] += processor.column_accumulator(np.array(min_pt_overM_2fill))
        output[f"eta_max_final"][dataset_name] += processor.column_accumulator(np.array(max_eta_2fill))
        output[f"dR_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dR_2fill))
        output[f"dR_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dR_2fill))
        output[f"dEta_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dEta_2fill))
        output[f"dEta_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dEta_2fill))
        output[f"dR_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dR_i_jk_2fill))
        output[f"dR_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dR_i_jk_2fill))
        output[f"dEta_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dEta_i_jk_2fill))
        output[f"dEta_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dEta_i_jk_2fill))
        output[f"dPhi_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dPhi_i_jk_2fill))
        output[f"dPhi_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dPhi_i_jk_2fill))
        output[f"dPtoverM_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dPtoverM_i_jk_2fill))
        output[f"dPtoverM_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dPtoverM_i_jk_2fill))

        return output

Example #14

Show file

File: nano_analysis.py Project: JacksonWallace/tW_scattering

    def process(self, events):
        """Fill lepton multiplicity and leading-lepton histograms.

        A trivially-true jet preselection is applied, a per-event weight is
        built (the generator weight is added for every dataset except
        'MuonEG'), and the 'N_ele', 'N_mu' and 'lead_lep' histograms are
        filled for events that contain exactly two leptons (electrons plus
        muons) and pass the event filters.

        Args:
            events: event array exposing Jet, Muon, Electron, MET, genWeight
                and a ``metadata['dataset']`` entry.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``
            after the counters and histograms have been updated.
        """
        output = self.accumulator.identity()

        # Always-true mask (every event has >= 0 jets); nothing is actually
        # rejected here, it only serves as a hook for a real preselection.
        loose_mask = ak.num(events.Jet) >= 0
        ev = events[loose_mask]
        dataset = ev.metadata['dataset']

        # The configuration is loaded but not read anywhere below.
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # Lepton collections, taken as-is from the events
        muon = ev.Muon
        electron = ev.Electron

        # Muon-electron pairs and the same-sign flag (not used further down)
        mu_e_pairs = cross(muon, electron)
        charge_product = mu_e_pairs['0'].charge * mu_e_pairs['1'].charge
        SSlepton = ak.any(charge_product > 0, axis=1)

        # Highest- and lowest-pt lepton of the merged collection
        lepton = ak.concatenate([muon, electron], axis=1)
        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        # MET components (could be switched to puppi MET)
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # Per-event weights; 'MuonEG' gets no generator weight
        weight = Weights(len(ev))
        if dataset != 'MuonEG':
            weight.add("weight", ev.genWeight)

        filters = getFilters(ev, year=self.year, dataset=dataset)
        n_electron = ak.num(electron)
        n_muon = ak.num(muon)
        dilep = (n_electron + n_muon) == 2

        selection = PackedSelection()
        selection.add('dilep', dilep)
        selection.add('filter', filters)

        # Baseline = all registered requirements switched on
        baseline = selection.require(
            **dict.fromkeys(['dilep', 'filter'], True))

        baseline_weight = weight.weight()[baseline]

        output['N_ele'].fill(
            dataset=dataset,
            multiplicity=n_electron[baseline],
            weight=baseline_weight,
        )
        output['N_mu'].fill(
            dataset=dataset,
            multiplicity=n_muon[baseline],
            weight=baseline_weight,
        )

        lead_in_baseline = leading_lepton[baseline]
        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(lead_in_baseline.pt)),
            eta=ak.to_numpy(ak.flatten(lead_in_baseline.eta)),
            phi=ak.to_numpy(ak.flatten(lead_in_baseline.phi)),
            weight=baseline_weight,
        )

        return output

Example #15

Show file

File: forward_jet.py Project: JacksonWallace/tW_scattering

    def process(self, events):
        """Run the forward-jet selection and fill all histograms.

        Steps, in order:
          1. keep events with more than two jets;
          2. build tight/veto lepton collections, cleaned jets and the
             central / b-tagged / light / forward jet sub-collections;
          3. build the event weight (skipped for the 'MuonEG' dataset);
          4. register all cuts in a PackedSelection, derive the
             opposite-sign ('os_selection') and baseline ('BL') masks and
             fill the cutflow cumulatively;
          5. fill the nominal histograms;
          6. for every entry of ``self.variations`` (non-'MuonEG' only)
             rebuild the jet collections with the varied pt, register the
             varied cuts and fill the ``*_<var>`` histograms.

        Args:
            events: event array exposing Jet, MET, PV, weight, puWeight*
                and a ``metadata['dataset']`` entry.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``
            after the counters, cutflow and histograms have been updated.
        """

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - used below for the luminosity weight
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        # ak.argsort sorts ascending by default, so the descending order has
        # to be requested explicitly for [:, :2] to pick the highest scores.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB,
                                             ascending=False)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_btag=2', (ak.num(btag) == 2))
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('N_central>1', (ak.num(central) >= 2))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))
        selection.add('MET>30', (ev.MET.pt > 30))

        # Lepton-level requirements only
        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        # Full baseline: lepton-level plus jet / MET requirements
        bl_reqs = os_reqs + [
            'N_btag=2', 'N_jet>2', 'N_central>1', 'N_fwd>0', 'MET>30'
        ]

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        # Cumulative cutflow: each row requires all cuts up to and including it
        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight.weight()[os_selection])
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight.weight()[os_selection])
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[os_selection],
                           weight=weight.weight()[os_selection])
        output['N_central'].fill(dataset=dataset,
                                 multiplicity=ak.num(central)[os_selection],
                                 weight=weight.weight()[os_selection])
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[os_selection],
                             weight=weight.weight()[os_selection])
        # Muon multiplicity must count muons (was filled with electrons)
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(muon)[os_selection],
                            weight=weight.weight()[os_selection])
        output['N_fwd'].fill(dataset=dataset,
                             multiplicity=ak.num(fwd)[os_selection],
                             weight=weight.weight()[os_selection])

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight.weight()[os_selection])

        output['electron'].fill(dataset=dataset,
                                pt=ak.to_numpy(ak.flatten(electron[BL].pt)),
                                eta=ak.to_numpy(ak.flatten(electron[BL].eta)),
                                phi=ak.to_numpy(ak.flatten(electron[BL].phi)),
                                weight=weight.weight()[BL])

        output['muon'].fill(dataset=dataset,
                            pt=ak.to_numpy(ak.flatten(muon[BL].pt)),
                            eta=ak.to_numpy(ak.flatten(muon[BL].eta)),
                            phi=ak.to_numpy(ak.flatten(muon[BL].phi)),
                            weight=weight.weight()[BL])

        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['trail_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['fwd_jet'].fill(dataset=dataset,
                               pt=ak.flatten(high_p_fwd[BL].pt_nom),
                               eta=ak.flatten(high_p_fwd[BL].eta),
                               phi=ak.flatten(high_p_fwd[BL].phi),
                               weight=weight.weight()[BL])

        output['b1'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
                          weight=weight.weight()[BL])

        output['b2'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=weight.weight()[BL])

        output['j2'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(jet[:, 1:2][BL].eta),
                          phi=ak.flatten(jet[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j3'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
                          eta=ak.flatten(jet[:, 2:3][BL].eta),
                          phi=ak.flatten(jet[:, 2:3][BL].phi),
                          weight=weight.weight()[BL])

        # Now, take care of systematic uncertainties
        if dataset != 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt > 25)]
                jet = jet[~match(
                    jet, muon,
                    deltaRCut=0.4)]  # remove jets that overlap with muons
                jet = jet[~match(
                    jet, electron,
                    deltaRCut=0.4)]  # remove jets that overlap with electrons

                central = jet[(abs(jet.eta) < 2.4)]
                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd = getFwdJet(light)
                fwd_noPU = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.p, axis=1))]  # highest momentum spectator
                high_pt_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(
                    ak.argmax(abs(fwd.eta), axis=1))]  # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                # descending order requested explicitly (argsort defaults to
                # ascending)
                high_score_btag = central[ak.argsort(
                    central.btagDeepFlavB, ascending=False)][:, :2]

                # get the modified selection -> more difficult
                selection.add('N_jet>2_' + var,
                              (ak.num(jet.pt) >= 3))  # stupid bug here...
                selection.add('N_btag=2_' + var, (ak.num(btag) == 2))
                selection.add('N_central>1_' + var, (ak.num(central) >= 2))
                selection.add('N_fwd>0_' + var, (ak.num(fwd) >= 1))
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                ## Don't change the selection for now...
                bl_reqs = os_reqs + [
                    'N_jet>2_' + var, 'MET>30_' + var, 'N_btag=2_' + var,
                    'N_central>1_' + var, 'N_fwd>0_' + var
                ]
                bl_reqs_d = {sel: True for sel in bl_reqs}
                BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_fwd_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(fwd)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_central_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(central)[os_selection],
                    weight=weight.weight()[os_selection])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet.eta[:, 0:1][BL]),
                                         phi=ak.flatten(jet.phi[:, 0:1][BL]),
                                         weight=weight.weight()[BL])

                output['b1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight=weight.weight()[BL])

                output['fwd_jet_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_p_fwd[BL].pt),
                    eta=ak.flatten(high_p_fwd[BL].eta),
                    phi=ak.flatten(high_p_fwd[BL].phi),
                    weight=weight.weight()[BL])

                output['MET_' + var].fill(dataset=dataset,
                                          pt=getattr(ev.MET,
                                                     var)[os_selection],
                                          phi=ev.MET[os_selection].phi,
                                          weight=weight.weight()[os_selection])

        return output

Example #16

Show file

File: Optimization.py Project: XL-Seb-Yan/toffea

    def process(self, events):
        """Count three-jet events surviving a scan of single-variable cuts.

        Events must fire the year/dataset dependent HLT paths and contain at
        least three jets with pt > 30, |eta| < 2.5 and ``isTight``.  The
        first three such jets of each event are used to compute the scanned
        quantities.  For each threshold of each scan the number of surviving
        events is added to ``output["N_<scan>_cut<threshold>"][dataset]``.

        Scans performed:
          * minimum jet pt above threshold,
          * maximum jet |eta| below threshold,
          * maximum pairwise dEta below threshold,
          * minimum pairwise dR above threshold.

        Args:
            events: event array exposing HLT, Jet and ``metadata['dataset']``.

        Returns:
            The accumulator obtained from ``self._accumulator.identity()``
            after the counters have been updated.
        """

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # HLT selection
        # NOTE: `year` is not defined in this method; it is resolved from an
        # enclosing scope.  For any other value HLT_mask stays an empty list.
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        max_dR = awk.max(dR_ij, axis=1)
        max_dEta = awk.max(dEta_ij, axis=1)
        min_dR = awk.min(dR_ij, axis=1)
        min_dEta = awk.min(dEta_ij, axis=1)
        min_pT = awk.min(selected_jets.pt, axis=1)
        # Largest |eta| among the three jets.  Taking abs() before max() is
        # essential: abs(max(eta)) would miss a jet at large negative eta.
        max_eta = awk.max(abs(selected_jets.eta), axis=1)

        # Single-jet vs. remaining-dijet quantities and mass ratios.
        # These are currently not used by any of the scans below.
        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)
        dPt_i_jk = abs(selected_jets[:, jet_i].pt -
                       (selected_jets[:, jet_j] + selected_jets[:, jet_k]).pt)

        max_dPhi_jjj = awk.max(dPhi_i_jk, axis=1)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        max_pt_overM = awk.max(pt_i_overM, axis=1)
        min_pt_overM = awk.min(pt_i_overM, axis=1)
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j

        def count_passing(cut_label, cut_mask):
            # Number of 3-jet events satisfying the single cut `cut_mask`,
            # evaluated through a fresh PackedSelection.
            selection = PackedSelection()
            selection.add(cut_label, cut_mask)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            return len(events_3j[sel_mask])

        for pt_cut in range(30, 1150, 5):
            output[f"N_min_pT_cut{pt_cut}"][dataset_name] += count_passing(
                "MinJetPt_cut", min_pT > pt_cut)

        for eta_cut in np.arange(0, 2.5, 0.05):
            output[f"N_max_eta_cut{eta_cut}"][dataset_name] += count_passing(
                "MaxJetEta_cut", max_eta < eta_cut)

        for dEta_max_cut in np.arange(0, 5, 0.1):
            output[f"N_dEta_jj_max_cut{dEta_max_cut}"][
                dataset_name] += count_passing("MaxJJdEta_cut",
                                               max_dEta < dEta_max_cut)

        for dR_min_cut in np.arange(0, 5, 0.1):
            output[f"N_dR_jj_min_cut{dR_min_cut}"][
                dataset_name] += count_passing("MinJJdR_cut",
                                               min_dR > dR_min_cut)

        # TODO: scans on the j-jj dPhi (dPhi_jjj_max) cut were drafted here
        # but are disabled.  The draft iterated with range(0, 6, 0.1), which
        # range() does not accept (float step), and compared min_dR instead
        # of a dPhi quantity, so it needs rework before being enabled.

        return output

Example #17

Show file

File: multi_class_preprocessor.py Project: JacksonWallace/tW_scattering

    def process(self, events):
        """Fill flat per-event columns for events passing the baseline selection.

        The chunk is reduced with a loose jet-multiplicity preselection, lepton
        and jet collections are built, the event-level cuts are registered in a
        ``PackedSelection`` and every output column is filled only for events
        passing the baseline (``BL``) requirements.

        Args:
            events: event array of one chunk; ``events.Jet`` and, after the
                preselection, ``metadata['dataset']`` are read from it.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` with
            the event counters and column accumulators of this chunk added.
        """

        output = self.accumulator.identity()

        output['total']['all'] += len(events)
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # NOTE(review): the result is unused below; kept in case loadConfig()
        # has side effects - confirm before removing.
        cfg = loadConfig()

        ## Muons
        muon = Collections(ev, "Muon", "vetoTTH").get()
        tightmuon = Collections(ev, "Muon", "tightTTH").get()
        dimuon = choose(muon, 2)
        # True if any muon pair in the event has a positive charge product
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "vetoTTH").get()
        tightelectron = Collections(ev, "Electron", "tightTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)

        # leading / trailing lepton = highest / lowest pT of the merged
        # muon + electron collection
        lepton = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton + trailing_lepton).mass
        dilepton_pt = (leading_lepton + trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        tau = getTaus(ev)
        track = getIsoTracks(ev)
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator

        # Two central jets with the highest DeepFlavB score.  ak.argsort sorts
        # in ascending order by default, so descending order must be requested
        # explicitly - otherwise the two *lowest*-score jets would be kept.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB,
                                             ascending=False)][:, :2]

        bl = cross(lepton, high_score_btag)
        bl_dR = delta_r(bl['0'], bl['1'])
        min_bl_dR = ak.min(bl_dR, axis=1)

        jf = cross(j_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        j_fwd2 = jf[ak.singletons(
            ak.argmax(mjf, axis=1)
        )]['1']  # this is the jet that forms the largest invariant mass with j_fwd
        # exactly one entry per event; events without a value get 0
        delta_eta = ak.fill_none(
            ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2)
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(electron) + ak.num(muon)) == 2)

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('SS', (SSlepton | SSelectron | SSmuon))
        selection.add('N_jet>3', (ak.num(jet) >= 4))
        selection.add('N_central>2', (ak.num(central) >= 3))
        selection.add('N_btag>0', (ak.num(btag) >= 1))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))

        # same-sign dilepton requirements
        ss_reqs = [
            'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'
        ]
        # baseline = same-sign requirements + jet requirements; 'N_fwd>0' is
        # registered above but is not part of the baseline
        bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0']

        ss_reqs_d = {sel: True for sel in ss_reqs}
        ss_selection = selection.require(**ss_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        weight = Weights(len(ev))

        # no weights are added for the 'MuonEG' dataset
        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight)

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        # integer label per dataset; datasets not listed here get label 5
        labels = {
            'topW_v3': 0,
            'TTW': 1,
            'TTZ': 2,
            'TTH': 3,
            'ttbar': 4,
            'ttbar1l_MG': 4
        }
        label_mult = labels.get(dataset, 5)
        label = np.ones(len(ev[BL])) * label_mult

        output["n_lep"] += processor.column_accumulator(
            ak.to_numpy((ak.num(electron) + ak.num(muon))[BL]))
        output["n_lep_tight"] += processor.column_accumulator(
            ak.to_numpy((ak.num(tightelectron) + ak.num(tightmuon))[BL]))

        output["lead_lep_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].pt, axis=1)))
        output["lead_lep_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].eta, axis=1)))
        output["lead_lep_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].phi, axis=1)))
        output["lead_lep_charge"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(leading_lepton[BL].charge, axis=1)))

        output["sublead_lep_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].pt, axis=1)))
        output["sublead_lep_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].eta, axis=1)))
        output["sublead_lep_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].phi, axis=1)))
        output["sublead_lep_charge"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(trailing_lepton[BL].charge, axis=1)))

        # jets are pT-sorted above, so the slices 0:1 / 1:2 pick the first and
        # second jet while keeping the per-event list structure for flatten
        output["lead_jet_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 0:1][BL].pt, axis=1)))
        output["lead_jet_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 0:1][BL].eta, axis=1)))
        output["lead_jet_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 0:1][BL].phi, axis=1)))

        output["sublead_jet_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 1:2][BL].pt, axis=1)))
        output["sublead_jet_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 1:2][BL].eta, axis=1)))
        output["sublead_jet_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(jet[:, 1:2][BL].phi, axis=1)))

        output["lead_btag_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].pt, axis=1)))
        output["lead_btag_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].eta, axis=1)))
        output["lead_btag_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].phi, axis=1)))

        output["sublead_btag_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].pt, axis=1)))
        output["sublead_btag_eta"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].eta, axis=1)))
        output["sublead_btag_phi"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].phi, axis=1)))

        # forward-jet columns are padded to one entry per event, filled with 0
        # where the event has no forward jet
        output["fwd_jet_p"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(ak.pad_none(j_fwd[BL].p, 1, clip=True),
                                        0),
                           axis=1)))
        output["fwd_jet_pt"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(
                    ak.pad_none(j_fwd[BL].pt, 1, clip=True), 0),
                           axis=1)))
        output["fwd_jet_eta"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(
                    ak.pad_none(j_fwd[BL].eta, 1, clip=True), 0),
                           axis=1)))
        output["fwd_jet_phi"] += processor.column_accumulator(
            ak.to_numpy(
                ak.flatten(ak.fill_none(
                    ak.pad_none(j_fwd[BL].phi, 1, clip=True), 0),
                           axis=1)))

        output["mjj_max"] += processor.column_accumulator(
            ak.to_numpy(ak.fill_none(ak.max(mjf[BL], axis=1), 0)))
        output["delta_eta_jj"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(delta_eta[BL], axis=1)))

        output["met"] += processor.column_accumulator(ak.to_numpy(met_pt[BL]))
        output["ht"] += processor.column_accumulator(ak.to_numpy(ht[BL]))
        output["st"] += processor.column_accumulator(ak.to_numpy(st[BL]))
        output["n_jet"] += processor.column_accumulator(
            ak.to_numpy(ak.num(jet[BL])))
        output["n_btag"] += processor.column_accumulator(
            ak.to_numpy(ak.num(btag[BL])))
        output["n_fwd"] += processor.column_accumulator(
            ak.to_numpy(ak.num(fwd[BL])))
        output["n_central"] += processor.column_accumulator(
            ak.to_numpy(ak.num(central[BL])))
        output["n_tau"] += processor.column_accumulator(
            ak.to_numpy(ak.num(tau[BL])))
        output["n_track"] += processor.column_accumulator(
            ak.to_numpy(ak.num(track[BL])))

        output["dilepton_pt"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(dilepton_pt[BL], axis=1)))
        output["dilepton_mass"] += processor.column_accumulator(
            ak.to_numpy(ak.flatten(dilepton_mass[BL], axis=1)))
        output["min_bl_dR"] += processor.column_accumulator(
            ak.to_numpy(min_bl_dR[BL]))
        output["min_mt_lep_met"] += processor.column_accumulator(
            ak.to_numpy(min_mt_lep_met[BL]))

        output["label"] += processor.column_accumulator(label)
        output["weight"] += processor.column_accumulator(weight.weight()[BL])

        # event counts after the same-sign and the full baseline selection
        output["presel"]["all"] += len(ev[ss_selection])
        output["sel"]["all"] += len(ev[BL])

        return output

Example #18

Show file

    def process(self, events):
        """Fill jet- and lepton-multiplicity and leading-lepton histograms.

        Gen-matched tight electrons and muons are merged into one lepton
        collection, events are classified as same-sign or opposite-sign
        dilepton, and the histograms are filled for the baseline,
        baseline + same-sign and baseline + opposite-sign selections.  The
        opposite-sign histograms use the weights that additionally carry the
        "charge flip" weight.

        Args:
            events: event array of one chunk.

        Returns:
            The accumulator of this chunk.
        """
        output = self.accumulator.identity()

        # very loose preselection: at least two jets in the event
        ev = events[ak.num(events.Jet) >= 2]
        dataset = ev.metadata['dataset']

        # config is loaded but not used afterwards
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # --- electrons: kinematic cuts first, then gen matching ---
        tight_ele = Collections(ev, "Electron", "tight").get()
        ele_kin_ok = (tight_ele.pt > 20) & (abs(tight_ele.eta) < 2.4)
        kin_ele = tight_ele[ele_kin_ok]
        ele_gen_ok = ((kin_ele.genPartIdx >= 0) &
                      (np.abs(kin_ele.matched_gen.pdgId) == 11))
        electron = kin_ele[ele_gen_ok]

        # --- muons: same two-step filtering ---
        tight_mu = Collections(ev, "Muon", "tight").get()
        mu_kin_ok = (tight_mu.pt > 20) & (abs(tight_mu.eta) < 2.4)
        kin_mu = tight_mu[mu_kin_ok]
        mu_gen_ok = ((kin_mu.genPartIdx >= 0) &
                     (np.abs(kin_mu.matched_gen.pdgId) == 13))
        muon = kin_mu[mu_gen_ok]

        # --- merged leptons and their charge configuration ---
        lepton = ak.concatenate([muon, electron], axis=1)
        same_sign = (ak.sum(lepton.charge, axis=1) != 0) & (ak.num(lepton)
                                                            == 2)
        opposite_sign = (ak.sum(lepton.charge, axis=1)
                         == 0) & (ak.num(lepton) == 2)

        lead_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[lead_idx]

        # --- jets: pT-ordered, cleaned against muons and then electrons ---
        all_jets = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        sorted_jets = all_jets[ak.argsort(all_jets.pt, ascending=False)]
        no_mu_jets = sorted_jets[~match(sorted_jets, muon, deltaRCut=0.4)]
        jet = no_mu_jets[~match(no_mu_jets, electron, deltaRCut=0.4)]

        # MET (read but not used further down)
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # --- weights: weight2 additionally carries the charge-flip weight ---
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)
            weight2.add("weight", ev.genWeight)

        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        # --- event selection ---
        filters = getFilters(ev, year=self.year, dataset=dataset)
        two_jets = ak.num(jet) >= 2

        selection = PackedSelection()
        for cut_name, cut_mask in (
            ('filter', filters),
            ('ss', same_sign),
            ('os', opposite_sign),
            ('jet', two_jets),
        ):
            selection.add(cut_name, cut_mask)

        def passes(*cut_names):
            # mask of events for which every named cut is True
            return selection.require(**dict.fromkeys(cut_names, True))

        baseline_cuts = ('filter', 'jet')
        baseline = passes(*baseline_cuts)
        ss_sel = passes(*baseline_cuts, 'ss')
        os_sel = passes(*baseline_cuts, 'os')

        # --- multiplicity histograms ---
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[baseline],
                             weight=weight.weight()[baseline])

        for hist_name, evt_mask, evt_weight in (
            ('N_ele', ss_sel, weight),
            ('N_ele2', os_sel, weight2),
        ):
            output[hist_name].fill(dataset=dataset,
                                   multiplicity=ak.num(lepton)[evt_mask],
                                   weight=evt_weight.weight()[evt_mask])

        # --- leading-lepton kinematics (|eta| is filled) ---
        for hist_name, evt_mask, evt_weight in (
            ("electron", ss_sel, weight),
            ("electron2", os_sel, weight2),
        ):
            picked = leading_lepton[evt_mask]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(picked.pt)),
                eta=abs(ak.to_numpy(ak.flatten(picked.eta))),
                phi=ak.to_numpy(ak.flatten(picked.phi)),
                weight=evt_weight.weight()[evt_mask])

        return output

Example #19

Show file

File: processor_data_v1.py Project: JacksonWallace/FCNC

    def process(self, events):
        """Fill the jet-multiplicity histogram for opposite-sign di-electron events.

        Builds tight/loose electron, loose muon and cleaned jet collections,
        registers the event-level cuts in a ``PackedSelection`` and fills
        ``output["N_jet"]`` for events passing the opposite-sign selection.

        Args:
            events: event array of one chunk.

        Returns:
            The accumulator of this chunk.

        Raises:
            ValueError: if ``self.year`` is not 2016, 2017 or 2018.
            NotImplementedError: if ``self.year`` is 2016 or 2017, for which
                no certified-lumi JSON is configured here.  Previously these
                years failed later with an ``UnboundLocalError`` because
                ``lumimask`` was never assigned.
        """

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        # di-electron trigger flag for the data-taking year
        if self.year in (2017, 2018):
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2016:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ
        else:
            raise ValueError(
                f"unsupported year {self.year!r}: expected 2016, 2017 or 2018")

        # Only the 2018 certification file is configured.  Fail early and
        # explicitly for the other years instead of hitting an unassigned
        # ``lumimask`` further down.
        if self.year != 2018:
            raise NotImplementedError(
                f"no certified-lumi JSON configured for year {self.year!r}")
        lumimask = LumiMask(
            'processors/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
        )

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]

        loose_electron = Collections(ev, "Electron", "veto").get()
        loose_electron = loose_electron[(loose_electron.pt > 25)
                                        & (np.abs(loose_electron.eta) < 2.4)]

        # exactly two tight electrons with non-zero / zero charge sum
        SSelectron = (ak.sum(electron.charge, axis=1) != 0) & (ak.num(electron)
                                                               == 2)
        OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron)
                                                               == 2)

        dielectron = choose(electron, 2)
        dielectron_mass = (dielectron['0'] + dielectron['1']).mass
        dielectron_pt = (dielectron['0'] + dielectron['1']).pt

        # highest-pT electron, kept only if its pT exceeds 30
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[(leading_electron_idx)]
        leading_electron = leading_electron[(leading_electron.pt > 30)]

        trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
        trailing_electron = electron[trailing_electron_idx]

        ##Muons

        loose_muon = Collections(ev, "Muon", "veto").get()
        loose_muon = loose_muon[(loose_muon.pt > 20)
                                & (np.abs(loose_muon.eta) < 2.4)]

        #jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt', UL=False)
        jet = jet[ak.argsort(
            jet.pt, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, loose_muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        mask = lumimask(ev.run, ev.luminosityBlock)
        ss = (SSelectron)
        os = (OSelectron)
        # some di-electron pair has a mass within 15 of 91.2
        mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
        lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
        jet1 = (ak.num(jet) >= 1)
        jet2 = (ak.num(jet) >= 2)
        num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('mask', (mask))
        selection.add('ss', ss)
        selection.add('os', os)
        selection.add('mass', mass)
        selection.add('leading', lead_electron)
        selection.add('triggers', triggers)
        selection.add('one jet', jet1)
        selection.add('two jets', jet2)
        selection.add('num_loose', num_loose)

        # baseline requirements shared by all selections below
        bl_reqs = [
            'filter', 'mass', 'mask', 'triggers', 'leading', 'num_loose'
        ]

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        # baseline + same-sign / opposite-sign
        s_reqs = bl_reqs + ['ss']
        s_reqs_d = {sel: True for sel in s_reqs}
        ss_sel = selection.require(**s_reqs_d)

        o_reqs = bl_reqs + ['os']
        o_reqs_d = {sel: True for sel in o_reqs}
        os_sel = selection.require(**o_reqs_d)

        # ... additionally requiring at least one jet
        j1s_reqs = s_reqs + ['one jet']
        j1s_reqs_d = {sel: True for sel in j1s_reqs}
        j1ss_sel = selection.require(**j1s_reqs_d)

        j1o_reqs = o_reqs + ['one jet']
        j1o_reqs_d = {sel: True for sel in j1o_reqs}
        j1os_sel = selection.require(**j1o_reqs_d)

        # ... additionally requiring at least two jets
        j2s_reqs = s_reqs + ['two jets']
        j2s_reqs_d = {sel: True for sel in j2s_reqs}
        j2ss_sel = selection.require(**j2s_reqs_d)

        j2o_reqs = o_reqs + ['two jets']
        j2o_reqs_d = {sel: True for sel in j2o_reqs}
        j2os_sel = selection.require(**j2o_reqs_d)

        # only the opposite-sign selection is histogrammed here
        output["N_jet"].fill(
            dataset=dataset,
            multiplicity=ak.num(jet)[os_sel],
        )

        return output

Example #20

Show file

    def process(self, events):

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        if self.year == 2016:
            lumimask = LumiMask(
                '../data/lumi/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt'
            )
        if self.year == 2017:
            lumimask = LumiMask(
                '../data/lumi/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt'
            )
        if self.year == 2018:
            lumimask = LumiMask(
                '../data/lumi/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
            )

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        if self.year == 2018:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2017:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2016:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]

        loose_electron = Collections(ev, "Electron", "looseFCNC", 0,
                                     self.year).get()
        loose_electron = loose_electron[(loose_electron.pt > 25)
                                        & (np.abs(loose_electron.eta) < 2.4)]

        SSelectron = (ak.sum(electron.charge, axis=1) != 0) & (ak.num(electron)
                                                               == 2)
        OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron)
                                                               == 2)

        dielectron = choose(electron, 2)
        dielectron_mass = (dielectron['0'] + dielectron['1']).mass
        dielectron_pt = (dielectron['0'] + dielectron['1']).pt

        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[(leading_electron_idx)]
        leading_electron = leading_electron[(leading_electron.pt > 30)]

        trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
        trailing_electron = electron[trailing_electron_idx]

        ##Muons

        loose_muon = Collections(ev, "Muon", "looseFCNC", 0, self.year).get()
        loose_muon = loose_muon[(loose_muon.pt > 20)
                                & (np.abs(loose_muon.eta) < 2.4)]

        #jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[~match(jet, loose_muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        #weights
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))
        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset, UL=False)
        mask = lumimask(ev.run, ev.luminosityBlock)
        ss = (SSelectron)
        os = (OSelectron)
        mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
        lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
        jet1 = (ak.num(jet) >= 1)
        jet2 = (ak.num(jet) >= 2)
        num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('mask', (mask))
        selection.add('ss', ss)
        selection.add('os', os)
        selection.add('mass', mass)
        selection.add('leading', lead_electron)
        selection.add('triggers', triggers)
        selection.add('one jet', jet1)
        selection.add('two jets', jet2)
        selection.add('num_loose', num_loose)

        bl_reqs = ['filter'] + ['triggers'] + ['mask']

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        s_reqs = bl_reqs + ['ss'] + ['mass'] + ['num_loose'] + ['leading']
        s_reqs_d = {sel: True for sel in s_reqs}
        ss_sel = selection.require(**s_reqs_d)

        o_reqs = bl_reqs + ['os'] + ['mass'] + ['num_loose'] + ['leading']
        o_reqs_d = {sel: True for sel in o_reqs}
        os_sel = selection.require(**o_reqs_d)

        j1s_reqs = s_reqs + ['one jet']
        j1s_reqs_d = {sel: True for sel in j1s_reqs}
        j1ss_sel = selection.require(**j1s_reqs_d)

        j1o_reqs = o_reqs + ['one jet']
        j1o_reqs_d = {sel: True for sel in j1o_reqs}
        j1os_sel = selection.require(**j1o_reqs_d)

        j2s_reqs = s_reqs + ['two jets']
        j2s_reqs_d = {sel: True for sel in j2s_reqs}
        j2ss_sel = selection.require(**j2s_reqs_d)

        j2o_reqs = o_reqs + ['two jets']
        j2o_reqs_d = {sel: True for sel in j2o_reqs}
        j2os_sel = selection.require(**j2o_reqs_d)

        #outputs

        output["electron_data1"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[os_sel].phi)),
            weight=weight2.weight()[os_sel])

        output["electron_data2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[os_sel].phi)),
            weight=weight2.weight()[os_sel])

        output["electron_data3"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].phi)),
            weight=weight2.weight()[j1os_sel])

        output["electron_data4"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].phi)),
            weight=weight2.weight()[j1os_sel])

        output["electron_data5"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].phi)),
            weight=weight2.weight()[j2os_sel])

        output["electron_data6"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].phi)),
            weight=weight2.weight()[j2os_sel])

        output["electron_data7"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[ss_sel].phi)),
            weight=weight.weight()[ss_sel])

        output["electron_data8"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].phi)),
            weight=weight.weight()[ss_sel])

        output["electron_data9"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].phi)),
            weight=weight.weight()[j1ss_sel])

        output["electron_data10"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].phi)),
            weight=weight.weight()[j1ss_sel])

        output["electron_data11"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].phi)),
            weight=weight.weight()[j2ss_sel])

        output["electron_data12"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].phi)),
            weight=weight.weight()[j2ss_sel])

        output["dilep_mass1"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[os_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[os_sel])),
            weight=weight2.weight()[os_sel])

        output["dilep_mass2"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j1os_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j1os_sel])),
            weight=weight2.weight()[j1os_sel])

        output["dilep_mass3"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j2os_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j2os_sel])),
            weight=weight2.weight()[j2os_sel])

        output["dilep_mass4"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[ss_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[ss_sel])),
            weight=weight.weight()[ss_sel])

        output["dilep_mass5"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j1ss_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j1ss_sel])),
            weight=weight.weight()[j1ss_sel])

        output["dilep_mass6"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j2ss_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j2ss_sel])),
            weight=weight.weight()[j2ss_sel])

        output["MET"].fill(dataset=dataset,
                           pt=met_pt[os_sel],
                           weight=weight2.weight()[os_sel])

        output["MET2"].fill(dataset=dataset,
                            pt=met_pt[j1os_sel],
                            weight=weight2.weight()[j1os_sel])

        output["MET3"].fill(dataset=dataset,
                            pt=met_pt[j2os_sel],
                            weight=weight2.weight()[j2os_sel])

        output["MET4"].fill(dataset=dataset,
                            pt=met_pt[ss_sel],
                            weight=weight.weight()[ss_sel])

        output["MET5"].fill(dataset=dataset,
                            pt=met_pt[j1ss_sel],
                            weight=weight.weight()[j1ss_sel])

        output["MET6"].fill(dataset=dataset,
                            pt=met_pt[j2ss_sel],
                            weight=weight.weight()[j2ss_sel])

        output["N_jet"].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_sel],
                             weight=weight2.weight()[os_sel])

        output["N_jet2"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j1os_sel],
                              weight=weight2.weight()[j1os_sel])

        output["N_jet3"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j2os_sel],
                              weight=weight2.weight()[j2os_sel])

        output["N_jet4"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[ss_sel],
                              weight=weight.weight()[ss_sel])

        output["N_jet5"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j1ss_sel],
                              weight=weight.weight()[j1ss_sel])

        output["N_jet6"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j2ss_sel],
                              weight=weight.weight()[j2ss_sel])

        output["PV_npvsGood"].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_sel].npvsGood,
                                   weight=weight2.weight()[os_sel])

        output["PV_npvsGood2"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j1os_sel].npvsGood,
                                    weight=weight2.weight()[j1os_sel])

        output["PV_npvsGood3"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j2os_sel].npvsGood,
                                    weight=weight2.weight()[j2os_sel])

        output["PV_npvsGood4"].fill(dataset=dataset,
                                    multiplicity=ev.PV[ss_sel].npvsGood,
                                    weight=weight.weight()[ss_sel])

        output["PV_npvsGood5"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j1ss_sel].npvsGood,
                                    weight=weight.weight()[j1ss_sel])

        output["PV_npvsGood6"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j2ss_sel].npvsGood,
                                    weight=weight.weight()[j2ss_sel])

        return output