Python sum Examples

Programming Language: Python

Namespace/Package Name: awkward1

Method/Function: sum

Examples at hotexamples.com: 30

Python sum - 30 examples found. These are the top rated real world Python examples of awkward1.sum extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: vector.py Project: rishabhCMS/coffea

 def sum(self, axis=-1):
     return awkward1.zip(
         {
             "x": awkward1.sum(self.x, axis=axis),
             "y": awkward1.sum(self.y, axis=axis),
         },
         with_name="TwoVector",
     )

Example #2

Show file

File: vector.py Project: rishabhCMS/coffea

 def sum(self, axis=-1):
     return awkward1.zip(
         {
             "x": awkward1.sum(self.x, axis=axis),
             "y": awkward1.sum(self.y, axis=axis),
             "z": awkward1.sum(self.z, axis=axis),
             "t": awkward1.sum(self.t, axis=axis),
         },
         with_name="LorentzVector",
     )

Example #3

Show file

 def sum(self, axis=-1):
     """Sum an array of vectors elementwise using `x` and `y` components"""
     out = awkward1.zip(
         {
             "x": awkward1.sum(self.x, axis=axis),
             "y": awkward1.sum(self.y, axis=axis),
         },
         with_name="TwoVector",
         highlevel=False,
     )
     return awkward1._util.wrap(out, cache=self.cache, behavior=self.behavior)

Example #4

Show file

File: candidate.py Project: dntaylor/coffea

 def sum(self, axis=-1):
     return awkward1.zip(
         {
             "x": awkward1.sum(self.x, axis=axis),
             "y": awkward1.sum(self.y, axis=axis),
             "z": awkward1.sum(self.z, axis=axis),
             "t": awkward1.sum(self.t, axis=axis),
             "charge": awkward1.sum(self.charge, axis=axis),
         },
         with_name="Candidate",
     )

Example #5

Show file

File: candidate.py Project: maxgalli/coffea

 def sum(self, axis=-1):
     """Sum an array of vectors elementwise using `x`, `y`, `z`, `t`, and `charge` components"""
     return awkward1.zip(
         {
             "x": awkward1.sum(self.x, axis=axis),
             "y": awkward1.sum(self.y, axis=axis),
             "z": awkward1.sum(self.z, axis=axis),
             "t": awkward1.sum(self.t, axis=axis),
             "charge": awkward1.sum(self.charge, axis=axis),
         },
         with_name="Candidate",
     )

Example #6

Show file

 def sum(self, axis=-1):
     """Sum an array of vectors elementwise using `x`, `y`, and `z` components"""
     out = awkward1.zip(
         {
             "x": awkward1.sum(self.x, axis=axis),
             "y": awkward1.sum(self.y, axis=axis),
             "z": awkward1.sum(self.z, axis=axis),
         },
         with_name="ThreeVector",
         highlevel=False,
     )
     return awkward1.Array(out, behavior=self.behavior)

Example #7

Show file

    def flip_weight(self, electron):

        f_1 = self.evaluator['el'](electron.pt[:,0:1], abs(electron.eta[:,0:1]))
        f_2 = self.evaluator['el'](electron.pt[:,1:2], abs(electron.eta[:,1:2]))

        # For custom measurements
        #f_1 = yahist_2D_lookup(self.ratio, electron.pt[:,0:1], abs(electron.eta[:,0:1]))
        #f_2 = yahist_2D_lookup(self.ratio, electron.pt[:,1:2], abs(electron.eta[:,1:2]))

        # I'm using ak.prod and ak.sum to replace empty arrays by 1 and 0, respectively
        weight = ak.sum(f_1/(1-f_1), axis=1)*ak.prod(1-f_2/(1-f_2), axis=1) + ak.sum(f_2/(1-f_2), axis=1)*ak.prod(1-f_1/(1-f_1), axis=1)

        return weight

Example #8

Show file

def lct_check(csc_accept, csc_chamber, lct_chamber, alct_chamber,
              clct_chamber):

    csc_pass = np.array(ak.num(csc_accept, axis=1) > 0)
    if np.count_nonzero(csc_pass) == 0: return [0, 0, 0, 0]

    csc_cham_pass = csc_chamber[csc_pass]
    lct_cham_pass = lct_chamber[csc_pass]
    alct_cham_pass = alct_chamber[csc_pass]
    clct_cham_pass = clct_chamber[csc_pass]

    #print(csc_cham_pass)
    #print(lct_cham_pass)
    #print(alct_cham_pass)
    #print(clct_cham_pass)

    lct_corr = 0
    alct_corr = 0
    clct_corr = 0
    for i in range(len(csc_cham_pass)):
        for j in range(len(csc_cham_pass[i])):
            if csc_cham_pass[i][j] in lct_cham_pass[i]: lct_corr += 1
            if csc_cham_pass[i][j] in alct_cham_pass[i]: alct_corr += 1
            if csc_cham_pass[i][j] in clct_cham_pass[i]: clct_corr += 1

    total_cham = ak.sum(ak.num(csc_cham_pass))
    lct_eff = lct_corr / total_cham * 100
    alct_eff = alct_corr / total_cham * 100
    clct_eff = clct_corr / total_cham * 100

    return [total_cham, lct_eff, alct_eff, clct_eff]

Example #9

Show file

    def process(self, events):
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        muons = ak.zip(
            {
                "pt": events.Muon_pt,
                "eta": events.Muon_eta,
                "phi": events.Muon_phi,
                "mass": events.Muon_mass,
                "charge": events.Muon_charge,
            },
            with_name="PtEtaPhiMCandidate")

        cut = (ak.num(muons) == 2) & (ak.sum(muons.charge) == 0)
        # add first and second muon in every event together
        dimuon = muons[cut][:, 0] + muons[cut][:, 1]

        output["sumw"][dataset] += len(events)
        output["mass"].fill(
            dataset=dataset,
            mass=dimuon.mass,
        )

        return output

Example #10

Show file

File: test_readwrite.py Project: aminnj/pdroot

def test_awkward_accessor():
    x = fletcher.FletcherContinuousArray([[1.0, 2.0], [], [3.0, 4.0, 5.0]])
    y = np.zeros(len(x), dtype=float)
    df = pd.DataFrame(dict(x=x, y=y))
    df.to_root(".test.root", compression_jagged=None)
    df = pd.read_root(".test.root")
    assert df["x"].ak(0).sum().tolist() == [3.0, 0.0, 12.0]
    assert awkward1.sum(df["x"], axis=-1).tolist() == [3.0, 0.0, 12.0]

Example #11

Show file

def debug_eff(llp_accept, csc_accept, emtf_accept, gmt_accept, csc_endcap,
              csc_station, csc_ring, csc_chamber, emtf_endcap, emtf_station):

    # Create a mask for events in acceptance
    llp_pass = np.array(ak.sum(llp_accept, axis=-1) > 0)

    # Apppply mask to the csc, emtf, and gmt
    csc_pass = csc_accept[llp_pass]
    emtf_pass = emtf_accept[llp_pass]
    gmt_pass = gmt_accept[llp_pass]

    mismatch = ak.num(emtf_pass) > ak.flatten(gmt_pass) * 5
    print("Mismatch:")
    print(mismatch)
    print('')

    csc_pass_endcap = csc_endcap[llp_pass]
    csc_pass_station = csc_station[llp_pass]
    csc_pass_ring = csc_ring[llp_pass]
    csc_pass_chamber = csc_chamber[llp_pass]

    print("CSC Masked Array:")
    print(csc_pass[mismatch])
    print("CSC endcap:")
    print(csc_pass_endcap[mismatch])
    print("CSC station:")
    print(csc_pass_station[mismatch])
    print("CSC ring:")
    print(csc_pass_ring[mismatch])
    print("CSC chamber:")
    print(csc_pass_chamber[mismatch])
    print("")

    emtf_pass_endcap = emtf_endcap[llp_pass]
    emtf_pass_station = emtf_station[llp_pass]

    print("EMTF Masked Array:")
    print(emtf_pass[mismatch])
    print("EMTF endcap:")
    print(emtf_pass_endcap[mismatch])
    print("EMTF sector:")
    print(emtf_pass_station[mismatch])
    print("")

    print("GMT Masked Array:")
    print(gmt_pass[mismatch])
    print("")

    # Calculate efficiency for each
    n_acc = np.count_nonzero(llp_pass)
    csc_eff = np.count_nonzero(ak.num(csc_pass, axis=1)) / n_acc * 100
    emtf_eff = np.count_nonzero(ak.num(emtf_pass, axis=1)) / n_acc * 100
    gmt_eff = np.count_nonzero(gmt_pass) / n_acc * 100

    return [n_acc, csc_eff, emtf_eff, gmt_eff]

Example #12

Show file

def get_eff(llp_accept, csc_accept, csc_accept_loose, csc_accept_tight,
            emtf_accept, emtf_accept_loose, gmt_accept):

    # Create a mask for events in acceptance
    llp_pass = np.array(ak.sum(llp_accept, axis=-1) > 0)
    n_acc = np.count_nonzero(llp_pass)
    if n_acc == 0: return [0, 0, 0, 0]

    # Apppply mask to the csc, emtf, and gmt
    csc_pass = csc_accept[llp_pass]
    emtf_pass = emtf_accept[llp_pass]
    gmt_pass = gmt_accept[llp_pass]
    csc_pass_loose = csc_accept_loose[llp_pass]
    csc_pass_tight = csc_accept_tight[llp_pass]
    emtf_pass_loose = emtf_accept_loose[llp_pass]

    # Calculate efficiency for each
    csc_eff = np.count_nonzero(ak.sum(csc_pass, axis=1)) / n_acc * 100
    emtf_eff = np.count_nonzero(ak.sum(emtf_pass, axis=1)) / n_acc * 100
    gmt_eff = np.count_nonzero((gmt_pass)) / n_acc * 100
    csc_eff_loose = np.count_nonzero(ak.sum(csc_pass_loose,
                                            axis=1)) / n_acc * 100
    csc_eff_tight = np.count_nonzero(ak.sum(csc_pass_tight,
                                            axis=1)) / n_acc * 100
    emtf_eff_loose = np.count_nonzero(ak.sum(emtf_pass_loose,
                                             axis=1)) / n_acc * 100
    csc_eff_2loose = np.count_nonzero(
        np.array(ak.sum(csc_pass_loose, axis=1) > 1)) / n_acc * 100

    emtf_one = np.array(ak.sum(emtf_pass, axis=1) > 0)
    emtf_two = np.array(ak.sum(emtf_pass_loose, axis=1) > 0)
    emtf_or = emtf_one | emtf_two
    emtf_eff_or = np.count_nonzero(emtf_or) / n_acc * 100

    return [
        n_acc, csc_eff_loose, csc_eff, csc_eff_tight, csc_eff_2loose, emtf_eff,
        emtf_eff_loose, emtf_eff_or, gmt_eff
    ]

Example #13

Show file

File: jet_study.py Project: JacksonWallace/FCNC

    def process(self, events):

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        second_lepton = lepton[~(trailing_lepton_idx & leading_lepton_idx)]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        lt = met_pt + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        # define the weight
        weight = Weights(len(ev))

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'),
                         dataset):
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add(
                "btag",
                self.btagSF.Method1a(btag,
                                     light,
                                     b_direction='central',
                                     c_direction='central'))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        sel = Selection(
            dataset=dataset,
            events=ev,
            year=self.year,
            ele=electron,
            ele_veto=vetoelectron,
            mu=muon,
            mu_veto=vetomuon,
            jet_all=jet,
            jet_central=central,
            jet_btag=btag,
            jet_fwd=fwd,
            met=ev.MET,
        )

        BL = sel.dilep_baseline(SS=False)

        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[BL_minusNb],
                           weight=weight.weight()[BL_minusNb])

        if re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run' % dataset] += processor.column_accumulator(
                run_[BL])
            output['%s_lumi' % dataset] += processor.column_accumulator(
                lumi_[BL])
            output['%s_event' % dataset] += processor.column_accumulator(
                event_[BL])

        # Now, take care of systematic unceratinties
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'),
                         dataset):
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations

                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                weight = Weights(len(ev))
                weight.add("weight", ev.weight * cfg['lumi'][self.year])
                weight.add("PU",
                           ev.puWeight,
                           weightUp=ev.puWeightUp,
                           weightDown=ev.puWeightDown,
                           shift=False)
                if var == 'centralUp':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='central',
                                             c_direction='up'))
                elif var == 'centralDown':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='central',
                                             c_direction='down'))
                elif var == 'upCentral':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='up',
                                             c_direction='central'))
                elif var == 'downCentral':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='down',
                                             c_direction='central'))

                weight.add("lepton", self.leptonSF.get(electron, muon))
                met = ev.MET
                sel = Selection(
                    dataset=dataset,
                    events=ev,
                    year=self.year,
                    ele=electron,
                    ele_veto=vetoelectron,
                    mu=muon,
                    mu_veto=vetomuon,
                    jet_all=jet,
                    jet_central=central,
                    jet_btag=btag,
                    jet_fwd=fwd,
                    met=met,
                )

                BL = sel.dilep_baseline(SS=False)

                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[BL_minusNb],
                    weight=weight.weight()[BL_minusNb])

        return output

Example #14

Show file

    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        OSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)<0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        OSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)<0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        lepton   = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)<0, axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        ## forward jets
        high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator
        
        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]
        
        jf          = cross(high_p_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        deltaEta    = abs(high_p_fwd.eta - jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max     = ak.max(mjf, axis=1)
        
        jj          = choose(jet, 2)
        mjj_max     = ak.max((jj['0']+jj['1']).mass, axis=1)
        
        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)
        
        # define the weight
        weight = Weights( len(ev) )
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        
        
        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.dilep_baseline(cutflow=cutflow, SS=False)
        
        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])

        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])

        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])

        BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])
        
        BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL_minusMET].pt,
            phi  = ev.MET[BL_minusMET].phi,
            weight = weight.weight()[BL_minusMET]
        )
        
        #output['electron'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(electron[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(electron[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(electron[BL].phi)),
        #    weight = weight.weight()[BL]
        #)
        #
        #output['muon'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(muon[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(muon[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(muon[BL].phi)),
        #    weight = weight.weight()[BL]
        #)
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_p_fwd[BL].pt_nom),
            eta = ak.flatten(high_p_fwd[BL].eta),
            phi = ak.flatten(high_p_fwd[BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['b1'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 0:1][BL].eta),
            phi = ak.flatten(high_score_btag[:, 0:1][BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['b2'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 1:2][BL].eta),
            phi = ak.flatten(high_score_btag[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )

        if re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run'%dataset] += processor.column_accumulator(run_[BL])
            output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL])
            output['%s_event'%dataset] += processor.column_accumulator(event_[BL])
        
        # Now, take care of systematic unceratinties
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId>1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt>25)]
                jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
                jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

                central   = jet[(abs(jet.eta)<2.4)]
                btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
                light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd       = getFwdJet(light)
                fwd_noPU  = getFwdJet(light, puId=False)
        
                ## forward jets
                high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
                high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator
        
                ## Get the two leading b-jets in terms of btag score
                high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

                met = ev.MET
                met['pt'] = getattr(met, var)

                sel = Selection(
                    dataset = dataset,
                    events = ev,
                    year = self.year,
                    ele = electron,
                    ele_veto = vetoelectron,
                    mu = muon,
                    mu_veto = vetomuon,
                    jet_all = jet,
                    jet_central = central,
                    jet_btag = btag,
                    jet_fwd = fwd,
                    met = met,
                )

                BL = sel.dilep_baseline(SS=False)

                # get the modified selection -> more difficult
                #selection.add('N_jet>2_'+var, (ak.num(jet.pt)>=3)) # stupid bug here...
                #selection.add('N_btag=2_'+var,      (ak.num(btag)==2) ) 
                #selection.add('N_central>1_'+var,   (ak.num(central)>=2) )
                #selection.add('N_fwd>0_'+var,       (ak.num(fwd)>=1) )
                #selection.add('MET>30_'+var, (getattr(ev.MET, var)>30) )

                ### Don't change the selection for now...
                #bl_reqs = os_reqs + ['N_jet>2_'+var, 'MET>30_'+var, 'N_btag=2_'+var, 'N_central>1_'+var, 'N_fwd>0_'+var]
                #bl_reqs_d = { sel: True for sel in bl_reqs }
                #BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_'+var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])
                BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
                output['N_fwd_'+var].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])
                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_'+var].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])
                output['N_central_'+var].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])


                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(jet.pt[:, 0:1][BL]),
                    eta = ak.flatten(jet.eta[:, 0:1][BL]),
                    phi = ak.flatten(jet.phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )
                
                output['b1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta = ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi = ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )
                
                output['fwd_jet_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_p_fwd[BL].pt),
                    #p   = ak.flatten(high_p_fwd[BL].p),
                    eta = ak.flatten(high_p_fwd[BL].eta),
                    phi = ak.flatten(high_p_fwd[BL].phi),
                    weight = weight.weight()[BL]
                )

                BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])        
                output['MET_'+var].fill(
                    dataset = dataset,
                    pt  = getattr(ev.MET, var)[BL_minusMET],
                    phi  = ev.MET[BL_minusMET].phi,
                    weight = weight.weight()[BL_minusMET]
                )
        
        return output

Example #15

Show file

    def dilep_baseline(self, omit=[], cutflow=None, tight=False, SS=True):
        '''
        give it a cutflow object if you want it to be filed.
        cuts in the omit list will not be applied
        '''
        self.selection = PackedSelection()

        is_dilep   = ((ak.num(self.ele) + ak.num(self.mu))==2)
        pos_charge = ((ak.sum(self.ele.pdgId, axis=1) + ak.sum(self.mu.pdgId, axis=1))<0)
        neg_charge = ((ak.sum(self.ele.pdgId, axis=1) + ak.sum(self.mu.pdgId, axis=1))>0)
        lep0pt     = ((ak.num(self.ele[(self.ele.pt>25)]) + ak.num(self.mu[(self.mu.pt>25)]))>0)
        lep1pt     = ((ak.num(self.ele[(self.ele.pt>20)]) + ak.num(self.mu[(self.mu.pt>20)]))>1)
        lepveto    = ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==2)

        dimu    = choose(self.mu, 2)
        diele   = choose(self.ele, 2)
        dilep   = cross(self.mu, self.ele)

        if SS:
            is_SS = ( ak.any((dimu['0'].charge * dimu['1'].charge)>0, axis=1) | \
                      ak.any((diele['0'].charge * diele['1'].charge)>0, axis=1) | \
                      ak.any((dilep['0'].charge * dilep['1'].charge)>0, axis=1) )
        else:
            is_OS = ( ak.any((dimu['0'].charge * dimu['1'].charge)<0, axis=1) | \
                      ak.any((diele['0'].charge * diele['1'].charge)<0, axis=1) | \
                      ak.any((dilep['0'].charge * dilep['1'].charge)<0, axis=1) )

        lepton = ak.concatenate([self.ele, self.mu], axis=1)
        lepton_pdgId_pt_ordered = ak.fill_none(
            ak.pad_none(
                lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True),
        0)

        triggers  = getTriggers(self.events,
            ak.flatten(lepton_pdgId_pt_ordered[:,0:1]),
            ak.flatten(lepton_pdgId_pt_ordered[:,1:2]), year=self.year, dataset=self.dataset)

        ht = ak.sum(self.jet_all.pt, axis=1)
        st = self.met.pt + ht + ak.sum(self.mu.pt, axis=1) + ak.sum(self.ele.pt, axis=1)

        self.selection.add('lepveto',       lepveto)
        self.selection.add('dilep',         is_dilep)
        #self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        if SS:
            self.selection.add('SS',            is_SS )
        else:
            self.selection.add('OS',            is_OS )
        self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3) )
        self.selection.add('N_jet>4',       (ak.num(self.jet_all)>4) )
        self.selection.add('N_central>2',   (ak.num(self.jet_central)>2) )
        self.selection.add('N_central>3',   (ak.num(self.jet_central)>3) )
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0) )
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0) )
        self.selection.add('MET>30',        (self.met.pt>30) )
        self.selection.add('MET>50',        (self.met.pt>50) )
        self.selection.add('ST>600',        (st>600) )

        ss_reqs = [
        #    'filter',
            'lepveto',
            'dilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            'SS' if SS else 'OS',
            'N_jet>3',
            'N_central>2',
            'N_btag>0',
            'MET>30',
            'N_fwd>0',
        ]
        
        if tight:
            ss_reqs += [
                'N_jet>4',
                'N_central>3',
                'ST>600',
                'MET>50',
                #'delta_eta',
            ]

        ss_reqs_d = { sel: True for sel in ss_reqs if not sel in omit }
        ss_selection = self.selection.require(**ss_reqs_d)

        if cutflow:
            #
            cutflow_reqs_d = {}
            for req in ss_reqs:
                cutflow_reqs_d.update({req: True})
                cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

        return ss_selection

Example #16

Show file

    def process(self, events):

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) >= 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        electron = electron[(electron.pt > 20) & (abs(electron.eta) < 2.4)]

        electron = electron[((electron.genPartIdx >= 0) &
                             (np.abs(electron.matched_gen.pdgId) == 11)
                             )]  #from here on all leptons are gen-matched

        ##Muons
        muon = Collections(ev, "Muon", "tight").get()
        muon = muon[(muon.pt > 20) & (abs(muon.eta) < 2.4)]

        muon = muon[((muon.genPartIdx >= 0) &
                     (np.abs(muon.matched_gen.pdgId) == 13))]

        ##Leptons

        lepton = ak.concatenate([muon, electron], axis=1)
        SSlepton = (ak.sum(lepton.charge, axis=1) != 0) & (ak.num(lepton) == 2)
        OSlepton = (ak.sum(lepton.charge, axis=1) == 0) & (ak.num(lepton) == 2)

        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]

        #jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[ak.argsort(
            jet.pt, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # setting up the various weights
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if not dataset == 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)
            weight2.add("weight", ev.genWeight)

        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        ss = (SSlepton)
        os = (OSlepton)
        jet_all = (ak.num(jet) >= 2)

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('ss', ss)
        selection.add('os', os)
        selection.add('jet', jet_all)

        bl_reqs = ['filter', 'jet']

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        s_reqs = bl_reqs + ['ss']
        s_reqs_d = {sel: True for sel in s_reqs}
        ss_sel = selection.require(**s_reqs_d)

        o_reqs = bl_reqs + ['os']
        o_reqs_d = {sel: True for sel in o_reqs}
        os_sel = selection.require(**o_reqs_d)

        #outputs
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[baseline],
                             weight=weight.weight()[baseline])

        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(lepton)[ss_sel],
                             weight=weight.weight()[ss_sel])

        output['N_ele2'].fill(dataset=dataset,
                              multiplicity=ak.num(lepton)[os_sel],
                              weight=weight2.weight()[os_sel])

        output["electron"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[ss_sel].pt)),
            eta=abs(ak.to_numpy(ak.flatten(leading_lepton[ss_sel].eta))),
            phi=ak.to_numpy(ak.flatten(leading_lepton[ss_sel].phi)),
            weight=weight.weight()[ss_sel])

        output["electron2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[os_sel].pt)),
            eta=abs(ak.to_numpy(ak.flatten(leading_lepton[os_sel].eta))),
            phi=ak.to_numpy(ak.flatten(leading_lepton[os_sel].phi)),
            weight=weight2.weight()[os_sel])

        return output

Example #17

Show file

File: acceptance_calc.py Project: rkansal47/boostedhiggs

genpart_pt = full_hh4b_samples['GenPart_pt']
genpart_status = full_hh4b_samples['GenPart_status']

full_hh4b_samples['GenPart_status']
full_hh4b_samples['GenPart_statusFlags']

higgses = (full_hh4b_samples['GenPart_pdgId'] == 25)
full_hh4b_samples['GenPart_status'][higgses]

np.unique(
    ak.to_numpy(
        ak.pad_none(full_hh4b_samples['GenPart_status'][higgses], 40, axis=1)))

ak.pad_none(hh4b_samples['GenPart_status'][higgses], 40, axis=1)
higgs_pt = genpart_pt[(full_hh4b_samples['GenPart_pdgId'] == 25)]

gt_300 = ak.sort(higgs_pt, axis=1)[:, -1] > 300

num_gt_300 = ak.sum(ak.sort(higgs_pt, axis=1)[:, -1] > 300)
num_gt_300 / len(higgs_pt)

fhiggs_pt = genpart_pt[(full_hh4b_samples['GenPart_pdgId'] == 25
                        )][full_hh4b_samples['GenPart_status'][higgses] == 22]

jet_pt = ak.pad_none(full_hh4b_samples['FatJet_pt'], 2, axis=1)[:, :2][gt_300]
jet_msd = ak.pad_none(full_hh4b_samples['FatJet_msoftdrop'], 2,
                      axis=1)[:, :2][gt_300]

ak.sum((jet_pt[:, 0] > 250) * (jet_pt[:, 1] > 250) * (jet_msd[:, 0] > 20) *
       (jet_msd[:, 1] > 20)) / len(jet_pt)

Example #18

Show file

    def process(self, events):
        
        output = self.accumulator.identity()
        
        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet)>=2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        
        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 15) & (np.abs(electron.eta) < 2.4)]

        electron = electron[(electron.genPartIdx >= 0)]
        electron = electron[(np.abs(electron.matched_gen.pdgId)==11)]  #from here on all leptons are gen-matched
        electron = electron[( (electron.genPartFlav==1) | (electron.genPartFlav==15) )] #and now they are all prompt
     
        
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
        trailing_electron = electron[trailing_electron_idx]
        
        leading_parent = find_first_parent(leading_electron.matched_gen)
        trailing_parent = find_first_parent(trailing_electron.matched_gen)
        
       
        is_flipped = ( ( (electron.matched_gen.pdgId*(-1) == electron.pdgId) | (find_first_parent(electron.matched_gen)*(-1) == electron.pdgId) ) & (np.abs(electron.pdgId) == 11) )
        
        
        flipped_electron = electron[is_flipped]
        flipped_electron = flipped_electron[(ak.fill_none(flipped_electron.pt, 0)>0)]
        flipped_electron = flipped_electron[~(ak.is_none(flipped_electron))]
        n_flips = ak.num(flipped_electron)
                
        ##Muons
        muon     = Collections(ev, "Muon", "tightFCNC").get()
        muon = muon[(muon.pt > 15) & (np.abs(muon.eta) < 2.4)]
        
        muon = muon[(muon.genPartIdx >= 0)]
        muon = muon[(np.abs(muon.matched_gen.pdgId)==13)] #from here, all muons are gen-matched
        muon = muon[( (muon.genPartFlav==1) | (muon.genPartFlav==15) )] #and now they are all prompt
       
        
        ##Leptons

        lepton   = ak.concatenate([muon, electron], axis=1)
        SSlepton = (ak.sum(lepton.charge, axis=1) != 0) & (ak.num(lepton)==2)
        OSlepton = (ak.sum(lepton.charge, axis=1) == 0) & (ak.num(lepton)==2)
        
        emulepton = (ak.num(electron) == 1) & (ak.num(muon) == 1)
        no_mumu = (ak.num(muon) <= 1)
        
        
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        
        
        #jets
        jet       = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet       = jet[ak.argsort(jet.pt, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] 
        
        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        # setting up the various weights
        weight = Weights( len(ev) )
        weight2 = Weights( len(ev))
        
        if not dataset=='MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)
            weight2.add("weight", ev.genWeight)
            
        weight2.add("charge flip", self.charge_flip_ratio.flip_weight(electron))
                                   
                      
        #selections    
        filters   = getFilters(ev, year=self.year, dataset=dataset)
        ss = (SSlepton)
        os = (OSlepton)
        jet_all = (ak.num(jet) >= 2)
        diele = (ak.num(electron) == 2)
        emu = (emulepton)
        flips = (n_flips == 1)
        no_flips = (n_flips == 0)
        nmm = no_mumu
        
        
        selection = PackedSelection()
        selection.add('filter',      (filters) )
        selection.add('ss',          ss )
        selection.add('os',          os )
        selection.add('jet',         jet_all )
        selection.add('ee',          diele)
        selection.add('emu',         emu)
        selection.add('flip',        flips)
        selection.add('nflip',       no_flips)
        selection.add('no_mumu',     nmm)
        
        bl_reqs = ['filter'] + ['jet']

        bl_reqs_d = { sel: True for sel in bl_reqs }
        baseline = selection.require(**bl_reqs_d)
        
        f_reqs = bl_reqs + ['flip'] + ['ss'] + ['ee']
        f_reqs_d = {sel: True for sel in f_reqs}
        flip_sel = selection.require(**f_reqs_d)
        
        f2_reqs = bl_reqs + ['flip'] + ['ss'] + ['emu']
        f2_reqs_d = {sel: True for sel in f2_reqs}
        flip_sel2 = selection.require(**f2_reqs_d)
        
        f3_reqs = bl_reqs + ['flip'] + ['ss'] + ['no_mumu']
        f3_reqs_d = {sel: True for sel in f3_reqs}
        flip_sel3 = selection.require(**f3_reqs_d)
        
        nf_reqs = bl_reqs + ['nflip'] + ['os'] + ['ee']
        nf_reqs_d = {sel: True for sel in nf_reqs}
        n_flip_sel = selection.require(**nf_reqs_d)
        
        nf2_reqs = bl_reqs + ['nflip'] + ['os'] + ['emu']
        nf2_reqs_d = {sel: True for sel in nf2_reqs}
        n_flip_sel2 = selection.require(**nf2_reqs_d)
        
        nf3_reqs = bl_reqs + ['nflip'] + ['os'] + ['no_mumu']
        nf3_reqs_d = {sel: True for sel in nf3_reqs}
        n_flip_sel3 = selection.require(**nf3_reqs_d)
        
        s_reqs = bl_reqs + ['ss'] + ['no_mumu']
        s_reqs_d = { sel: True for sel in s_reqs }
        ss_sel = selection.require(**s_reqs_d)
        
        o_reqs = bl_reqs + ['os'] + ['no_mumu']
        o_reqs_d = {sel: True for sel in o_reqs }
        os_sel = selection.require(**o_reqs_d)
        
        ees_reqs = bl_reqs + ['ss'] + ['ee']
        ees_reqs_d = { sel: True for sel in ees_reqs }
        eess_sel = selection.require(**ees_reqs_d)
        
        eeo_reqs = bl_reqs + ['os'] + ['ee']
        eeo_reqs_d = {sel: True for sel in eeo_reqs }
        eeos_sel = selection.require(**eeo_reqs_d)
        
        ems_reqs = bl_reqs + ['ss'] + ['emu']
        ems_reqs_d = { sel: True for sel in ems_reqs }
        emss_sel = selection.require(**ems_reqs_d)
        
        emo_reqs = bl_reqs + ['os'] + ['emu']
        emo_reqs_d = {sel: True for sel in emo_reqs }
        emos_sel = selection.require(**emo_reqs_d)
        
       
        #outputs
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[baseline], weight=weight.weight()[baseline])
        
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(lepton)[ss_sel], weight=weight.weight()[ss_sel])
                      
        output['N_ele2'].fill(dataset=dataset, multiplicity=ak.num(lepton)[os_sel], weight=weight2.weight()[os_sel])
        
        output['electron_flips'].fill(dataset=dataset, multiplicity = n_flips[flip_sel], weight=weight.weight()[flip_sel])

        output['electron_flips2'].fill(dataset=dataset, multiplicity = n_flips[n_flip_sel], weight=weight2.weight()[n_flip_sel])
        
        output['electron_flips3'].fill(dataset=dataset, multiplicity = n_flips[flip_sel2], weight=weight.weight()[flip_sel2])

        output['electron_flips4'].fill(dataset=dataset, multiplicity = n_flips[n_flip_sel2], weight=weight2.weight()[n_flip_sel2])
        

        output["electron"].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_electron[flip_sel3].pt)),
            eta = np.abs(ak.to_numpy(ak.flatten(leading_electron[flip_sel3].eta))),
            weight = weight.weight()[flip_sel3]
        )
        
        output["electron2"].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_electron[n_flip_sel3].pt)),
            eta = np.abs(ak.to_numpy(ak.flatten(leading_electron[n_flip_sel3].eta))),
            weight = weight2.weight()[n_flip_sel3]
        )
        
        output["flipped_electron"].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_electron[flip_sel].pt)),
            eta = np.abs(ak.to_numpy(ak.flatten(leading_electron[flip_sel].eta))),
            weight = weight.weight()[flip_sel]
        )
        
        output["flipped_electron2"].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_electron[n_flip_sel].pt)),
            eta = np.abs(ak.to_numpy(ak.flatten(leading_electron[n_flip_sel].eta))),
            weight = weight2.weight()[n_flip_sel]
        )
        
        output["flipped_electron3"].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_electron[flip_sel2].pt)),
            eta = np.abs(ak.to_numpy(ak.flatten(leading_electron[flip_sel2].eta))),
            weight = weight.weight()[flip_sel2]
        )
        
        output["flipped_electron4"].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_electron[n_flip_sel2].pt)),
            eta = np.abs(ak.to_numpy(ak.flatten(leading_electron[n_flip_sel2].eta))),
            weight = weight2.weight()[n_flip_sel2]
        )
        
        #output["lepton_parent"].fill(
        #    dataset = dataset,
        #    pdgID = np.abs(ak.to_numpy(ak.flatten(leading_parent[ss_sel]))),
        #    weight = weight.weight()[ss_sel]
        #)
        #
        #output["lepton_parent2"].fill(
        #    dataset = dataset,
        #    pdgID = np.abs(ak.to_numpy(ak.flatten(trailing_parent[ss_sel]))),
        #    weight = weight.weight()[ss_sel]
        #)

        return output

Example #19

Show file

File: flatPlots.py Project: akhter-towsifa/GEMCode

lct_trigger_rate = []
alct_daq_rate = []
clct_daq_rate = []
lct_daq_rate = []

gem_trigger_rate = []
gem_daq_rate = []

## trigger rates for each station
for ids in range(0, 9):
    station = id_to_station[ids][0]
    ring = id_to_station[ids][1]
    cuts = ((csc_alct.bx == 3) & (csc_alct.station == station) &
            (csc_alct.ring == ring))
    ak.to_list(cuts)
    passEvents = ak.sum(cuts)
    objectRate = passEvents * normalization
    print(
        "Trigger Rate for ALCT for CSC station = %d, ring = %d (BX3): %f kHz" %
        (station, ring, objectRate))

for ids in range(0, 9):
    station = id_to_station[ids][0]
    ring = id_to_station[ids][1]
    cuts = ((csc_clct.bx == 7) & (csc_clct.station == station) &
            (csc_clct.ring == ring))
    ak.to_list(cuts)
    passEvents = ak.sum(cuts)
    objectRate = passEvents * normalization
    print(
        "Trigger Rate for CLCT for CSC station = %d, ring = %d (BX7): %f kHz" %

Example #20

Show file

File: mainProcessor.py Project: tresreid/SUEP_coffea

    def process(self, df):
        output = self.accumulator.identity()

        # important variables
        obj = Objects(df)
        mask = bl.getBaselineMask(df)

        electrons = obj.goodElectrons()[mask]
        muons = obj.goodMuons()[mask]
        jets = obj.goodJets()[mask]
        madHT_cut = df['madHT'][mask]
        met = df['MET'][mask]
        metPhi = df['METPhi'][mask]
        tracks = obj.goodTracks()[mask]
        output['cutflow']['all events'] += jets.size

        # Hack: defining weights: same length as the number of events in the chunk
        # Check if this is what other people do too
        luminosity = 21071.0 + 38654.0
        evtw = df['Weight'][mask] * luminosity
        ew = utl.awkwardReshape(electrons, evtw)
        mw = utl.awkwardReshape(muons, evtw)
        jw = utl.awkwardReshape(jets, evtw)
        tw = utl.awkwardReshape(tracks, evtw)

        # Getting subset of variables based on number of AK8 jets
        # calculating event variables
        ht = ak.sum(jets.pt, axis=1)
        st = ht + met
        #dPhiMinJ = utl.deltaPhi(fjets.phi,metPhi).min()
        dPhiMinj = utl.deltaPhi(jets.phi, metPhi).min()

        #
        # Trigger Study...
        #
        cut_met = met > 250
        cut_ht_offline = ht > 1200
        cut_ht_scouting = ht > 500
        jets_met = jets[cut_met]
        jets_ht_off = jets[cut_ht_offline]
        jets_ht_scout = jets[cut_ht_scouting]
        output['trigger']['all events'] += jets.size
        output['trigger']['met'] += jets_met.size
        output['trigger']['ht_offline'] += jets_ht_off.size
        output['trigger']['ht_scouting'] += jets_ht_scout.size

        ## at least 1 AK4 Jet
        cut_1j = jets.counts >= 1
        jets_1j = jets[cut_1j]
        metPhi_1j = metPhi[cut_1j]
        evtw_1j = evtw[cut_1j]
        dPhij1 = utl.deltaPhi(jets_1j.phi[:, 0], metPhi_1j)
        output['cutflow']['one jet'] += jets_1j.size

        ## at least 2 AK4 Jets
        cut_2j = jets.counts >= 2
        jets_2j = jets[cut_2j]
        metPhi_2j = metPhi[cut_2j]
        evtw_2j = evtw[cut_2j]
        j1_eta = np.array(jets_2j.eta[:, 0])
        j2_eta = np.array(jets_2j.eta[:, 1])
        j1_phi = np.array(jets_2j.phi[:, 0])
        j2_phi = np.array(jets_2j.phi[:, 1])
        dEtaj12 = abs(j1_eta - j2_eta)
        deltaR12j = utl.delta_R(j1_phi, j2_phi, j1_eta, j2_eta)
        dPhij1_2j = utl.deltaPhi(j1_phi, metPhi_2j)
        dPhij2 = utl.deltaPhi(j2_phi, metPhi_2j)
        output['cutflow']['two jets'] += jets_2j.size

        ### at least 1 AK8 Jet
        #cut_1fj = fjets.counts >= 1
        #fjets_1fj = fjets[cut_1fj]
        #metPhi_1fj = metPhi[cut_1fj]
        #evtw_1fj = evtw[cut_1fj]
        #dPhiJ1 = utl.deltaPhi(fjets_1fj.phi[:,0],metPhi_1fj)
        ### at least 2 AK8 Jets
        #cut_2fj = fjets.counts >= 2
        #fjets_2fj = fjets[cut_2fj]
        #metPhi_2fj = metPhi[cut_2fj]
        #evtw_2fj = evtw[cut_2fj]
        #J1_eta = np.array(fjets_2fj.eta[:,0])
        #J2_eta = np.array(fjets_2fj.eta[:,1])
        #J1_phi = np.array(fjets_2fj.phi[:,0])
        #J2_phi = np.array(fjets_2fj.phi[:,1])
        #dEtaJ12 = abs(J1_eta - J2_eta)
        #deltaR12J = utl.delta_R(J1_phi,J2_phi,J1_eta,J2_eta)
        #dPhiJ1_2fj = utl.deltaPhi(J1_phi,metPhi_2fj)
        #dPhiJ2 = utl.deltaPhi(J2_phi,metPhi_2fj)

        ## twofjets = (fjets.counts >= 2)
        ## difjets = fjets[twofjets]
        ## ptcut = (difjets.pt[:,0] > 200) & (difjets.pt[:,1] > 200)
        ## difjets_pt200 = difjets[ptcut]

        #twofjets = (fjets.counts >= 2)
        #output['cutflow']['two fjets'] += twofjets.sum()

        ## difjets = fjets[twofjets]
        ## difjets_pt200 = difjets[ptcut]
        ## output['jtpt'].fill(dataset=dataset, pt=fjets.pt.flatten())
        ## output['jteta'].fill(dataset=dataset, eta=fjets.eta.flatten())

        output['h_ntracks'].fill(ntracks=tracks.counts.flatten(), weight=evtw)
        output['h_njets'].fill(njets=jets.counts.flatten(), weight=evtw)
        output['h_ht'].fill(ht=ht, weight=evtw)
        output['h_st'].fill(st=st, weight=evtw)
        output['h_met'].fill(MET=met, weight=evtw)
        #output['h_jPt'].fill(pt=jets.pt.flatten(),weight=ak.flatten(jw))
        #output['h_jEta'].fill(eta=jets.eta.flatten(),weight=ak.flatten(jw))
        #output['h_jPhi'].fill(phi=jets.phi.flatten(),weight=ak.flatten(jw))
        #output['h_jAxismajor'].fill(axismajor=jets.axismajor.flatten(),weight=ak.flatten(jw))
        #output['h_jAxisminor'].fill(axisminor=jets.axisminor.flatten(),weight=ak.flatten(jw))
        #output['h_jPtD'].fill(ptD=jets.ptD.flatten(),weight=ak.flatten(jw))
        #output['h_jPtAK8'].fill(pt=fjets.pt.flatten(),weight=ak.flatten(fjw))
        #output['h_jEtaAK8'].fill(eta=fjets.eta.flatten(),weight=ak.flatten(fjw))
        #output['h_jPhiAK8'].fill(phi=fjets.phi.flatten(),weight=ak.flatten(fjw))
        #output['h_jAxismajorAK8'].fill(axismajor=fjets.axismajor.flatten(),weight=ak.flatten(fjw))
        #output['h_jAxisminorAK8'].fill(axisminor=fjets.axisminor.flatten(),weight=ak.flatten(fjw))
        #output['h_jGirthAK8'].fill(girth=fjets.girth.flatten(),weight=ak.flatten(fjw))
        #output['h_jPtDAK8'].fill(ptD=fjets.ptD.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau1AK8'].fill(tau1=fjets.tau1.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau2AK8'].fill(tau2=fjets.tau2.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau3AK8'].fill(tau3=fjets.tau3.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau21AK8'].fill(tau21=fjets[fjets.tau1 > 0].tau2.flatten()/fjets[fjets.tau1 > 0].tau1.flatten(),weight=ak.flatten(fjw)[(fjets.tau1 > 0).flatten()])
        #output['h_jTau32AK8'].fill(tau32=fjets[fjets.tau2 > 0].tau3.flatten()/fjets[fjets.tau2 > 0].tau2.flatten(),weight=ak.flatten(fjw)[(fjets.tau2 > 0).flatten()])
        #output['h_jSoftDropMassAK8'].fill(softDropMass=fjets.softDropMass.flatten(),weight=ak.flatten(fjw))
        #output['h_dEtaJ12'].fill(dEtaJ12=dEtaJ12,weight=evtw_2fj)
        #output['h_dRJ12'].fill(dRJ12=deltaR12J,weight=evtw_2fj)
        #output['h_dPhiJ1MET'].fill(dPhiJMET=dPhiJ1,weight=evtw_1fj)
        #output['h_dPhiJ2MET'].fill(dPhiJMET=dPhiJ2,weight=evtw_2fj)
        #output['h_dPhiMinJMET'].fill(dPhiJMET=dPhiMinJ,weight=evtw)
        #output['h_dPhiJ1METrdPhiJ2MET'].fill(dPhiJ1METrdPhiJ2MET=dPhiJ1_2fj[dPhiJ2>0]/dPhiJ2[dPhiJ2>0],weight=evtw_2fj[dPhiJ2>0])
        #output['h_mT'].fill(mT=mtAK8,weight=evtw)
        #output['h_METrHT_pt30'].fill(METrHT_pt30=met[ht>0]/ht[ht>0],weight=evtw[ht>0])
        #output['h_METrST_pt30'].fill(METrST_pt30=met[st>0]/st[st>0],weight=evtw[st>0])
        ## Cut: at least 2 AK8 jets
        #output['h_njets_ge2AK8j'].fill(njets=jets[cut_2fj].counts.flatten(),weight=evtw[cut_2fj])
        #output['h_njetsAK8_ge2AK8j'].fill(njets=fjets[cut_2fj].counts.flatten(),weight=evtw[cut_2fj])
        #output['h_ht_ge2AK8j'].fill(ht=ht[cut_2fj],weight=evtw[cut_2fj])
        #output['h_st_ge2AK8j'].fill(st=st[cut_2fj],weight=evtw[cut_2fj])
        #output['h_met_ge2AK8j'].fill(MET=met[cut_2fj],weight=evtw[cut_2fj])
        #output['h_dPhiJ1MET_ge2AK8j'].fill(dPhiJMET=dPhiJ1[fjets_1fj.counts >= 2],weight=evtw_1fj[fjets_1fj.counts >= 2])
        #output['h_dPhiMinJMET_ge2AK8j'].fill(dPhiJMET=dPhiMinJ[cut_2fj],weight=evtw[cut_2fj])
        #output['h_METrHT_pt30_ge2AK8j'].fill(METrHT_pt30=met[(cut_2fj) & (ht>0)]/ht[(cut_2fj) & (ht>0)],weight=evtw[(cut_2fj) & (ht>0)])
        #output['h_METrST_pt30_ge2AK8j'].fill(METrST_pt30=met[(cut_2fj) & (st>0)]/st[(cut_2fj) & (st>0)],weight=evtw[(cut_2fj) & (st>0)])
        #output['h_mT_ge2AK8j'].fill(mT=mtAK8[cut_2fj],weight=evtw[cut_2fj])
        ## more AK4 jets variables
        #output['h_dEtaj12'].fill(dEtaJ12=dEtaj12,weight=evtw_2j)
        #output['h_dRj12'].fill(dRJ12=deltaR12j,weight=evtw_2j)
        #output['h_dPhij1MET'].fill(dPhiJMET=dPhij1,weight=evtw_1j)
        #output['h_dPhij2MET'].fill(dPhiJMET=dPhij2,weight=evtw_2j)
        #output['h_dPhiMinjMET'].fill(dPhiJMET=dPhiMinj,weight=evtw)
        #output['h_dPhij1METrdPhij2MET'].fill(dPhiJ1METrdPhiJ2MET=dPhij1_2j[dPhij2>0]/dPhij2[dPhij2>0],weight=evtw_2j[dPhij2>0])
        ## Cut: at least 2 AK4 jets
        #output['h_njets_ge2AK4j'].fill(njets=jets[cut_2j].counts.flatten(),weight=evtw[cut_2j])
        #output['h_njetsAK8_ge2AK4j'].fill(njets=fjets[cut_2j].counts.flatten(),weight=evtw[cut_2j])
        #output['h_ht_ge2AK4j'].fill(ht=ht[cut_2j],weight=evtw[cut_2j])
        #output['h_st_ge2AK4j'].fill(st=st[cut_2j],weight=evtw[cut_2j])
        #output['h_met_ge2AK4j'].fill(MET=met[cut_2j],weight=evtw[cut_2j])
        #output['h_dPhij1MET_ge2AK4j'].fill(dPhiJMET=dPhij1[jets_1j.counts >= 2],weight=evtw_1j[jets_1j.counts >= 2])
        #output['h_dPhiMinjMET_ge2AK4j'].fill(dPhiJMET=dPhiMinj[cut_2j],weight=evtw[cut_2j])
        #output['h_METrHT_pt30_ge2AK4j'].fill(METrHT_pt30=met[(cut_2j) & (ht>0)]/ht[(cut_2j) & (ht>0)],weight=evtw[(cut_2j) & (ht>0)])
        #output['h_METrST_pt30_ge2AK4j'].fill(METrST_pt30=met[(cut_2j) & (st>0)]/st[(cut_2j) & (st>0)],weight=evtw[(cut_2j) & (st>0)])
        #output['h_mT_ge2AK4j'].fill(mT=mtAK8[cut_2j],weight=evtw[cut_2j])
        #output['h_madHT'].fill(ht=madHT_cut,weight=evtw)
        return output

Example #21

Show file

File: Histograms.py Project: XL-Seb-Yan/toffea

    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()

        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:  #this does not work, as the name of file which is under processing is unknown
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        #pairs = awk.argcombinations(selected_jets, 2)
        #jet_i, jet_j = awk.unzip(pairs)
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt -
                            (selected_jets[:, 1] +
                             selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt -
                            (selected_jets[:, 2] +
                             selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt -
                            (selected_jets[:, 0] +
                             selected_jets[:, 1]).pt) / m3j

        # Event selection masks
        selection_masks = {}
        # Pre-selection
        selection = PackedSelection()
        selection.add("Dummy", m3j > 000)
        sel_mask = selection.require(
            **{name: True
               for name in selection.names})
        selection_masks["Pre-selection"] = sel_mask

        # HLT_trigger (this is already done at the beginning)
        # if year == "2016":
        # JetHLT_mask = []
        # if "2016B2" in dataset_name:
        # JetHLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        # elif "2016H" in dataset_name:
        # JetHLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        # else:
        # JetHLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        # selection_masks["JetHLT"] = JetHLT_mask[event_mask]
        # if year == "2017":
        # JetHLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        # selection_masks["JetHLT"] = JetHLT_mask[event_mask]
        # if year == "2018":
        # JetHLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        # selection_masks["JetHLT"] = JetHLT_mask[event_mask]

        # Fill histograms
        for selection, selection_mask in selection_masks.items():
            output["mjjj"].fill(dataset=dataset_name,
                                selection=selection,
                                mjjj=m3j[selection_mask])

            output["m_ij"].fill(dataset=dataset_name,
                                selection=selection,
                                m_01=m_ij[:, 0][selection_mask],
                                m_12=m_ij[:, 1][selection_mask],
                                m_20=m_ij[:, 2][selection_mask])

            output["dR_ij"].fill(dataset=dataset_name,
                                 selection=selection,
                                 dR_01=dR_ij[:, 0][selection_mask],
                                 dR_12=dR_ij[:, 1][selection_mask],
                                 dR_20=dR_ij[:, 2][selection_mask])

            output["dEta_ij"].fill(dataset=dataset_name,
                                   selection=selection,
                                   dEta_01=dEta_ij[:, 0][selection_mask],
                                   dEta_12=dEta_ij[:, 1][selection_mask],
                                   dEta_20=dEta_ij[:, 2][selection_mask])

            output["moverM_ij"].fill(dataset=dataset_name,
                                     selection=selection,
                                     moverM_01=m_01_overM[selection_mask],
                                     moverM_12=m_12_overM[selection_mask],
                                     moverM_20=m_20_overM[selection_mask])

            output["pt_i"].fill(dataset=dataset_name,
                                selection=selection,
                                pt_0=selected_jets[:, 0][selection_mask].pt,
                                pt_1=selected_jets[:, 1][selection_mask].pt,
                                pt_2=selected_jets[:, 2][selection_mask].pt)

            output["eta_i"].fill(dataset=dataset_name,
                                 selection=selection,
                                 eta_0=selected_jets[:, 0][selection_mask].eta,
                                 eta_1=selected_jets[:, 1][selection_mask].eta,
                                 eta_2=selected_jets[:, 2][selection_mask].eta)

            output["ptoverM_i"].fill(dataset=dataset_name,
                                     selection=selection,
                                     ptoverM_0=pt_i_overM[:,
                                                          0][selection_mask],
                                     ptoverM_1=pt_i_overM[:,
                                                          1][selection_mask],
                                     ptoverM_2=pt_i_overM[:,
                                                          2][selection_mask])

            output["dR_i_jk"].fill(dataset=dataset_name,
                                   selection=selection,
                                   dR_0_12=dR_i_jk[:, 0][selection_mask],
                                   dR_1_20=dR_i_jk[:, 1][selection_mask],
                                   dR_2_01=dR_i_jk[:, 2][selection_mask])

            output["dEta_i_jk"].fill(dataset=dataset_name,
                                     selection=selection,
                                     dEta_0_12=dEta_i_jk[:, 0][selection_mask],
                                     dEta_1_20=dEta_i_jk[:, 1][selection_mask],
                                     dEta_2_01=dEta_i_jk[:, 2][selection_mask])

            output["dPhi_i_jk"].fill(dataset=dataset_name,
                                     selection=selection,
                                     dPhi_0_12=dPhi_i_jk[:, 0][selection_mask],
                                     dPhi_1_20=dPhi_i_jk[:, 1][selection_mask],
                                     dPhi_2_01=dPhi_i_jk[:, 2][selection_mask])

            output["dPtoverM_i_jk"].fill(
                dataset=dataset_name,
                selection=selection,
                dPtoverM_0_12=dPtoverM_0_12[selection_mask],
                dPtoverM_1_20=dPtoverM_1_20[selection_mask],
                dPtoverM_2_01=dPtoverM_2_01[selection_mask])
            pt_i_overM_2fill = pt_i_overM[selection_mask]
            dR_ij_2fill = dR_ij[selection_mask]
            dEta_ij_2fill = dEta_ij[selection_mask]
            dR_i_jk_2fill = dR_i_jk[selection_mask]
            dEta_i_jk_2fill = dEta_i_jk[selection_mask]
            dPhi_i_jk_2fill = dPhi_i_jk[selection_mask]
            dPtoverM_0_12_2fill = dPtoverM_0_12[selection_mask]
            dPtoverM_1_20_2fill = dPtoverM_1_20[selection_mask]
            dPtoverM_2_01_2fill = dPtoverM_2_01[selection_mask]
            selected_jets_2fill = selected_jets[selection_mask]

            max_pt_overM_2fill = awk.max(pt_i_overM_2fill, axis=1)
            min_pt_overM_2fill = awk.min(pt_i_overM_2fill, axis=1)
            max_dR_2fill = awk.max(dR_ij_2fill, axis=1)
            max_dEta_2fill = awk.max(dEta_ij_2fill, axis=1)
            min_dR_2fill = awk.min(dR_ij_2fill, axis=1)
            min_dEta_2fill = awk.min(dEta_ij_2fill, axis=1)
            min_pt_2fill = awk.min(selected_jets_2fill.pt, axis=1)
            max_eta_2fill = awk.max(abs(selected_jets_2fill.eta), axis=1)
            max_dR_i_jk_2fill = awk.max(dR_i_jk_2fill, axis=1)
            min_dR_i_jk_2fill = awk.min(dR_i_jk_2fill, axis=1)
            max_dEta_i_jk_2fill = awk.max(dEta_i_jk_2fill, axis=1)
            min_dEta_i_jk_2fill = awk.min(dEta_i_jk_2fill, axis=1)
            max_dPhi_i_jk_2fill = awk.max(dPhi_i_jk_2fill, axis=1)
            min_dPhi_i_jk_2fill = awk.min(dPhi_i_jk_2fill, axis=1)
            max_dPtoverM_i_jk_2fill = []
            min_dPtoverM_i_jk_2fill = []
            for pair in zip(dPtoverM_0_12_2fill, dPtoverM_1_20_2fill,
                            dPtoverM_2_01_2fill):
                max_dPtoverM_i_jk_2fill.append(max(pair))
                min_dPtoverM_i_jk_2fill.append(min(pair))
            max_dPtoverM_i_jk_2fill = np.array(max_dPtoverM_i_jk_2fill)
            min_dPtoverM_i_jk_2fill = np.array(min_dPtoverM_i_jk_2fill)

            max_pt_overM_2fill = awk.fill_none(max_pt_overM_2fill, -99)
            min_pt_overM_2fill = awk.fill_none(min_pt_overM_2fill, -99)
            max_dR_2fill = awk.fill_none(max_dR_2fill, -99)
            max_dEta_2fill = awk.fill_none(max_dEta_2fill, -99)
            min_dR_2fill = awk.fill_none(min_dR_2fill, -99)
            min_dEta_2fill = awk.fill_none(min_dEta_2fill, -99)
            min_pt_2fill = awk.fill_none(min_pt_2fill, -99)
            max_eta_2fill = awk.fill_none(max_eta_2fill, -99)
            max_dR_i_jk_2fill = awk.fill_none(max_dR_i_jk_2fill, -99)
            min_dR_i_jk_2fill = awk.fill_none(min_dR_i_jk_2fill, -99)
            max_dEta_i_jk_2fill = awk.fill_none(max_dEta_i_jk_2fill, -99)
            min_dEta_i_jk_2fill = awk.fill_none(min_dEta_i_jk_2fill, -99)
            max_dPhi_i_jk_2fill = awk.fill_none(max_dPhi_i_jk_2fill, -99)
            min_dPhi_i_jk_2fill = awk.fill_none(min_dPhi_i_jk_2fill, -99)

            output["max_dR"].fill(dataset=dataset_name,
                                  selection=selection,
                                  max_dR=max_dR_2fill)

            output["max_dEta"].fill(dataset=dataset_name,
                                    selection=selection,
                                    max_dEta=max_dEta_2fill)

            output["min_dR"].fill(dataset=dataset_name,
                                  selection=selection,
                                  min_dR=min_dR_2fill)

            output["min_dEta"].fill(dataset=dataset_name,
                                    selection=selection,
                                    min_dEta=min_dEta_2fill)

            output["min_pt"].fill(dataset=dataset_name,
                                  selection=selection,
                                  min_pt=min_pt_2fill)

            output["max_eta"].fill(dataset=dataset_name,
                                   selection=selection,
                                   max_eta=max_eta_2fill)

            output["max_ptoverM"].fill(dataset=dataset_name,
                                       selection=selection,
                                       max_ptoverM=max_pt_overM_2fill)

            output["min_ptoverM"].fill(dataset=dataset_name,
                                       selection=selection,
                                       min_ptoverM=min_pt_overM_2fill)

            output["max_dR_j_jj"].fill(dataset=dataset_name,
                                       selection=selection,
                                       max_dR_j_jj=max_dR_i_jk_2fill)

            output["max_dEta_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         max_dEta_j_jj=max_dEta_i_jk_2fill)

            output["max_dPhi_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         max_dPhi_j_jj=max_dPhi_i_jk_2fill)

            output["max_dPtoverM_j_jj"].fill(
                dataset=dataset_name,
                selection=selection,
                max_dPtoverM_j_jj=max_dPtoverM_i_jk_2fill)

            output["min_dR_j_jj"].fill(dataset=dataset_name,
                                       selection=selection,
                                       min_dR_j_jj=min_dR_i_jk_2fill)

            output["min_dEta_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         min_dEta_j_jj=min_dEta_i_jk_2fill)

            output["min_dPhi_j_jj"].fill(dataset=dataset_name,
                                         selection=selection,
                                         min_dPhi_j_jj=min_dPhi_i_jk_2fill)

            output["min_dPtoverM_j_jj"].fill(
                dataset=dataset_name,
                selection=selection,
                min_dPtoverM_j_jj=min_dPtoverM_i_jk_2fill)

        return output

Example #22

Show file

    def process(self, events):
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        output['sumw'][dataset] += ak.sum(events.genWeight)
        
        ##############
        # Trigger level
        triggers = [
        "HLT_Mu12_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ",
        "HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_DZ",    
        ]
        
        trig_arrs = [events.HLT[_trig.strip("HLT_")] for _trig in triggers]
        req_trig = np.zeros(len(events), dtype='bool')
        for t in trig_arrs:
            req_trig = req_trig | t

        ############
        # Event level
        
        ## Muon cuts
        # muon twiki: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2
        events.Muon = events.Muon[(events.Muon.pt > 30) & (abs(events.Muon.eta < 2.4))] # & (events.Muon.tightId > .5)
        events.Muon = ak.pad_none(events.Muon, 1, axis=1) 
        req_muon =(ak.count(events.Muon.pt, axis=1) == 1)
        
        ## Electron cuts
        # electron twiki: https://twiki.cern.ch/twiki/bin/viewauth/CMS/CutBasedElectronIdentificationRun2
        events.Electron = events.Electron[(events.Electron.pt > 30) & (abs(events.Electron.eta) < 2.4)]
        events.Electron = ak.pad_none(events.Electron, 1, axis=1) 
        req_ele = (ak.count(events.Electron.pt, axis=1) == 1)
        
        ## Jet cuts
        events.Jet = events.Jet[(events.Jet.pt > 25) & (abs(events.Jet.eta) <= 2.5)]
        req_jets = (ak.count(events.Jet.pt, axis=1) >= 2)    
        
        req_opposite_charge = events.Electron[:, 0].charge * events.Muon[:, 0].charge == -1
        
        event_level = req_trig & req_muon & req_ele & req_opposite_charge & req_jets
        
        # Selected
        selev = events[event_level]    
        
        #########
        
        # Per electron
        el_eta   = (abs(selev.Electron.eta) <= 2.4)
        el_pt    = selev.Electron.pt > 30
        el_level = el_eta & el_pt
        
        # Per muon
        mu_eta   = (abs(selev.Muon.eta) <= 2.4)
        mu_pt    = selev.Muon.pt > 30
        mu_level = mu_eta & mu_pt
        
        # Per jet
        jet_eta    = (abs(selev.Jet.eta) <= 2.4)
        jet_pt     = selev.Jet.pt > 25
        jet_pu     = ( ((selev.Jet.puId > 6) & (selev.Jet.pt < 50)) | (selev.Jet.pt > 50) ) 
        jet_id     = selev.Jet.jetId >= 2 
        #jet_id     = selev.Jet.isTight() == 1 & selev.Jet.isTightLeptonVeto() == 0
        jet_level  = jet_pu & jet_eta & jet_pt & jet_id

        # b-tag twiki : https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
        bjet_disc_t  = selev.Jet.btagDeepB > 0.7264 # L=0.0494, M=0.2770, T=0.7264
        bjet_disc_m  = selev.Jet.btagDeepB > 0.2770 # L=0.0494, M=0.2770, T=0.7264
        bjet_disc_l  = selev.Jet.btagDeepB > 0.0494 # L=0.0494, M=0.2770, T=0.7264
        bjet_level_t = jet_level & bjet_disc_t
        bjet_level_m = jet_level & bjet_disc_m
        bjet_level_l = jet_level & bjet_disc_l
        
        sel    = selev.Electron[el_level]
        smu    = selev.Muon[mu_level]
        sjets  = selev.Jet[jet_level]
        sbjets_t = selev.Jet[bjet_level_t]
        sbjets_m = selev.Jet[bjet_level_m]
        sbjets_l = selev.Jet[bjet_level_l]
        
        # output['pt'].fill(dataset=dataset, pt=selev.Jet.pt.flatten())
        # Fill histograms dynamically  
        for histname, h in output.items():
            if (histname not in self.jet_hists) and (histname not in self.deepcsv_hists): continue
            # Get valid fields perhistogram to fill
            fields = {k: ak.flatten(sjets[k], axis=None) for k in h.fields if k in dir(sjets)}
            h.fill(dataset=dataset, **fields)


        def flatten(ar): # flatten awkward into a 1d array to hist
            return ak.flatten(ar, axis=None)

        def num(ar):
            return ak.num(ak.fill_none(ar[~ak.is_none(ar)], 0), axis=0)

        output['njet'].fill(dataset=dataset,  njet=flatten(ak.num(sjets)))
        output['nbjet_t'].fill(dataset=dataset, nbjet_t=flatten(ak.num(sbjets_t)))
        output['nbjet_m'].fill(dataset=dataset, nbjet_m=flatten(ak.num(sbjets_m)))
        output['nbjet_l'].fill(dataset=dataset, nbjet_l=flatten(ak.num(sbjets_l)))
        output['nel'].fill(dataset=dataset,   nel=flatten(ak.num(sel)))
        output['nmu'].fill(dataset=dataset,   nmu=flatten(ak.num(smu)))

        output['lelpt'].fill(dataset=dataset, lelpt=flatten(selev.Electron[:, 0].pt))
        output['lmupt'].fill(dataset=dataset, lmupt=flatten(selev.Muon[:, 0].pt))
        output['ljpt'].fill(dataset=dataset,  ljpt=flatten(selev.Jet[:, 0].pt))
        output['sljpt'].fill(dataset=dataset,  sljpt=flatten(selev.Jet[:, 1].pt))

        return output

Example #23

Show file

File: SS_analysis.py Project: JacksonWallace/tW_scattering

    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
        n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        ## event selectors
        filters   = getFilters(ev, year=self.year, dataset=dataset)
        
        dilep     = ((ak.num(electron) + ak.num(muon))==2)
        pos_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))<0)
        neg_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))>0)
        lep0pt    = ((ak.num(electron[(electron.pt>25)]) + ak.num(muon[(muon.pt>25)]))>0)
        lep0pt_40 = ((ak.num(electron[(electron.pt>40)]) + ak.num(muon[(muon.pt>40)]))>0)
        lep0pt_100 = ((ak.num(electron[(electron.pt>100)]) + ak.num(muon[(muon.pt>100)]))>0)
        lep1pt    = ((ak.num(electron[(electron.pt>20)]) + ak.num(muon[(muon.pt>20)]))>1)
        lep1pt_30 = ((ak.num(electron[(electron.pt>30)]) + ak.num(muon[(muon.pt>30)]))>1)
        lepveto   = ((ak.num(vetoelectron) + ak.num(vetomuon))==2)
        
        # define the weight
        weight = Weights( len(ev) )
        
        #mult = 1
        #if dataset=='inclusive': mult = 0.0478/47.448
        #if dataset=='plus': mult = 0.0036/7.205

        if not dataset=='MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        
        selection = PackedSelection()
        selection.add('lepveto',       lepveto)
        selection.add('dilep',         dilep )
        selection.add('filter',        (filters) )
        selection.add('p_T(lep0)>25',  lep0pt )
        selection.add('p_T(lep0)>40',  lep0pt_40 )
        selection.add('p_T(lep1)>20',  lep1pt )
        selection.add('p_T(lep1)>30',  lep1pt_30 )
        selection.add('SS',            ( SSlepton | SSelectron | SSmuon) )
        selection.add('pos',           ( pos_charge ) )
        selection.add('neg',           ( neg_charge ) )
        selection.add('N_jet>3',       (ak.num(jet)>=4) )
        selection.add('N_jet>4',       (ak.num(jet)>=5) )
        selection.add('N_central>2',   (ak.num(central)>=3) )
        selection.add('N_central>3',   (ak.num(central)>=4) )
        selection.add('N_btag>0',      (ak.num(btag)>=1) )
        selection.add('MET>50',        (ev.MET.pt>50) )
        selection.add('ST',            (st>600) )
        selection.add('N_fwd>0',       (ak.num(fwd)>=1 ))
        selection.add('delta_eta',     (ak.any(delta_eta>2, axis=1) ) )
        selection.add('fwd_p>500',     (ak.any(j_fwd.p>500, axis=1) ) )
        
        ss_reqs = ['lepveto', 'dilep', 'SS', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'N_jet>3', 'N_central>2', 'N_btag>0']
        bl_reqs = ss_reqs + ['N_fwd>0', 'N_jet>4', 'N_central>3', 'ST', 'MET>50', 'delta_eta']
        sr_reqs = bl_reqs + ['fwd_p>500', 'p_T(lep0)>40', 'p_T(lep1)>30']

        ss_reqs_d = { sel: True for sel in ss_reqs }
        ss_selection = selection.require(**ss_reqs_d)
        bl_reqs_d = { sel: True for sel in bl_reqs }
        BL = selection.require(**bl_reqs_d)
        sr_reqs_d = { sel: True for sel in sr_reqs }
        SR = selection.require(**sr_reqs_d)

        cutflow     = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in sr_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow( req, selection.require(**cutflow_reqs_d) )
        
        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvs, weight=weight.weight()[ss_selection])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvsGood, weight=weight.weight()[ss_selection])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[ss_selection], weight=weight.weight()[ss_selection])
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight.weight()[BL])
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight.weight()[BL])
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight.weight()[BL])
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight.weight()[BL])
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight.weight()[BL])
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight.weight()[BL])
        output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[ss_selection], n2=n_nonprompt[ss_selection], n_ele=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[ss_selection].pt,
            phi  = ev.MET[ss_selection].phi,
            weight = weight.weight()[ss_selection]
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )
        
        
        return output

Example #24

Show file

File: Optimization.py Project: XL-Seb-Yan/toffea

    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()

        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        max_dR = awk.max(dR_ij, axis=1)
        max_dEta = awk.max(dEta_ij, axis=1)
        min_dR = awk.min(dR_ij, axis=1)
        min_dEta = awk.min(dEta_ij, axis=1)
        min_pT = awk.min(selected_jets.pt, axis=1)
        max_eta = abs(awk.max(selected_jets.eta, axis=1))

        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)
        dPt_i_jk = abs(selected_jets[:, jet_i].pt -
                       (selected_jets[:, jet_j] + selected_jets[:, jet_k]).pt)

        max_dPhi_jjj = awk.max(dPhi_i_jk, axis=1)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        max_pt_overM = awk.max(pt_i_overM, axis=1)
        min_pt_overM = awk.min(pt_i_overM, axis=1)
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j

        for pt_cut in range(30, 1150, 5):
            cut_name = f"min_pT_cut{pt_cut}".format(pt_cut)
            selection = PackedSelection()
            selection.add("MinJetPt_cut", min_pT > pt_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_min_pT_cut{pt_cut}".format(
                pt_cut)][dataset_name] += events_3j[sel_mask].__len__()

        for eta_cut in np.arange(0, 2.5, 0.05):
            cut_name = f"max_eta_cut{eta_cut}".format(eta_cut)
            selection = PackedSelection()
            selection.add("MaxJetEta_cut", max_eta < eta_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_max_eta_cut{eta_cut}".format(
                eta_cut)][dataset_name] += events_3j[sel_mask].__len__()

        for dEta_max_cut in np.arange(0, 5, 0.1):
            cut_name = f"dEta_max_cut{dEta_max_cut}".format(dEta_max_cut)
            selection = PackedSelection()
            selection.add("MaxJJdEta_cut", max_dEta < dEta_max_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_dEta_jj_max_cut{dEta_max_cut}".format(
                dEta_max_cut)][dataset_name] += events_3j[sel_mask].__len__()

        for dR_min_cut in np.arange(0, 5, 0.1):
            cut_name = f"dR_min_cut{dR_min_cut}".format(dR_min_cut)
            selection = PackedSelection()
            selection.add("MinJJdR_cut", min_dR > dR_min_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_dR_jj_min_cut{dR_min_cut}".format(
                dR_min_cut)][dataset_name] += events_3j[sel_mask].__len__()

        #min cut for the variable dPhi_jjj_max
        # for dPhi_jjj_max_min_cut in range(0,6,0.1):
        # cut_name = f"dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)
        # selections[cut_name] = PackedSelection()
        # selections[cut_name].add("j_jj_dPhi_max_cut", min_dR > dPhi_jjj_max_min_cut)
        # selection_items[cut_name] = []
        # selection_items[cut_name].append("j_jj_dPhi_max_cut")
        # sel_mask = HLT_mask & selections[cut_name].require(**{name: True for name in selection_items[cut_name]})
        # output[f"N_dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)][dataset_name] += events_3j[sel_mask].__len__()

        # for dPhi_jjj_min_max_cut in range(0,6,0.1):
        # cut_name = f"dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)
        # selections[cut_name] = PackedSelection()
        # selections[cut_name].add("j_jj_dPhi_max_cut", min_dR > dPhi_jjj_max_min_cut)
        # selection_items[cut_name] = []
        # selection_items[cut_name].append("j_jj_dPhi_max_cut")
        # sel_mask = HLT_mask & selections[cut_name].require(**{name: True for name in selection_items[cut_name]})
        # output[f"N_dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)][dataset_name] += events_3j[sel_mask].__len__()

        return output

Example #25

Show file

Lep5 = ((Elec5_PT_arr.count()) + (Muon5_PT_arr.count()) >= 2)
Lep6 = ((Elec6_PT_arr.count()) + (Muon6_PT_arr.count()) >= 2)
Lep7 = ((Elec7_PT_arr.count()) + (Muon7_PT_arr.count()) >= 2)
Lep8 = ((Elec8_PT_arr.count()) + (Muon8_PT_arr.count()) >= 2)

Jet1 = ((Jet1_PT_arr.count()) >= 2)
Jet2 = ((Jet2_PT_arr.count()) >= 2)
Jet3 = ((Jet3_PT_arr.count()) >= 2)
Jet4 = ((Jet4_PT_arr.count()) >= 2)
Jet5 = ((Jet5_PT_arr.count()) >= 2)
Jet6 = ((Jet6_PT_arr.count()) >= 2)
Jet7 = ((Jet7_PT_arr.count()) >= 2)
Jet8 = ((Jet8_PT_arr.count()) >= 2)

## scalar sum of lepton pt
lep1_pt_sum = ak.sum(Elec1_PT_arr, axis=-1) + ak.sum(Muon1_PT_arr,
                                                     axis=-1) > 120
lep2_pt_sum = ak.sum(Elec2_PT_arr, axis=-1) + ak.sum(Muon2_PT_arr,
                                                     axis=-1) > 120
lep3_pt_sum = ak.sum(Elec3_PT_arr, axis=-1) + ak.sum(Muon3_PT_arr,
                                                     axis=-1) > 120
lep4_pt_sum = ak.sum(Elec4_PT_arr, axis=-1) + ak.sum(Muon4_PT_arr,
                                                     axis=-1) > 120
lep5_pt_sum = ak.sum(Elec5_PT_arr, axis=-1) + ak.sum(Muon5_PT_arr,
                                                     axis=-1) > 120
lep6_pt_sum = ak.sum(Elec6_PT_arr, axis=-1) + ak.sum(Muon6_PT_arr,
                                                     axis=-1) > 120
lep7_pt_sum = ak.sum(Elec7_PT_arr, axis=-1) + ak.sum(Muon7_PT_arr,
                                                     axis=-1) > 120
lep8_pt_sum = ak.sum(Elec8_PT_arr, axis=-1) + ak.sum(Muon8_PT_arr,
                                                     axis=-1) > 120

Example #26

Show file

File: forward_jet.py Project: JacksonWallace/tW_scattering

    def process(self, events):

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        if not dataset == 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_btag=2', (ak.num(btag) == 2))
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('N_central>1', (ak.num(central) >= 2))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))
        selection.add('MET>30', (ev.MET.pt > 30))

        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + [
            'N_btag=2', 'N_jet>2', 'N_central>1', 'N_fwd>0', 'MET>30'
        ]

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight.weight()[os_selection])
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight.weight()[os_selection])
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[os_selection],
                           weight=weight.weight()[os_selection])
        output['N_central'].fill(dataset=dataset,
                                 multiplicity=ak.num(central)[os_selection],
                                 weight=weight.weight()[os_selection])
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(electron)[os_selection],
                            weight=weight.weight()[os_selection])
        output['N_fwd'].fill(dataset=dataset,
                             multiplicity=ak.num(fwd)[os_selection],
                             weight=weight.weight()[os_selection])

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight.weight()[os_selection])

        output['electron'].fill(dataset=dataset,
                                pt=ak.to_numpy(ak.flatten(electron[BL].pt)),
                                eta=ak.to_numpy(ak.flatten(electron[BL].eta)),
                                phi=ak.to_numpy(ak.flatten(electron[BL].phi)),
                                weight=weight.weight()[BL])

        output['muon'].fill(dataset=dataset,
                            pt=ak.to_numpy(ak.flatten(muon[BL].pt)),
                            eta=ak.to_numpy(ak.flatten(muon[BL].eta)),
                            phi=ak.to_numpy(ak.flatten(muon[BL].phi)),
                            weight=weight.weight()[BL])

        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['trail_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['fwd_jet'].fill(dataset=dataset,
                               pt=ak.flatten(high_p_fwd[BL].pt_nom),
                               eta=ak.flatten(high_p_fwd[BL].eta),
                               phi=ak.flatten(high_p_fwd[BL].phi),
                               weight=weight.weight()[BL])

        output['b1'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
                          weight=weight.weight()[BL])

        output['b2'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=weight.weight()[BL])

        output['j2'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(jet[:, 1:2][BL].eta),
                          phi=ak.flatten(jet[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j3'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
                          eta=ak.flatten(jet[:, 2:3][BL].eta),
                          phi=ak.flatten(jet[:, 2:3][BL].phi),
                          weight=weight.weight()[BL])

        # Now, take care of systematic unceratinties
        if not dataset == 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt > 25)]
                jet = jet[~match(
                    jet, muon,
                    deltaRCut=0.4)]  # remove jets that overlap with muons
                jet = jet[~match(
                    jet, electron,
                    deltaRCut=0.4)]  # remove jets that overlap with electrons

                central = jet[(abs(jet.eta) < 2.4)]
                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd = getFwdJet(light)
                fwd_noPU = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.p, axis=1))]  # highest momentum spectator
                high_pt_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(
                    ak.argmax(abs(fwd.eta), axis=1))]  # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                high_score_btag = central[ak.argsort(
                    central.btagDeepFlavB)][:, :2]

                # get the modified selection -> more difficult
                selection.add('N_jet>2_' + var,
                              (ak.num(jet.pt) >= 3))  # stupid bug here...
                selection.add('N_btag=2_' + var, (ak.num(btag) == 2))
                selection.add('N_central>1_' + var, (ak.num(central) >= 2))
                selection.add('N_fwd>0_' + var, (ak.num(fwd) >= 1))
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                ## Don't change the selection for now...
                bl_reqs = os_reqs + [
                    'N_jet>2_' + var, 'MET>30_' + var, 'N_btag=2_' + var,
                    'N_central>1_' + var, 'N_fwd>0_' + var
                ]
                bl_reqs_d = {sel: True for sel in bl_reqs}
                BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_fwd_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(fwd)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_central_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(central)[os_selection],
                    weight=weight.weight()[os_selection])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet.eta[:, 0:1][BL]),
                                         phi=ak.flatten(jet.phi[:, 0:1][BL]),
                                         weight=weight.weight()[BL])

                output['b1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight=weight.weight()[BL])

                output['fwd_jet_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_p_fwd[BL].pt),
                    eta=ak.flatten(high_p_fwd[BL].eta),
                    phi=ak.flatten(high_p_fwd[BL].phi),
                    weight=weight.weight()[BL])

                output['MET_' + var].fill(dataset=dataset,
                                          pt=getattr(ev.MET,
                                                     var)[os_selection],
                                          phi=ev.MET[os_selection].phi,
                                          weight=weight.weight()[os_selection])

        return output

Example #27

Show file

File: SS_analysis.py Project: cjmcmahon1/tW_scattering

    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            ## Generated leptons
            gen_lep = ev.GenL
            leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
            trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton+trailing_lepton).mass
        dilepton_pt = (leading_lepton+trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)
        
        lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True), 0)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
            n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Tau and other stuff
        tau       = getTaus(ev)
        track     = getIsoTracks(ev)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        bl          = cross(lepton, high_score_btag)
        bl_dR       = delta_r(bl['0'], bl['1'])
        min_bl_dR   = ak.min(bl_dR, axis=1)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        
        # define the weight
        weight = Weights( len(ev) )
        

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        

        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )
        
        BL = sel.dilep_baseline(cutflow=cutflow, SS=True)

        weight_BL = weight.weight()[BL]        

        if True:
            # define the inputs to the NN
            # this is super stupid. there must be a better way.
            NN_inputs = np.stack([
                ak.to_numpy(ak.num(jet[BL])),
                ak.to_numpy(ak.num(tau[BL])),
                ak.to_numpy(ak.num(track[BL])),
                ak.to_numpy(st[BL]),
                ak.to_numpy(ev.MET[BL].pt),
                ak.to_numpy(ak.max(mjf[BL], axis=1)),
                ak.to_numpy(pad_and_flatten(delta_eta[BL])),
                ak.to_numpy(pad_and_flatten(leading_lepton[BL].pt)),
                ak.to_numpy(pad_and_flatten(leading_lepton[BL].eta)),
                ak.to_numpy(pad_and_flatten(trailing_lepton[BL].pt)),
                ak.to_numpy(pad_and_flatten(trailing_lepton[BL].eta)),
                ak.to_numpy(pad_and_flatten(dilepton_mass[BL])),
                ak.to_numpy(pad_and_flatten(dilepton_pt[BL])),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].pt)),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].p)),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].eta)),
                ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].pt)),
                ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].pt)),
                ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].eta)),
                ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].eta)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].pt)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].pt)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].eta)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].eta)),
                ak.to_numpy(min_bl_dR[BL]),
                ak.to_numpy(min_mt_lep_met[BL]),
            ])

            NN_inputs = np.moveaxis(NN_inputs, 0, 1)

            model, scaler = load_onnx_model('v8')

            try:
                NN_inputs_scaled = scaler.transform(NN_inputs)

                NN_pred    = predict_onnx(model, NN_inputs_scaled)

                best_score = np.argmax(NN_pred, axis=1)


            except ValueError:
                #print ("Empty NN_inputs")
                NN_pred = np.array([])
                best_score = np.array([])
                NN_inputs_scaled = NN_inputs

            #k.clear_session()

            output['node'].fill(dataset=dataset, multiplicity=best_score, weight=weight_BL)

            output['node0_score_incl'].fill(dataset=dataset, score=NN_pred[:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL)
            output['node0_score'].fill(dataset=dataset, score=NN_pred[best_score==0][:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==0])
            output['node1_score'].fill(dataset=dataset, score=NN_pred[best_score==1][:,1] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==1])
            output['node2_score'].fill(dataset=dataset, score=NN_pred[best_score==2][:,2] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==2])
            output['node3_score'].fill(dataset=dataset, score=NN_pred[best_score==3][:,3] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==3])
            output['node4_score'].fill(dataset=dataset, score=NN_pred[best_score==4][:,4] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==4])

            SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0)))
            SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0)))
            leading_lepton_BL = leading_lepton[BL]

            output['lead_lep_SR_pp'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
                weight = weight_BL[SR_sel_pp]
            )

            output['lead_lep_SR_mm'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
                weight = weight_BL[SR_sel_mm]
            )

            del model
            del scaler
            del NN_inputs, NN_inputs_scaled, NN_pred

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL)
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL)
        output['ST'].fill(dataset=dataset, pt=st[BL], weight=weight_BL)
        output['HT'].fill(dataset=dataset, pt=ht[BL], weight=weight_BL)

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
            output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
            output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
            output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
            output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
            output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
            output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL)
        
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight_BL
        )

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['lead_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
                weight = weight_BL
            )

            output['trail_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
                weight = weight_BL
            )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight_BL
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight_BL
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight_BL
        )
        
        
        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight_BL
        )
            
        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight_BL)
        
        return output

Example #28

Show file

File: bbwwprocessor.py Project: dykim1/boostedhiggs

    def process(self, events):

        # get meta infos
        dataset = events.metadata["dataset"]
        isRealData = not hasattr(events, "genWeight")
        n_events = len(events)
        selection = processor.PackedSelection()
        weights = processor.Weights(n_events)
        output = self.accumulator.identity()

        # weights
        if not isRealData:
            output['sumw'][dataset] += awkward1.sum(events.genWeight)
        
        # trigger
        triggers = {}
        for channel in ["e","mu"]:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._trigger[channel]:
                try:
                    trigger = trigger | events.HLT[t]
                except:
                    warnings.warn("Missing trigger %s" % t, RuntimeWarning)
            triggers[channel] = trigger
            
        # met filter
        met_filters = ["goodVertices",
                       "globalSuperTightHalo2016Filter",
                       "HBHENoiseFilter",
                       "HBHENoiseIsoFilter",
                       "EcalDeadCellTriggerPrimitiveFilter",
                       "BadPFMuonFilter",
                       ]
        met_filters_mask = np.ones(len(events), dtype='bool')
        for t in met_filters:
            met_filters_mask = met_filters_mask & events.Flag[t]
        selection.add("met_filter", awkward1.to_numpy(met_filters_mask))
        
        # load objects
        muons = events.Muon
        electrons = events.Electron
        jets = events.Jet
        fatjets = events.FatJet
        subjets = events.SubJet
        fatjetsLS = events.FatJetLS
        met = events.MET
        
        # muons
        goodmuon = (
            (muons.mediumId)
            & (muons.miniPFRelIso_all <= 0.2)
            & (muons.pt >= 27)
            & (abs(muons.eta) <= 2.4)
            & (abs(muons.dz) < 0.1)
            & (abs(muons.dxy) < 0.05)
            & (muons.sip3d < 4)
        )
        good_muons = muons[goodmuon]
        ngood_muons = awkward1.sum(goodmuon, axis=1)

        # electrons
        goodelectron = (
            (electrons.mvaFall17V2noIso_WP90)
            & (electrons.pt >= 30)
            & (abs(electrons.eta) <= 1.479)
            & (abs(electrons.dz) < 0.1)
            & (abs(electrons.dxy) < 0.05)
            & (electrons.sip3d < 4)
        )
        good_electrons = electrons[goodelectron]
        ngood_electrons = awkward1.sum(goodelectron, axis=1)
        
        # good leptons
        good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
        good_leptons = good_leptons[awkward1.argsort(good_leptons.pt)]
        
        # lepton candidate
        candidatelep = awkward1.firsts(good_leptons)
        
        # lepton channel selection
        selection.add("ch_e", awkward1.to_numpy((triggers["e"]) & (ngood_electrons==1) & (ngood_muons==0))) # not sure if need to require 0 muons or 0 electrons in the next line
        selection.add("ch_mu", awkward1.to_numpy((triggers["mu"]) & (ngood_electrons==0) & (ngood_muons==1)))
        
        # jets
        ht = awkward1.sum(jets[jets.pt > 30].pt,axis=1)
        selection.add("ht_400", awkward1.to_numpy(ht>=400))
        goodjet = (
            (jets.isTight)
            & (jets.pt > 30)
            & (abs(jets.eta) <= 2.5)
            )
        good_jets = jets[goodjet]

        # fat jets
        jID = "isTight"
        # TODO: add mass correction

        # a way to get the first two subjets
        # cart = awkward1.cartesian([fatjets, subjets], nested=True)
        # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
        # sj1 = subjets[idxes[:,:,0]]
        # sj2 = subjets[idxes[:,:,1]]
        
        good_fatjet = (
            (getattr(fatjets, jID))
            & (abs(fatjets.eta) <= 2.4)
            & (fatjets.pt > 50)
            & (fatjets.msoftdrop > 30)
            & (fatjets.msoftdrop < 210)
            #& (fatjets.pt.copy(content=fatjets.subjets.content.counts) == 2) # TODO: require 2 subjets?
            # this can probably be done w FatJet_subJetIdx1 or FatJet_subJetIdx2
            & (awkward1.all(fatjets.subjets.pt >= 20))
            & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4))
        )
        good_fatjets = fatjets[good_fatjet]

        # hbb candidate
        mask_hbb = (
            (good_fatjets.pt > 200)
            & (good_fatjets.delta_r(candidatelep) > 2.0)
            )
        candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

        # b-tag #& (good_fatjets.particleNetMD_Xbb > 0.9)
        selection.add('hbb_btag',awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8)) # score would be larger for tight category (0.97)  
        
        # No AK4 b-tagged jets away from bb jet
        jets_HbbV = jets[good_jets.delta_r(candidateHbb) >= 1.2]
        selection.add('hbb_vetobtagaway',  awkward1.to_numpy(awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False) > BTagEfficiency.btagWPs[self._year]['medium']))
        
        # fat jets Lepton Subtracted
        # wjj candidate
        mask_wjj = (
            (fatjetsLS.pt > 50)
            & (fatjetsLS.delta_r(candidatelep) > 1.2)
            # need to add 2 subjets w pt > 20 & eta<2.4
            # need to add ID?
            )
        candidateWjj = awkward1.firsts(fatjetsLS[mask_wjj][awkward1.argmin(fatjetsLS[mask_wjj].delta_r(candidatelep),axis=1,keepdims=True)])
        # add t2/t1 <= 0.75 (0.45 HP)
        selection.add('hww_mass',  awkward1.to_numpy(candidateWjj.mass >= 10))

        print('met ',met)
        # wjjlnu info
        #HSolverLiInfo  hwwInfoLi;
        # qqSDmass = candidateWjj.msoftdrop
        # hwwLi   = hSolverLi->minimize(candidatelep.p4(), met.p4(), wjjcand.p4(), qqSDmass, hwwInfoLi)
        #neutrino = hwwInfoLi.neutrino;
        #wlnu     = hwwInfoLi.wlnu;
        #wqq      = hwwInfoLi.wqqjet;
        #hWW      = hwwInfoLi.hWW;
        #wwDM     = PhysicsUtilities::deltaR( wlnu,wqq) * hWW.pt()/2.0;
        # add dlvqq <= 11 (2.5 HP)
               
        # in the meantime let's add the mass
        '''
        mm = (candidatejet - candidatelep).mass2
        jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*candidatejet.mass
        joffshell = jmass < 62.5
        massassumption = 80.*joffshell + (125 - 80.)*~joffshell
        x = massassumption**2/(2*candidatelep.pt*met.pt) + np.cos(candidatelep.phi - met.phi)
        met_eta = (
            (x < 1)*np.arcsinh(x*np.sinh(candidatelep.eta))
            + (x > 1)*(
                candidatelep.eta - np.sign(candidatelep.eta)*np.arccosh(candidatelep.eta)
                )
            )
        met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),np.array([0.]),np.array([0.]),np.array([0.]))
        if met.size > 0:
            met_p4 = TLorentzVectorArray.from_ptetaphim(met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
        
        # hh system
        candidateHH = candidateWjj + met_p4 + candidateHbb
        selection.add('hh_mass', candidateHH.mass >= 700)
        selection.add('hh_centrality', candidateHH.pt/candidateHH.mass >= 0.3)
        '''
        
        channels = {"e": ["met_filter","ch_e","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"], #,"hh_mass","hh_centrality"],
                    "mu": ["met_filter","ch_mu","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"] #,"hh_mass","hh_centrality"],
                    }

        # need to add gen info
        
        if not isRealData:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            
        for channel, cuts in channels.items():
            allcuts = set()
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=0, weight=weights.weight())
            for i, cut in enumerate(cuts):
                allcuts.add(cut)
                cut = selection.all(*allcuts)
                output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1, weight=weights.weight()[cut])

        return output

Example #29

Show file

File: ML.py Project: XL-Seb-Yan/toffea

    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()
        
        # Initialize dict accumulators, if have not been initialized
        for jet in [0, 1, 2]:
            if dataset_name not in output[f"eta_{jet}_final"].keys():
                output[f"eta_{jet}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"ptoverM_{jet}_final"].keys():
                output[f"ptoverM_{jet}_final"][dataset_name] = processor.column_accumulator(np.array([]))
        
        for pair in [(0, 1), (1, 2), (2, 0)]:
            if dataset_name not in output[f"dEta_{pair[0]}{pair[1]}_final"].keys():
                output[f"dEta_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dR_{pair[0]}{pair[1]}_final"].keys():
                output[f"dR_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"moverM_{pair[0]}{pair[1]}_final"].keys():
                output[f"moverM_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            
        for pair in [(0, 1, 2), (1, 2, 0), (2, 0, 1)]:
            if dataset_name not in output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dPtoverM_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dPtoverM_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"ptoverM_max_final"].keys():
            output[f"ptoverM_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"ptoverM_min_final"].keys():
            output[f"ptoverM_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"eta_max_final"].keys():
            output[f"eta_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_max_final"].keys():
            output[f"dR_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_min_final"].keys():
            output[f"dR_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_max_final"].keys():
            output[f"dEta_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_min_final"].keys():
            output[f"dEta_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_j_jj_max_final"].keys():
            output[f"dR_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_j_jj_min_final"].keys():
            output[f"dR_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_j_jj_max_final"].keys():
            output[f"dEta_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_j_jj_min_final"].keys():
            output[f"dEta_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPhi_j_jj_max_final"].keys():
            output[f"dPhi_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPhi_j_jj_min_final"].keys():
            output[f"dPhi_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPtoverM_j_jj_max_final"].keys():
            output[f"dPtoverM_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPtoverM_j_jj_min_final"].keys():
            output[f"dPtoverM_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        
        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name: #this does not work, as the name of file which is under processing is unknown
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else: #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        
        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]
        
        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        #pairs = awk.argcombinations(selected_jets, 2)
        #jet_i, jet_j = awk.unzip(pairs)
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs) # Returns [0, 1, 2] , [1, 2, 0]
        
        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)
        
        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] + selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).phi)

        m3j = selected_jets.sum().mass
        
        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:,0] / m3j
        m_12_overM = m_ij[:,1] / m3j
        m_20_overM = m_ij[:,2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt - (selected_jets[:, 1] + selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt - (selected_jets[:, 2] + selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt - (selected_jets[:, 0] + selected_jets[:, 1]).pt) / m3j
        
        # Event selection masks
        # selection_masks = {}
        # Pre-selection
        selection = PackedSelection()
        selection.add("Dummy", m3j > 000)
        sel_mask = selection.require(**{name: True for name in selection.names})
        # selection_masks["Pre-selection"] = sel_mask
        
        output["selected_events"][dataset_name] += events_3j[sel_mask].__len__()
        
        for jet in [0, 1, 2]:
            output[f"eta_{jet}_final"][dataset_name] += processor.column_accumulator(np.array(selected_jets[:, jet][sel_mask].eta))
            output[f"ptoverM_{jet}_final"][dataset_name] += processor.column_accumulator(np.array(pt_i_overM[:, jet][sel_mask]))
        
        for pair in [(0, 1), (1, 2), (2, 0)]:
            output[f"dEta_{pair[0]}{pair[1]}_final"][dataset_name] += processor.column_accumulator(np.array(dEta_ij[:, pair[0]][sel_mask]))
            output[f"dR_{pair[0]}{pair[1]}_final"][dataset_name] += processor.column_accumulator(np.array(dR_ij[:, pair[0]][sel_mask]))
        
        output[f"moverM_01_final"][dataset_name] += processor.column_accumulator(np.array(m_01_overM[sel_mask]))
        output[f"moverM_12_final"][dataset_name] += processor.column_accumulator(np.array(m_12_overM[sel_mask]))
        output[f"moverM_20_final"][dataset_name] += processor.column_accumulator(np.array(m_20_overM[sel_mask]))
            
        for pair in [(0, 1, 2), (1, 2, 0), (2, 0, 1)]:
            output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dR_i_jk[:, pair[0]][sel_mask]))
            output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dEta_i_jk[:, pair[0]][sel_mask]))
            output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dPhi_i_jk[:, pair[0]][sel_mask]))
        
        output[f"dPtoverM_0_12_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_0_12[sel_mask]))
        output[f"dPtoverM_1_20_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_1_20[sel_mask]))
        output[f"dPtoverM_2_01_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_2_01[sel_mask]))
        
        max_pt_overM_2fill = awk.max(pt_i_overM[sel_mask], axis=1)
        min_pt_overM_2fill = awk.min(pt_i_overM[sel_mask], axis=1)
        max_dR_2fill   = awk.max(dR_ij[sel_mask], axis=1)
        max_dEta_2fill = awk.max(dEta_ij[sel_mask], axis=1)
        min_dR_2fill   = awk.min(dR_ij[sel_mask], axis=1)
        min_dEta_2fill = awk.min(dEta_ij[sel_mask], axis=1)
        min_pt_2fill   = awk.min(selected_jets[sel_mask].pt, axis=1)
        max_eta_2fill  = awk.max(abs(selected_jets[sel_mask].eta), axis=1)
        max_dR_i_jk_2fill = awk.max(dR_i_jk[sel_mask], axis=1)
        min_dR_i_jk_2fill = awk.min(dR_i_jk[sel_mask], axis=1)
        max_dEta_i_jk_2fill = awk.max(dEta_i_jk[sel_mask], axis=1)
        min_dEta_i_jk_2fill = awk.min(dEta_i_jk[sel_mask], axis=1)
        max_dPhi_i_jk_2fill = awk.max(dPhi_i_jk[sel_mask], axis=1)
        min_dPhi_i_jk_2fill = awk.min(dPhi_i_jk[sel_mask], axis=1)
        max_dPtoverM_i_jk_2fill = []
        min_dPtoverM_i_jk_2fill = []
        dPtoverM_0_12_2fill = dPtoverM_0_12[sel_mask]
        dPtoverM_1_20_2fill = dPtoverM_1_20[sel_mask]
        dPtoverM_2_01_2fill = dPtoverM_2_01[sel_mask]
        for pair in zip(dPtoverM_0_12_2fill, dPtoverM_1_20_2fill, dPtoverM_2_01_2fill):
            max_dPtoverM_i_jk_2fill.append(max(pair))
            min_dPtoverM_i_jk_2fill.append(min(pair))
        max_pt_overM_2fill = awk.fill_none(max_pt_overM_2fill, -99)
        min_pt_overM_2fill = awk.fill_none(min_pt_overM_2fill, -99)
        max_dR_2fill = awk.fill_none(max_dR_2fill, -99)
        max_dEta_2fill = awk.fill_none(max_dEta_2fill, -99)
        min_dR_2fill = awk.fill_none(min_dR_2fill, -99)
        min_dEta_2fill = awk.fill_none(min_dEta_2fill, -99)
        min_pt_2fill = awk.fill_none(min_pt_2fill, -99)
        max_eta_2fill = awk.fill_none(max_eta_2fill, -99)
        max_dR_i_jk_2fill = awk.fill_none(max_dR_i_jk_2fill, -99)
        min_dR_i_jk_2fill = awk.fill_none(min_dR_i_jk_2fill, -99)
        max_dEta_i_jk_2fill = awk.fill_none(max_dEta_i_jk_2fill, -99)
        min_dEta_i_jk_2fill = awk.fill_none(min_dEta_i_jk_2fill, -99)
        max_dPhi_i_jk_2fill = awk.fill_none(max_dPhi_i_jk_2fill, -99)
        min_dPhi_i_jk_2fill = awk.fill_none(min_dPhi_i_jk_2fill, -99)
        
        output[f"ptoverM_max_final"][dataset_name] += processor.column_accumulator(np.array(max_pt_overM_2fill))
        output[f"ptoverM_min_final"][dataset_name] += processor.column_accumulator(np.array(min_pt_overM_2fill))
        output[f"eta_max_final"][dataset_name] += processor.column_accumulator(np.array(max_eta_2fill))
        output[f"dR_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dR_2fill))
        output[f"dR_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dR_2fill))
        output[f"dEta_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dEta_2fill))
        output[f"dEta_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dEta_2fill))
        output[f"dR_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dR_i_jk_2fill))
        output[f"dR_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dR_i_jk_2fill))
        output[f"dEta_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dEta_i_jk_2fill))
        output[f"dEta_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dEta_i_jk_2fill))
        output[f"dPhi_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dPhi_i_jk_2fill))
        output[f"dPhi_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dPhi_i_jk_2fill))
        output[f"dPtoverM_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dPtoverM_i_jk_2fill))
        output[f"dPtoverM_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dPtoverM_i_jk_2fill))

        return output

Example #30

Show file

File: trilep_analysis_nonprompt.py Project: cjmcmahon1/tW_scattering

    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)

        dimuon = choose(muon,2)
        OS_dimuon = dimuon[(dimuon['0'].charge*dimuon['1'].charge < 0)]

        dielectron = choose(electron)
        OS_dielectron = dielectron[(dielectron['0'].charge*dielectron['1'].charge < 0)]

        OS_dimuon_bestZmumu = OS_dimuon[ak.singletons(ak.argmin(abs(OS_dimuon.mass-91.2), axis=1))]
        OS_dielectron_bestZee = OS_dielectron[ak.singletons(ak.argmin(abs(OS_dielectron.mass-91.2), axis=1))]
        OS_dilepton_mass = ak.fill_none(ak.pad_none(ak.concatenate([OS_dimuon_bestZmumu.mass, OS_dielectron_bestZee.mass], axis=1), 1, clip=True), -1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        # define the weight
        weight = Weights( len(ev) )
        
        if not dataset=='MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            ## lepton SFs
            #weight.add("lepton", self.leptonSF.get(electron, muon))
        
        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.trilep_baseline(cutflow=cutflow)
        
        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight.weight()[BL])
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight.weight()[BL])
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight.weight()[BL])
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight.weight()[BL])
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight.weight()[BL])
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight.weight()[BL])
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight.weight()[BL])
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight.weight()[BL])
        
        # make a plot of the dilepton mass, but without applying the cut on the dilepton mass itself (N-1 plot)
        output['dilep_mass'].fill(dataset=dataset, mass=ak.flatten(OS_dilepton_mass[sel.trilep_baseline(omit=['offZ'])]), weight=weight.weight()[sel.trilep_baseline(omit=['offZ'])])

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight.weight()[BL]
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        #output['j3'].fill(
        #    dataset = dataset,
        #    pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
        #    eta = ak.flatten(jet[:, 2:3][BL].eta),
        #    phi = ak.flatten(jet[:, 2:3][BL].phi),
        #    weight = weight.weight()[BL]
        #)
        
        
        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight.weight()[BL]
        )
            
        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight.weight()[BL])
        
        return output