Example #1
0
 def sum(self, axis=-1):
     """Sum the `x` and `y` fields along `axis` and zip them into a `TwoVector` record."""
     components = {
         field: awkward1.sum(getattr(self, field), axis=axis)
         for field in ("x", "y")
     }
     return awkward1.zip(components, with_name="TwoVector")
Example #2
0
 def sum(self, axis=-1):
     """Sum the `x`, `y`, `z` and `t` fields along `axis` into a `LorentzVector` record."""
     field_names = ("x", "y", "z", "t")
     totals = {}
     for name in field_names:
         # Each component is reduced independently along the same axis.
         totals[name] = awkward1.sum(getattr(self, name), axis=axis)
     return awkward1.zip(totals, with_name="LorentzVector")
Example #3
0
 def sum(self, axis=-1):
     """Sum the `x` and `y` components of an array of vectors along `axis`.

     The reduced fields are zipped with ``highlevel=False`` under the record
     name ``TwoVector`` and then wrapped using this array's cache and behavior.
     """
     reduced = {
         field: awkward1.sum(getattr(self, field), axis=axis)
         for field in ("x", "y")
     }
     layout = awkward1.zip(reduced, with_name="TwoVector", highlevel=False)
     return awkward1._util.wrap(layout, cache=self.cache, behavior=self.behavior)
Example #4
0
 def sum(self, axis=-1):
     """Sum `x`, `y`, `z`, `t` and `charge` along `axis` into a `Candidate` record."""
     summed = {
         key: awkward1.sum(getattr(self, key), axis=axis)
         for key in ("x", "y", "z", "t", "charge")
     }
     return awkward1.zip(summed, with_name="Candidate")
Example #5
0
 def sum(self, axis=-1):
     """Reduce the `x`, `y`, `z`, `t`, and `charge` components along `axis`.

     Each component is summed separately and the results are zipped into a
     record array named ``Candidate``.
     """
     def reduce_field(name):
         return awkward1.sum(getattr(self, name), axis=axis)

     fields = ("x", "y", "z", "t", "charge")
     return awkward1.zip(
         {name: reduce_field(name) for name in fields},
         with_name="Candidate",
     )
Example #6
0
 def sum(self, axis=-1):
     """Sum the `x`, `y`, and `z` components of an array of vectors along `axis`.

     The result is zipped with ``highlevel=False`` as ``ThreeVector`` and
     re-wrapped in an ``awkward1.Array`` carrying this array's behavior.
     """
     totals = {
         comp: awkward1.sum(getattr(self, comp), axis=axis)
         for comp in ("x", "y", "z")
     }
     layout = awkward1.zip(totals, with_name="ThreeVector", highlevel=False)
     return awkward1.Array(layout, behavior=self.behavior)
Example #7
0
    def flip_weight(self, electron):
        """Return a per-event weight built from the 'el' evaluator for the first two electrons.

        With f_i the evaluator output for electron slot i (looked up in pt and
        |eta|) and r_i = f_i / (1 - f_i), the weight is
        ``r_1 * (1 - r_2) + r_2 * (1 - r_1)``.
        """
        lookup = self.evaluator['el']

        # Length-1 slices (0:1, 1:2) keep the jagged structure, so events with
        # fewer electrons yield empty lists instead of raising.
        f_first = lookup(electron.pt[:, 0:1], abs(electron.eta[:, 0:1]))
        f_second = lookup(electron.pt[:, 1:2], abs(electron.eta[:, 1:2]))

        # For custom measurements, yahist_2D_lookup(self.ratio, pt, |eta|) can
        # be used in place of the evaluator call above.

        odds_first = f_first / (1 - f_first)
        odds_second = f_second / (1 - f_second)

        # ak.sum turns an empty list into 0 and ak.prod turns it into 1.
        first_term = ak.sum(odds_first, axis=1) * ak.prod(1 - odds_second, axis=1)
        second_term = ak.sum(odds_second, axis=1) * ak.prod(1 - odds_first, axis=1)

        return first_term + second_term
Example #8
0
def lct_check(csc_accept, csc_chamber, lct_chamber, alct_chamber,
              clct_chamber):
    """Measure how often CSC chambers also appear in the LCT/ALCT/CLCT chamber lists.

    Only events with at least one entry in ``csc_accept`` are considered. For
    every chamber value in ``csc_chamber`` of such an event, a match is counted
    when the same value is present in that event's ``lct_chamber``,
    ``alct_chamber`` or ``clct_chamber`` list respectively.

    Returns:
        ``[total_cham, lct_eff, alct_eff, clct_eff]`` where ``total_cham`` is
        the number of CSC chambers examined and the efficiencies are
        percentages of that total. ``[0, 0, 0, 0]`` is returned when no event
        passes or when the passing events contain no chambers.
    """
    csc_pass = np.array(ak.num(csc_accept, axis=1) > 0)
    if np.count_nonzero(csc_pass) == 0:
        return [0, 0, 0, 0]

    csc_cham_pass = csc_chamber[csc_pass]
    lct_cham_pass = lct_chamber[csc_pass]
    alct_cham_pass = alct_chamber[csc_pass]
    clct_cham_pass = clct_chamber[csc_pass]

    total_cham = ak.sum(ak.num(csc_cham_pass))
    if total_cham == 0:
        # Nothing to match against; avoid dividing by zero below.
        return [0, 0, 0, 0]

    lct_corr = 0
    alct_corr = 0
    clct_corr = 0
    # All four arrays were masked with the same selection, so they iterate
    # in lockstep event by event.
    per_event = zip(csc_cham_pass, lct_cham_pass, alct_cham_pass,
                    clct_cham_pass)
    for csc_row, lct_row, alct_row, clct_row in per_event:
        for chamber in csc_row:
            if chamber in lct_row:
                lct_corr += 1
            if chamber in alct_row:
                alct_corr += 1
            if chamber in clct_row:
                clct_corr += 1

    lct_eff = lct_corr / total_cham * 100
    alct_eff = alct_corr / total_cham * 100
    clct_eff = clct_corr / total_cham * 100

    return [total_cham, lct_eff, alct_eff, clct_eff]
Example #9
0
    def process(self, events):
        """Fill the dimuon mass histogram for opposite-charge two-muon events.

        Builds a ``PtEtaPhiMCandidate`` record array from the flat ``Muon_*``
        branches, keeps events with exactly two muons whose charges sum to
        zero, adds the two muons and fills ``output["mass"]`` with the mass of
        the sum. ``output["sumw"][dataset]`` is incremented by the number of
        events in the chunk.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` after
            filling.
        """
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        muons = ak.zip(
            {
                "pt": events.Muon_pt,
                "eta": events.Muon_eta,
                "phi": events.Muon_phi,
                "mass": events.Muon_mass,
                "charge": events.Muon_charge,
            },
            with_name="PtEtaPhiMCandidate")

        # The charge sum must be taken per event (axis=1). Without an axis,
        # ak.sum reduces the whole chunk to a single scalar, which would make
        # the charge requirement all-or-nothing for every event at once.
        cut = (ak.num(muons) == 2) & (ak.sum(muons.charge, axis=1) == 0)

        # add first and second muon in every event together
        selected = muons[cut]
        dimuon = selected[:, 0] + selected[:, 1]

        output["sumw"][dataset] += len(events)
        output["mass"].fill(
            dataset=dataset,
            mass=dimuon.mass,
        )

        return output
Example #10
0
def test_awkward_accessor():
    """Round-trip a jagged fletcher column through a ROOT file and check row sums.

    The per-row sums are checked both through the ``.ak`` accessor and through
    ``awkward1.sum`` applied directly to the column.
    """
    jagged = fletcher.FletcherContinuousArray([[1.0, 2.0], [], [3.0, 4.0, 5.0]])
    flat = np.zeros(len(jagged), dtype=float)
    frame = pd.DataFrame({"x": jagged, "y": flat})

    frame.to_root(".test.root", compression_jagged=None)
    restored = pd.read_root(".test.root")

    expected_row_sums = [3.0, 0.0, 12.0]
    assert restored["x"].ak(0).sum().tolist() == expected_row_sums
    assert awkward1.sum(restored["x"], axis=-1).tolist() == expected_row_sums
Example #11
0
def debug_eff(llp_accept, csc_accept, emtf_accept, gmt_accept, csc_endcap,
              csc_station, csc_ring, csc_chamber, emtf_endcap, emtf_station):
    """Print debug information for mismatched events and return efficiencies.

    Events are selected when ``llp_accept`` sums to a positive value along its
    last axis. Among those, a "mismatch" is an event where the number of
    ``emtf_accept`` entries exceeds five times the flattened ``gmt_accept``
    value. The CSC, EMTF and GMT arrays of the mismatched events are printed.

    Returns:
        ``[n_acc, csc_eff, emtf_eff, gmt_eff]`` with ``n_acc`` the number of
        selected events and the efficiencies given as percentages of
        ``n_acc``. ``[0, 0, 0, 0]`` is returned when no event is selected.
    """
    # Create a mask for events in acceptance
    llp_pass = np.array(ak.sum(llp_accept, axis=-1) > 0)

    # Apply mask to the csc, emtf, and gmt
    csc_pass = csc_accept[llp_pass]
    emtf_pass = emtf_accept[llp_pass]
    gmt_pass = gmt_accept[llp_pass]

    mismatch = ak.num(emtf_pass) > ak.flatten(gmt_pass) * 5
    print("Mismatch:")
    print(mismatch)
    print('')

    csc_pass_endcap = csc_endcap[llp_pass]
    csc_pass_station = csc_station[llp_pass]
    csc_pass_ring = csc_ring[llp_pass]
    csc_pass_chamber = csc_chamber[llp_pass]

    print("CSC Masked Array:")
    print(csc_pass[mismatch])
    print("CSC endcap:")
    print(csc_pass_endcap[mismatch])
    print("CSC station:")
    print(csc_pass_station[mismatch])
    print("CSC ring:")
    print(csc_pass_ring[mismatch])
    print("CSC chamber:")
    print(csc_pass_chamber[mismatch])
    print("")

    emtf_pass_endcap = emtf_endcap[llp_pass]
    emtf_pass_station = emtf_station[llp_pass]

    print("EMTF Masked Array:")
    print(emtf_pass[mismatch])
    print("EMTF endcap:")
    print(emtf_pass_endcap[mismatch])
    # NOTE(review): the label says "sector" but the values come from the
    # `emtf_station` argument - confirm which one is intended.
    print("EMTF sector:")
    print(emtf_pass_station[mismatch])
    print("")

    print("GMT Masked Array:")
    print(gmt_pass[mismatch])
    print("")

    # Calculate efficiency for each
    n_acc = np.count_nonzero(llp_pass)
    if n_acc == 0:
        # No event in acceptance: avoid a ZeroDivisionError, mirroring the
        # guard used by get_eff.
        return [0, 0, 0, 0]

    csc_eff = np.count_nonzero(ak.num(csc_pass, axis=1)) / n_acc * 100
    emtf_eff = np.count_nonzero(ak.num(emtf_pass, axis=1)) / n_acc * 100
    gmt_eff = np.count_nonzero(gmt_pass) / n_acc * 100

    return [n_acc, csc_eff, emtf_eff, gmt_eff]
Example #12
0
def get_eff(llp_accept, csc_accept, csc_accept_loose, csc_accept_tight,
            emtf_accept, emtf_accept_loose, gmt_accept):
    """Compute CSC, EMTF and GMT efficiencies for events in LLP acceptance.

    Events are selected when ``llp_accept`` sums to a positive value along its
    last axis. Every efficiency is the percentage of selected events for which
    the corresponding per-event quantity is nonzero (or, for the "2loose" and
    "or" variants, satisfies the stated condition).

    Returns:
        A 9-element list
        ``[n_acc, csc_eff_loose, csc_eff, csc_eff_tight, csc_eff_2loose,
        emtf_eff, emtf_eff_loose, emtf_eff_or, gmt_eff]``.
        When no event is selected, a list of nine zeros is returned so that
        callers can unpack the result the same way in both cases.
    """
    # Create a mask for events in acceptance
    llp_pass = np.array(ak.sum(llp_accept, axis=-1) > 0)
    n_acc = np.count_nonzero(llp_pass)
    if n_acc == 0:
        return [0] * 9

    def percent(values):
        """Percentage of accepted events with a nonzero entry in `values`."""
        return np.count_nonzero(values) / n_acc * 100

    # Apply mask to the csc, emtf, and gmt
    csc_pass = csc_accept[llp_pass]
    emtf_pass = emtf_accept[llp_pass]
    gmt_pass = gmt_accept[llp_pass]
    csc_pass_loose = csc_accept_loose[llp_pass]
    csc_pass_tight = csc_accept_tight[llp_pass]
    emtf_pass_loose = emtf_accept_loose[llp_pass]

    # Calculate efficiency for each
    csc_eff = percent(ak.sum(csc_pass, axis=1))
    emtf_eff = percent(ak.sum(emtf_pass, axis=1))
    gmt_eff = percent(gmt_pass)
    csc_eff_loose = percent(ak.sum(csc_pass_loose, axis=1))
    csc_eff_tight = percent(ak.sum(csc_pass_tight, axis=1))
    emtf_eff_loose = percent(ak.sum(emtf_pass_loose, axis=1))
    # At least two loose CSC entries in the event.
    csc_eff_2loose = percent(np.array(ak.sum(csc_pass_loose, axis=1) > 1))

    # Event passes either the nominal or the loose EMTF requirement.
    emtf_one = np.array(ak.sum(emtf_pass, axis=1) > 0)
    emtf_two = np.array(ak.sum(emtf_pass_loose, axis=1) > 0)
    emtf_eff_or = percent(emtf_one | emtf_two)

    return [
        n_acc, csc_eff_loose, csc_eff, csc_eff_tight, csc_eff_2loose, emtf_eff,
        emtf_eff_loose, emtf_eff_or, gmt_eff
    ]
Example #13
0
    def process(self, events):
        """Fill b-tag multiplicity histograms, nominal and per b-tag SF variation.

        Events with more than two jets are kept. Lepton and jet collections
        are built with the project helpers (`Collections`, `getJets`,
        `getBTagsDeepFlavB`, `getFwdJet`, ...). For datasets whose name does
        NOT match ``MuonEG|DoubleMuon|DoubleEG|EGamma`` the "weight", "PU",
        "btag" and "lepton" weights are added, and for each entry of
        ``self.variations`` the weights are rebuilt with a varied b-tag SF and
        ``output['N_b_' + var]`` is filled. For datasets that DO match, the
        run/lumi/event numbers of baseline events are appended to the output.

        NOTE(review): datasets matching the pattern are presumably real data
        (primary-dataset names) - confirm.
        NOTE(review): several locals computed below (e.g. SS*/OS* flags,
        leading/trailing leptons, forward-jet and mass variables, ht/st/lt,
        alljets) are not referenced again within this method.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` after
            filling.
        """

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        # bookkeeping: events before and after the preselection
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        # a positive charge product means same sign, a negative one opposite sign
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        # NOTE(review): this combines two index arrays with a bitwise AND and
        # inverts the result before indexing - verify that this is intended.
        second_lepton = lepton[~(trailing_lepton_idx & leading_lepton_idx)]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        # pair the highest-momentum forward jet with every jet
        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        lt = met_pt + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        # define the weight
        weight = Weights(len(ev))

        # weights are only added for datasets not matching the pattern
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'),
                         dataset):
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add(
                "btag",
                self.btagSF.Method1a(btag,
                                     light,
                                     b_direction='central',
                                     c_direction='central'))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        sel = Selection(
            dataset=dataset,
            events=ev,
            year=self.year,
            ele=electron,
            ele_veto=vetoelectron,
            mu=muon,
            mu_veto=vetomuon,
            jet_all=jet,
            jet_central=central,
            jet_btag=btag,
            jet_fwd=fwd,
            met=ev.MET,
        )

        BL = sel.dilep_baseline(SS=False)

        # baseline with the 'N_btag>0' cut omitted, used for the N_b plot
        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[BL_minusNb],
                           weight=weight.weight()[BL_minusNb])

        # for matching datasets, record run/lumi/event of baseline events
        if re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run' % dataset] += processor.column_accumulator(
                run_[BL])
            output['%s_lumi' % dataset] += processor.column_accumulator(
                lumi_[BL])
            output['%s_event' % dataset] += processor.column_accumulator(
                event_[BL])

        # Now, take care of systematic uncertainties
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'),
                         dataset):
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations

                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                # rebuild the weights from scratch for this variation
                weight = Weights(len(ev))
                weight.add("weight", ev.weight * cfg['lumi'][self.year])
                weight.add("PU",
                           ev.puWeight,
                           weightUp=ev.puWeightUp,
                           weightDown=ev.puWeightDown,
                           shift=False)
                # the variation name selects the b/c directions of the SF;
                # any other name adds no "btag" weight at all
                if var == 'centralUp':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='central',
                                             c_direction='up'))
                elif var == 'centralDown':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='central',
                                             c_direction='down'))
                elif var == 'upCentral':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='up',
                                             c_direction='central'))
                elif var == 'downCentral':
                    weight.add(
                        "btag",
                        self.btagSF.Method1a(btag,
                                             light,
                                             b_direction='down',
                                             c_direction='central'))

                weight.add("lepton", self.leptonSF.get(electron, muon))
                met = ev.MET
                sel = Selection(
                    dataset=dataset,
                    events=ev,
                    year=self.year,
                    ele=electron,
                    ele_veto=vetoelectron,
                    mu=muon,
                    mu_veto=vetomuon,
                    jet_all=jet,
                    jet_central=central,
                    jet_btag=btag,
                    jet_fwd=fwd,
                    met=met,
                )

                BL = sel.dilep_baseline(SS=False)

                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[BL_minusNb],
                    weight=weight.weight()[BL_minusNb])

        return output
Example #14
0
    def process(self, events):
        """Fill the dilepton-baseline histograms, nominal and per variation.

        Events with more than two jets are kept. Lepton and jet collections
        are built with the project helpers (`Collections`, `getJets`,
        `getBTagsDeepFlavB`, `getFwdJet`, ...), and the baseline selection
        comes from ``Selection.dilep_baseline(SS=False)``. Multiplicity, MET,
        lepton and jet histograms are filled for baseline events.

        For datasets whose name does NOT match
        ``MuonEG|DoubleMuon|DoubleEG|EGamma`` the "weight", "PU", "btag" and
        "lepton" weights are added, and the jet-related histograms are filled
        again for every entry of ``self.variations``. For datasets that DO
        match, the run/lumi/event numbers of baseline events are appended to
        the output.

        NOTE(review): datasets matching the pattern are presumably real data
        (primary-dataset names) - confirm.

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` after
            filling.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # Evaluated once and reused: a match switches off the weights and the
        # systematic variations and switches on run/lumi/event bookkeeping.
        is_data = re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset)

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        OSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)<0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        OSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)<0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton   = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)<0, axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        jf          = cross(high_p_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        deltaEta    = abs(high_p_fwd.eta - jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max     = ak.max(mjf, axis=1)

        jj          = choose(jet, 2)
        mjj_max     = ak.max((jj['0']+jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        # define the weight
        weight = Weights( len(ev) )

        if not is_data:
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.dilep_baseline(cutflow=cutflow, SS=False)

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])

        # "minus X" selections omit one cut so the cut variable itself can be plotted
        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])

        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        # The muon multiplicity must count muons (it was previously filled
        # with the electron multiplicity).
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight.weight()[BL])

        BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])

        BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL_minusMET].pt,
            phi  = ev.MET[BL_minusMET].phi,
            weight = weight.weight()[BL_minusMET]
        )

        #output['electron'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(electron[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(electron[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(electron[BL].phi)),
        #    weight = weight.weight()[BL]
        #)
        #
        #output['muon'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(muon[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(muon[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(muon[BL].phi)),
        #    weight = weight.weight()[BL]
        #)

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_p_fwd[BL].pt_nom),
            eta = ak.flatten(high_p_fwd[BL].eta),
            phi = ak.flatten(high_p_fwd[BL].phi),
            weight = weight.weight()[BL]
        )

        output['b1'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 0:1][BL].eta),
            phi = ak.flatten(high_score_btag[:, 0:1][BL].phi),
            weight = weight.weight()[BL]
        )

        output['b2'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 1:2][BL].eta),
            phi = ak.flatten(high_score_btag[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )

        if is_data:
            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run'%dataset] += processor.column_accumulator(run_[BL])
            output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL])
            output['%s_event'%dataset] += processor.column_accumulator(event_[BL])

        # Now, take care of systematic uncertainties
        if not is_data:
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId>1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt>25)]
                jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
                jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

                central   = jet[(abs(jet.eta)<2.4)]
                btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
                light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd       = getFwdJet(light)
                fwd_noPU  = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
                high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

                # NOTE(review): `met` is the same object as ev.MET, so the
                # assignment below may also overwrite ev.MET's 'pt' field for
                # later iterations - confirm that this is intended.
                met = ev.MET
                met['pt'] = getattr(met, var)

                sel = Selection(
                    dataset = dataset,
                    events = ev,
                    year = self.year,
                    ele = electron,
                    ele_veto = vetoelectron,
                    mu = muon,
                    mu_veto = vetomuon,
                    jet_all = jet,
                    jet_central = central,
                    jet_btag = btag,
                    jet_fwd = fwd,
                    met = met,
                )

                BL = sel.dilep_baseline(SS=False)

                # get the modified selection -> more difficult
                #selection.add('N_jet>2_'+var, (ak.num(jet.pt)>=3)) # stupid bug here...
                #selection.add('N_btag=2_'+var,      (ak.num(btag)==2) )
                #selection.add('N_central>1_'+var,   (ak.num(central)>=2) )
                #selection.add('N_fwd>0_'+var,       (ak.num(fwd)>=1) )
                #selection.add('MET>30_'+var, (getattr(ev.MET, var)>30) )

                ### Don't change the selection for now...
                #bl_reqs = os_reqs + ['N_jet>2_'+var, 'MET>30_'+var, 'N_btag=2_'+var, 'N_central>1_'+var, 'N_fwd>0_'+var]
                #bl_reqs_d = { sel: True for sel in bl_reqs }
                #BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_'+var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])
                BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
                output['N_fwd_'+var].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])
                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_'+var].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])
                output['N_central_'+var].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(jet.pt[:, 0:1][BL]),
                    eta = ak.flatten(jet.eta[:, 0:1][BL]),
                    phi = ak.flatten(jet.phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )

                output['b1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta = ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi = ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )

                output['fwd_jet_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_p_fwd[BL].pt),
                    #p   = ak.flatten(high_p_fwd[BL].p),
                    eta = ak.flatten(high_p_fwd[BL].eta),
                    phi = ak.flatten(high_p_fwd[BL].phi),
                    weight = weight.weight()[BL]
                )

                BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
                output['MET_'+var].fill(
                    dataset = dataset,
                    pt  = getattr(ev.MET, var)[BL_minusMET],
                    phi  = ev.MET[BL_minusMET].phi,
                    weight = weight.weight()[BL_minusMET]
                )

        return output
Example #15
0
    def dilep_baseline(self, omit=None, cutflow=None, tight=False, SS=True):
        '''
        Build the dilepton baseline selection and return the resulting event mask.

        Every individual cut is registered on a fresh PackedSelection that is
        stored in self.selection, so it can be inspected after the call.

        omit:    iterable of cut names that will not be applied to the returned
                 mask (default: nothing is omitted)
        cutflow: give it a cutflow object if you want it to be filled. One row is
                 added per required cut, each row requiring all cuts up to and
                 including that one. The cutflow rows do not honour `omit`.
        tight:   additionally require 'N_jet>4', 'N_central>3', 'ST>600' and 'MET>50'
        SS:      require a same-sign lepton pair if True, an opposite-sign pair otherwise
        '''
        # None instead of a literal list avoids sharing one mutable default between calls
        if omit is None:
            omit = []

        self.selection = PackedSelection()

        is_dilep   = ((ak.num(self.ele) + ak.num(self.mu))==2)
        lep0pt     = ((ak.num(self.ele[(self.ele.pt>25)]) + ak.num(self.mu[(self.mu.pt>25)]))>0)
        lep1pt     = ((ak.num(self.ele[(self.ele.pt>20)]) + ak.num(self.mu[(self.mu.pt>20)]))>1)
        lepveto    = ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==2)

        dimu    = choose(self.mu, 2)
        diele   = choose(self.ele, 2)
        dilep   = cross(self.mu, self.ele)

        # An event passes the charge requirement if any mu-mu, e-e or mu-e pair has
        # a charge product of the requested sign (>0: same sign, <0: opposite sign).
        sign_name = 'SS' if SS else 'OS'
        pair_masks = []
        for pairs in (dimu, diele, dilep):
            charge_product = pairs['0'].charge * pairs['1'].charge
            wanted_sign = (charge_product > 0) if SS else (charge_product < 0)
            pair_masks.append(ak.any(wanted_sign, axis=1))
        sign_mask = pair_masks[0] | pair_masks[1] | pair_masks[2]

        # pdgId of the two highest-pt leptons per event, padded with 0 when fewer than two exist
        lepton = ak.concatenate([self.ele, self.mu], axis=1)
        lepton_pdgId_pt_ordered = ak.fill_none(
            ak.pad_none(
                lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True),
        0)

        triggers  = getTriggers(self.events,
            ak.flatten(lepton_pdgId_pt_ordered[:,0:1]),
            ak.flatten(lepton_pdgId_pt_ordered[:,1:2]), year=self.year, dataset=self.dataset)

        ht = ak.sum(self.jet_all.pt, axis=1)
        st = self.met.pt + ht + ak.sum(self.mu.pt, axis=1) + ak.sum(self.ele.pt, axis=1)

        self.selection.add('lepveto',       lepveto)
        self.selection.add('dilep',         is_dilep)
        #self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        self.selection.add(sign_name,       sign_mask)
        self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3) )
        self.selection.add('N_jet>4',       (ak.num(self.jet_all)>4) )
        self.selection.add('N_central>2',   (ak.num(self.jet_central)>2) )
        self.selection.add('N_central>3',   (ak.num(self.jet_central)>3) )
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0) )
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0) )
        self.selection.add('MET>30',        (self.met.pt>30) )
        self.selection.add('MET>50',        (self.met.pt>50) )
        self.selection.add('ST>600',        (st>600) )

        ss_reqs = [
        #    'filter',
            'lepveto',
            'dilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            sign_name,
            'N_jet>3',
            'N_central>2',
            'N_btag>0',
            'MET>30',
            'N_fwd>0',
        ]

        if tight:
            ss_reqs += [
                'N_jet>4',
                'N_central>3',
                'ST>600',
                'MET>50',
                #'delta_eta',
            ]

        ss_reqs_d = { sel: True for sel in ss_reqs if sel not in omit }
        ss_selection = self.selection.require(**ss_reqs_d)

        if cutflow:
            # cumulative cutflow: each row requires every cut listed so far
            cutflow_reqs_d = {}
            for req in ss_reqs:
                cutflow_reqs_d[req] = True
                cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

        return ss_selection
Example #16
0
    def process(self, events):
        """
        Process one chunk of events and return the filled accumulator.

        Gen-matched tight electrons and muons are combined into one lepton
        collection, jets are cleaned against both, and the following
        histograms are filled:

        * 'N_jet'     - jet multiplicity after filter + >=2 jets (nominal weight)
        * 'N_ele'     - lepton multiplicity, same-sign dilepton events (nominal weight)
        * 'N_ele2'    - lepton multiplicity, opposite-sign dilepton events
                        (weight including the "charge flip" weight)
        * 'electron'  - leading-lepton pt/|eta|/phi, same-sign events (nominal weight)
        * 'electron2' - leading-lepton pt/|eta|/phi, opposite-sign events
                        (weight including the "charge flip" weight)
        """
        output = self.accumulator.identity()

        # very loose preselection (at least two jets); it only skims the chunk
        ev = events[ak.num(events.Jet) >= 2]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons: kinematic cuts, then gen matching
        electron = Collections(ev, "Electron", "tight").get()
        ele_kinematics = (electron.pt > 20) & (abs(electron.eta) < 2.4)
        electron = electron[ele_kinematics]
        ele_gen_matched = (electron.genPartIdx >= 0) & (np.abs(electron.matched_gen.pdgId) == 11)
        electron = electron[ele_gen_matched]  # from here on all leptons are gen-matched

        ## Muons: same two steps
        muon = Collections(ev, "Muon", "tight").get()
        mu_kinematics = (muon.pt > 20) & (abs(muon.eta) < 2.4)
        muon = muon[mu_kinematics]
        mu_gen_matched = (muon.genPartIdx >= 0) & (np.abs(muon.matched_gen.pdgId) == 13)
        muon = muon[mu_gen_matched]

        ## Leptons
        lepton = ak.concatenate([muon, electron], axis=1)
        charge_sum = ak.sum(lepton.charge, axis=1)
        same_sign = (charge_sum != 0) & (ak.num(lepton) == 2)
        opposite_sign = (charge_sum == 0) & (ak.num(lepton) == 2)

        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]

        ## Jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        # need to sort wrt smeared and recorrected jet pt
        jet = jet[ak.argsort(jet.pt, ascending=False)]
        # remove jets that overlap with muons, then with electrons
        for overlapping in (muon, electron):
            jet = jet[~match(jet, overlapping, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        # two weight sets: nominal, and nominal times the charge-flip weight
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))

        if dataset != 'MuonEG':
            # generator weight
            for weights in (weight, weight2):
                weights.add("weight", ev.genWeight)

        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        # selections
        event_filters = getFilters(ev, year=self.year, dataset=dataset)

        selection = PackedSelection()
        selection.add('filter', event_filters)
        selection.add('ss', same_sign)
        selection.add('os', opposite_sign)
        selection.add('jet', ak.num(jet) >= 2)

        def require_all(*cuts):
            # event mask requiring every named cut to pass
            return selection.require(**dict.fromkeys(cuts, True))

        baseline = require_all('filter', 'jet')
        ss_sel = require_all('filter', 'jet', 'ss')
        os_sel = require_all('filter', 'jet', 'os')

        # outputs
        nominal_w = weight.weight()
        flip_w = weight2.weight()
        n_lepton = ak.num(lepton)

        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[baseline],
                             weight=nominal_w[baseline])

        for hist_name, mask, event_w in (
            ('N_ele', ss_sel, nominal_w),
            ('N_ele2', os_sel, flip_w),
        ):
            output[hist_name].fill(dataset=dataset,
                                   multiplicity=n_lepton[mask],
                                   weight=event_w[mask])

        for hist_name, mask, event_w in (
            ("electron", ss_sel, nominal_w),
            ("electron2", os_sel, flip_w),
        ):
            selected = leading_lepton[mask]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(selected.pt)),
                eta=abs(ak.to_numpy(ak.flatten(selected.eta))),
                phi=ak.to_numpy(ak.flatten(selected.phi)),
                weight=event_w[mask])

        return output
Example #17
0
# Exploratory, notebook-style study of generator-level Higgs bosons and the two
# leading fat jets in `full_hh4b_samples`. Bare expressions below only have a
# visible effect when run interactively (their value is displayed, not stored).
genpart_pt = full_hh4b_samples['GenPart_pt']
genpart_status = full_hh4b_samples['GenPart_status']

# inspect the raw status / statusFlags branches
full_hh4b_samples['GenPart_status']
full_hh4b_samples['GenPart_statusFlags']

# per-particle mask selecting gen particles with pdgId == 25
higgses = (full_hh4b_samples['GenPart_pdgId'] == 25)
full_hh4b_samples['GenPart_status'][higgses]

# distinct status codes of the selected particles (padded to 40 entries per
# event so the jagged array can be converted to a regular numpy array)
np.unique(
    ak.to_numpy(
        ak.pad_none(full_hh4b_samples['GenPart_status'][higgses], 40, axis=1)))

# NOTE(review): this reads `hh4b_samples`, not `full_hh4b_samples`, while the mask
# was built from `full_hh4b_samples` -- confirm that this is intended.
ak.pad_none(hh4b_samples['GenPart_status'][higgses], 40, axis=1)
higgs_pt = genpart_pt[(full_hh4b_samples['GenPart_pdgId'] == 25)]

# per-event flag: last entry of the sorted pt list is above 300
# (presumably the highest pt, assuming ak.sort's default ascending order)
gt_300 = ak.sort(higgs_pt, axis=1)[:, -1] > 300

# fraction of events passing the flag above
num_gt_300 = ak.sum(ak.sort(higgs_pt, axis=1)[:, -1] > 300)
num_gt_300 / len(higgs_pt)

# pt of the pdgId == 25 particles that additionally have status == 22
fhiggs_pt = genpart_pt[(full_hh4b_samples['GenPart_pdgId'] == 25
                        )][full_hh4b_samples['GenPart_status'][higgses] == 22]

# first two fat jets per event (None-padded when fewer exist), restricted to
# the events flagged by gt_300
jet_pt = ak.pad_none(full_hh4b_samples['FatJet_pt'], 2, axis=1)[:, :2][gt_300]
jet_msd = ak.pad_none(full_hh4b_samples['FatJet_msoftdrop'], 2,
                      axis=1)[:, :2][gt_300]

# fraction of those events where both jets have pt > 250 and msoftdrop > 20
# (the boolean masks are combined by multiplication)
ak.sum((jet_pt[:, 0] > 250) * (jet_pt[:, 1] > 250) * (jet_msd[:, 0] > 20) *
       (jet_msd[:, 1] > 20)) / len(jet_pt)
Example #18
0
    def process(self, events):
        """
        Process one chunk of events for the electron charge-flip study.

        Electrons and muons are required to be gen-matched and prompt. An
        electron counts as "flipped" when its pdgId is the negative of its
        matched gen particle's pdgId, or of the value returned by
        find_first_parent for that gen particle. Histograms are filled in
        pairs: flip-tagged same-sign selections use the nominal weight,
        no-flip opposite-sign selections use the weight that also contains the
        "charge flip" weight.

        Returns the filled accumulator.
        """
        output = self.accumulator.identity()

        # very loose preselection (at least two jets); it only skims the chunk
        ev = events[ak.num(events.Jet)>=2]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 15) & (np.abs(electron.eta) < 2.4)]
        electron = electron[(electron.genPartIdx >= 0)]
        # from here on all electrons are gen-matched
        electron = electron[(np.abs(electron.matched_gen.pdgId)==11)]
        # and now they are all prompt
        ele_prompt = (electron.genPartFlav==1) | (electron.genPartFlav==15)
        electron = electron[ele_prompt]

        leading_electron = electron[ak.singletons(ak.argmax(electron.pt, axis=1))]
        trailing_electron = electron[ak.singletons(ak.argmin(electron.pt, axis=1))]

        leading_parent = find_first_parent(leading_electron.matched_gen)
        trailing_parent = find_first_parent(trailing_electron.matched_gen)

        # charge flip: reco pdgId opposite to the gen particle's or to its first parent's
        opposite_to_gen = (electron.matched_gen.pdgId*(-1) == electron.pdgId)
        opposite_to_parent = (find_first_parent(electron.matched_gen)*(-1) == electron.pdgId)
        is_flipped = ( (opposite_to_gen | opposite_to_parent) & (np.abs(electron.pdgId) == 11) )

        flipped_electron = electron[is_flipped]
        flipped_electron = flipped_electron[(ak.fill_none(flipped_electron.pt, 0)>0)]
        flipped_electron = flipped_electron[~(ak.is_none(flipped_electron))]
        n_flips = ak.num(flipped_electron)

        ## Muons
        muon = Collections(ev, "Muon", "tightFCNC").get()
        muon = muon[(muon.pt > 15) & (np.abs(muon.eta) < 2.4)]
        muon = muon[(muon.genPartIdx >= 0)]
        # from here, all muons are gen-matched
        muon = muon[(np.abs(muon.matched_gen.pdgId)==13)]
        # and now they are all prompt
        mu_prompt = (muon.genPartFlav==1) | (muon.genPartFlav==15)
        muon = muon[mu_prompt]

        ## Leptons
        lepton = ak.concatenate([muon, electron], axis=1)
        charge_sum = ak.sum(lepton.charge, axis=1)
        SSlepton = (charge_sum != 0) & (ak.num(lepton)==2)
        OSlepton = (charge_sum == 0) & (ak.num(lepton)==2)

        emulepton = (ak.num(electron) == 1) & (ak.num(muon) == 1)
        no_mumu = (ak.num(muon) <= 1)

        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        ## Jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        # need to sort wrt smeared and recorrected jet pt
        jet = jet[ak.argsort(jet.pt, ascending=False)]
        # remove jets that overlap with muons, then with electrons
        for overlapping in (muon, electron):
            jet = jet[~match(jet, overlapping, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        # two weight sets: nominal, and nominal times the charge-flip weight
        weight = Weights( len(ev) )
        weight2 = Weights( len(ev) )

        if dataset != 'MuonEG':
            # generator weight
            for weights in (weight, weight2):
                weights.add("weight", ev.genWeight)

        weight2.add("charge flip", self.charge_flip_ratio.flip_weight(electron))

        # selections
        filters = getFilters(ev, year=self.year, dataset=dataset)

        selection = PackedSelection()
        for cut_name, cut_mask in (
            ('filter',  filters),
            ('ss',      SSlepton),
            ('os',      OSlepton),
            ('jet',     ak.num(jet) >= 2),
            ('ee',      ak.num(electron) == 2),
            ('emu',     emulepton),
            ('flip',    n_flips == 1),
            ('nflip',   n_flips == 0),
            ('no_mumu', no_mumu),
        ):
            selection.add(cut_name, cut_mask)

        def require_all(*cuts):
            # event mask requiring the baseline cuts plus every named cut
            return selection.require(**dict.fromkeys(('filter', 'jet') + cuts, True))

        baseline    = require_all()

        flip_sel    = require_all('flip', 'ss', 'ee')
        flip_sel2   = require_all('flip', 'ss', 'emu')
        flip_sel3   = require_all('flip', 'ss', 'no_mumu')

        n_flip_sel  = require_all('nflip', 'os', 'ee')
        n_flip_sel2 = require_all('nflip', 'os', 'emu')
        n_flip_sel3 = require_all('nflip', 'os', 'no_mumu')

        ss_sel      = require_all('ss', 'no_mumu')
        os_sel      = require_all('os', 'no_mumu')

        eess_sel    = require_all('ss', 'ee')
        eeos_sel    = require_all('os', 'ee')

        emss_sel    = require_all('ss', 'emu')
        emos_sel    = require_all('os', 'emu')

        # outputs
        nominal_w = weight.weight()
        flip_w = weight2.weight()

        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[baseline], weight=nominal_w[baseline])

        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(lepton)[ss_sel], weight=nominal_w[ss_sel])

        output['N_ele2'].fill(dataset=dataset, multiplicity=ak.num(lepton)[os_sel], weight=flip_w[os_sel])

        # number of flipped electrons per event
        for hist_name, mask, event_w in (
            ('electron_flips',  flip_sel,    nominal_w),
            ('electron_flips2', n_flip_sel,  flip_w),
            ('electron_flips3', flip_sel2,   nominal_w),
            ('electron_flips4', n_flip_sel2, flip_w),
        ):
            output[hist_name].fill(dataset=dataset, multiplicity=n_flips[mask], weight=event_w[mask])

        # leading-electron kinematics
        for hist_name, mask, event_w in (
            ("electron",          flip_sel3,   nominal_w),
            ("electron2",         n_flip_sel3, flip_w),
            ("flipped_electron",  flip_sel,    nominal_w),
            ("flipped_electron2", n_flip_sel,  flip_w),
            ("flipped_electron3", flip_sel2,   nominal_w),
            ("flipped_electron4", n_flip_sel2, flip_w),
        ):
            selected = leading_electron[mask]
            output[hist_name].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(selected.pt)),
                eta = np.abs(ak.to_numpy(ak.flatten(selected.eta))),
                weight = event_w[mask]
            )

        #output["lepton_parent"].fill(
        #    dataset = dataset,
        #    pdgID = np.abs(ak.to_numpy(ak.flatten(leading_parent[ss_sel]))),
        #    weight = weight.weight()[ss_sel]
        #)
        #
        #output["lepton_parent2"].fill(
        #    dataset = dataset,
        #    pdgID = np.abs(ak.to_numpy(ak.flatten(trailing_parent[ss_sel]))),
        #    weight = weight.weight()[ss_sel]
        #)

        return output
Example #19
0
# Containers for the per-station rates; nothing in this part of the script fills them.
lct_trigger_rate = []
alct_daq_rate = []
clct_daq_rate = []
lct_daq_rate = []

gem_trigger_rate = []
gem_daq_rate = []

## trigger rates for each station
# ALCT: count stubs with bx == 3 in each (station, ring) and scale the count by
# `normalization` to obtain the printed rate.
for ids in range(0, 9):
    station_ring = id_to_station[ids]
    station = station_ring[0]
    ring = station_ring[1]
    cuts = ((csc_alct.bx == 3) & (csc_alct.station == station) &
            (csc_alct.ring == ring))
    # ak.sum counts the True entries directly; no conversion of the mask to a
    # Python list is needed for that.
    passEvents = ak.sum(cuts)
    objectRate = passEvents * normalization
    print(
        "Trigger Rate for ALCT for CSC station = %d, ring = %d (BX3): %f kHz" %
        (station, ring, objectRate))

for ids in range(0, 9):
    station = id_to_station[ids][0]
    ring = id_to_station[ids][1]
    cuts = ((csc_clct.bx == 7) & (csc_clct.station == station) &
            (csc_clct.ring == ring))
    ak.to_list(cuts)
    passEvents = ak.sum(cuts)
    objectRate = passEvents * normalization
    print(
        "Trigger Rate for CLCT for CSC station = %d, ring = %d (BX7): %f kHz" %
Example #20
0
    def process(self, df):
        """
        Process one chunk `df` and return the filled accumulator.

        Objects and event-level quantities are restricted to the events passing
        the baseline mask. The live code fills the 'cutflow' and 'trigger'
        counters and the event-level histograms 'h_ntracks', 'h_njets', 'h_ht',
        'h_st' and 'h_met'. Several further quantities (per-object weights,
        delta-phi / delta-eta / delta-R variables) are still computed but are
        only referenced by the commented-out histogram fills below.

        NOTE(review): `.size`, `.counts`, `.flatten()` and `.min()` are used as
        array attributes/methods here -- presumably awkward-0 style jagged
        arrays; confirm against the Objects implementation.
        """
        output = self.accumulator.identity()

        # important variables
        obj = Objects(df)
        mask = bl.getBaselineMask(df)

        # everything below is restricted to events passing the baseline mask
        electrons = obj.goodElectrons()[mask]
        muons = obj.goodMuons()[mask]
        jets = obj.goodJets()[mask]
        madHT_cut = df['madHT'][mask]
        met = df['MET'][mask]
        metPhi = df['METPhi'][mask]
        tracks = obj.goodTracks()[mask]
        output['cutflow']['all events'] += jets.size

        # Hack: defining weights: same length as the number of events in the chunk
        # Check if this is what other people do too
        # NOTE(review): the two summands are presumably integrated luminosities of
        # two data-taking periods -- units and origin are not visible here.
        luminosity = 21071.0 + 38654.0
        evtw = df['Weight'][mask] * luminosity
        # per-object weights (presumably the event weight reshaped to each
        # collection's structure); only used by commented-out fills
        ew = utl.awkwardReshape(electrons, evtw)
        mw = utl.awkwardReshape(muons, evtw)
        jw = utl.awkwardReshape(jets, evtw)
        tw = utl.awkwardReshape(tracks, evtw)

        # Getting subset of variables based on number of AK8 jets
        # calculating event variables
        ht = ak.sum(jets.pt, axis=1)  # sum of jet pt per event
        st = ht + met
        #dPhiMinJ = utl.deltaPhi(fjets.phi,metPhi).min()
        dPhiMinj = utl.deltaPhi(jets.phi, metPhi).min()

        #
        # Trigger Study...
        #
        # count entries passing a MET threshold and two HT thresholds
        cut_met = met > 250
        cut_ht_offline = ht > 1200
        cut_ht_scouting = ht > 500
        jets_met = jets[cut_met]
        jets_ht_off = jets[cut_ht_offline]
        jets_ht_scout = jets[cut_ht_scouting]
        output['trigger']['all events'] += jets.size
        output['trigger']['met'] += jets_met.size
        output['trigger']['ht_offline'] += jets_ht_off.size
        output['trigger']['ht_scouting'] += jets_ht_scout.size

        ## at least 1 AK4 Jet
        cut_1j = jets.counts >= 1
        jets_1j = jets[cut_1j]
        metPhi_1j = metPhi[cut_1j]
        evtw_1j = evtw[cut_1j]
        # delta-phi between the first jet and MET
        dPhij1 = utl.deltaPhi(jets_1j.phi[:, 0], metPhi_1j)
        output['cutflow']['one jet'] += jets_1j.size

        ## at least 2 AK4 Jets
        cut_2j = jets.counts >= 2
        jets_2j = jets[cut_2j]
        metPhi_2j = metPhi[cut_2j]
        evtw_2j = evtw[cut_2j]
        # eta/phi of the first two jets as flat numpy arrays
        j1_eta = np.array(jets_2j.eta[:, 0])
        j2_eta = np.array(jets_2j.eta[:, 1])
        j1_phi = np.array(jets_2j.phi[:, 0])
        j2_phi = np.array(jets_2j.phi[:, 1])
        dEtaj12 = abs(j1_eta - j2_eta)
        deltaR12j = utl.delta_R(j1_phi, j2_phi, j1_eta, j2_eta)
        dPhij1_2j = utl.deltaPhi(j1_phi, metPhi_2j)
        dPhij2 = utl.deltaPhi(j2_phi, metPhi_2j)
        output['cutflow']['two jets'] += jets_2j.size

        ### at least 1 AK8 Jet
        #cut_1fj = fjets.counts >= 1
        #fjets_1fj = fjets[cut_1fj]
        #metPhi_1fj = metPhi[cut_1fj]
        #evtw_1fj = evtw[cut_1fj]
        #dPhiJ1 = utl.deltaPhi(fjets_1fj.phi[:,0],metPhi_1fj)
        ### at least 2 AK8 Jets
        #cut_2fj = fjets.counts >= 2
        #fjets_2fj = fjets[cut_2fj]
        #metPhi_2fj = metPhi[cut_2fj]
        #evtw_2fj = evtw[cut_2fj]
        #J1_eta = np.array(fjets_2fj.eta[:,0])
        #J2_eta = np.array(fjets_2fj.eta[:,1])
        #J1_phi = np.array(fjets_2fj.phi[:,0])
        #J2_phi = np.array(fjets_2fj.phi[:,1])
        #dEtaJ12 = abs(J1_eta - J2_eta)
        #deltaR12J = utl.delta_R(J1_phi,J2_phi,J1_eta,J2_eta)
        #dPhiJ1_2fj = utl.deltaPhi(J1_phi,metPhi_2fj)
        #dPhiJ2 = utl.deltaPhi(J2_phi,metPhi_2fj)

        ## twofjets = (fjets.counts >= 2)
        ## difjets = fjets[twofjets]
        ## ptcut = (difjets.pt[:,0] > 200) & (difjets.pt[:,1] > 200)
        ## difjets_pt200 = difjets[ptcut]

        #twofjets = (fjets.counts >= 2)
        #output['cutflow']['two fjets'] += twofjets.sum()

        ## difjets = fjets[twofjets]
        ## difjets_pt200 = difjets[ptcut]
        ## output['jtpt'].fill(dataset=dataset, pt=fjets.pt.flatten())
        ## output['jteta'].fill(dataset=dataset, eta=fjets.eta.flatten())

        # event-level histograms, all weighted with the per-event weight
        output['h_ntracks'].fill(ntracks=tracks.counts.flatten(), weight=evtw)
        output['h_njets'].fill(njets=jets.counts.flatten(), weight=evtw)
        output['h_ht'].fill(ht=ht, weight=evtw)
        output['h_st'].fill(st=st, weight=evtw)
        output['h_met'].fill(MET=met, weight=evtw)
        # The fills below are disabled; they reference the locals computed above
        # as well as AK8 ("fjets") quantities that are themselves commented out.
        #output['h_jPt'].fill(pt=jets.pt.flatten(),weight=ak.flatten(jw))
        #output['h_jEta'].fill(eta=jets.eta.flatten(),weight=ak.flatten(jw))
        #output['h_jPhi'].fill(phi=jets.phi.flatten(),weight=ak.flatten(jw))
        #output['h_jAxismajor'].fill(axismajor=jets.axismajor.flatten(),weight=ak.flatten(jw))
        #output['h_jAxisminor'].fill(axisminor=jets.axisminor.flatten(),weight=ak.flatten(jw))
        #output['h_jPtD'].fill(ptD=jets.ptD.flatten(),weight=ak.flatten(jw))
        #output['h_jPtAK8'].fill(pt=fjets.pt.flatten(),weight=ak.flatten(fjw))
        #output['h_jEtaAK8'].fill(eta=fjets.eta.flatten(),weight=ak.flatten(fjw))
        #output['h_jPhiAK8'].fill(phi=fjets.phi.flatten(),weight=ak.flatten(fjw))
        #output['h_jAxismajorAK8'].fill(axismajor=fjets.axismajor.flatten(),weight=ak.flatten(fjw))
        #output['h_jAxisminorAK8'].fill(axisminor=fjets.axisminor.flatten(),weight=ak.flatten(fjw))
        #output['h_jGirthAK8'].fill(girth=fjets.girth.flatten(),weight=ak.flatten(fjw))
        #output['h_jPtDAK8'].fill(ptD=fjets.ptD.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau1AK8'].fill(tau1=fjets.tau1.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau2AK8'].fill(tau2=fjets.tau2.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau3AK8'].fill(tau3=fjets.tau3.flatten(),weight=ak.flatten(fjw))
        #output['h_jTau21AK8'].fill(tau21=fjets[fjets.tau1 > 0].tau2.flatten()/fjets[fjets.tau1 > 0].tau1.flatten(),weight=ak.flatten(fjw)[(fjets.tau1 > 0).flatten()])
        #output['h_jTau32AK8'].fill(tau32=fjets[fjets.tau2 > 0].tau3.flatten()/fjets[fjets.tau2 > 0].tau2.flatten(),weight=ak.flatten(fjw)[(fjets.tau2 > 0).flatten()])
        #output['h_jSoftDropMassAK8'].fill(softDropMass=fjets.softDropMass.flatten(),weight=ak.flatten(fjw))
        #output['h_dEtaJ12'].fill(dEtaJ12=dEtaJ12,weight=evtw_2fj)
        #output['h_dRJ12'].fill(dRJ12=deltaR12J,weight=evtw_2fj)
        #output['h_dPhiJ1MET'].fill(dPhiJMET=dPhiJ1,weight=evtw_1fj)
        #output['h_dPhiJ2MET'].fill(dPhiJMET=dPhiJ2,weight=evtw_2fj)
        #output['h_dPhiMinJMET'].fill(dPhiJMET=dPhiMinJ,weight=evtw)
        #output['h_dPhiJ1METrdPhiJ2MET'].fill(dPhiJ1METrdPhiJ2MET=dPhiJ1_2fj[dPhiJ2>0]/dPhiJ2[dPhiJ2>0],weight=evtw_2fj[dPhiJ2>0])
        #output['h_mT'].fill(mT=mtAK8,weight=evtw)
        #output['h_METrHT_pt30'].fill(METrHT_pt30=met[ht>0]/ht[ht>0],weight=evtw[ht>0])
        #output['h_METrST_pt30'].fill(METrST_pt30=met[st>0]/st[st>0],weight=evtw[st>0])
        ## Cut: at least 2 AK8 jets
        #output['h_njets_ge2AK8j'].fill(njets=jets[cut_2fj].counts.flatten(),weight=evtw[cut_2fj])
        #output['h_njetsAK8_ge2AK8j'].fill(njets=fjets[cut_2fj].counts.flatten(),weight=evtw[cut_2fj])
        #output['h_ht_ge2AK8j'].fill(ht=ht[cut_2fj],weight=evtw[cut_2fj])
        #output['h_st_ge2AK8j'].fill(st=st[cut_2fj],weight=evtw[cut_2fj])
        #output['h_met_ge2AK8j'].fill(MET=met[cut_2fj],weight=evtw[cut_2fj])
        #output['h_dPhiJ1MET_ge2AK8j'].fill(dPhiJMET=dPhiJ1[fjets_1fj.counts >= 2],weight=evtw_1fj[fjets_1fj.counts >= 2])
        #output['h_dPhiMinJMET_ge2AK8j'].fill(dPhiJMET=dPhiMinJ[cut_2fj],weight=evtw[cut_2fj])
        #output['h_METrHT_pt30_ge2AK8j'].fill(METrHT_pt30=met[(cut_2fj) & (ht>0)]/ht[(cut_2fj) & (ht>0)],weight=evtw[(cut_2fj) & (ht>0)])
        #output['h_METrST_pt30_ge2AK8j'].fill(METrST_pt30=met[(cut_2fj) & (st>0)]/st[(cut_2fj) & (st>0)],weight=evtw[(cut_2fj) & (st>0)])
        #output['h_mT_ge2AK8j'].fill(mT=mtAK8[cut_2fj],weight=evtw[cut_2fj])
        ## more AK4 jets variables
        #output['h_dEtaj12'].fill(dEtaJ12=dEtaj12,weight=evtw_2j)
        #output['h_dRj12'].fill(dRJ12=deltaR12j,weight=evtw_2j)
        #output['h_dPhij1MET'].fill(dPhiJMET=dPhij1,weight=evtw_1j)
        #output['h_dPhij2MET'].fill(dPhiJMET=dPhij2,weight=evtw_2j)
        #output['h_dPhiMinjMET'].fill(dPhiJMET=dPhiMinj,weight=evtw)
        #output['h_dPhij1METrdPhij2MET'].fill(dPhiJ1METrdPhiJ2MET=dPhij1_2j[dPhij2>0]/dPhij2[dPhij2>0],weight=evtw_2j[dPhij2>0])
        ## Cut: at least 2 AK4 jets
        #output['h_njets_ge2AK4j'].fill(njets=jets[cut_2j].counts.flatten(),weight=evtw[cut_2j])
        #output['h_njetsAK8_ge2AK4j'].fill(njets=fjets[cut_2j].counts.flatten(),weight=evtw[cut_2j])
        #output['h_ht_ge2AK4j'].fill(ht=ht[cut_2j],weight=evtw[cut_2j])
        #output['h_st_ge2AK4j'].fill(st=st[cut_2j],weight=evtw[cut_2j])
        #output['h_met_ge2AK4j'].fill(MET=met[cut_2j],weight=evtw[cut_2j])
        #output['h_dPhij1MET_ge2AK4j'].fill(dPhiJMET=dPhij1[jets_1j.counts >= 2],weight=evtw_1j[jets_1j.counts >= 2])
        #output['h_dPhiMinjMET_ge2AK4j'].fill(dPhiJMET=dPhiMinj[cut_2j],weight=evtw[cut_2j])
        #output['h_METrHT_pt30_ge2AK4j'].fill(METrHT_pt30=met[(cut_2j) & (ht>0)]/ht[(cut_2j) & (ht>0)],weight=evtw[(cut_2j) & (ht>0)])
        #output['h_METrST_pt30_ge2AK4j'].fill(METrST_pt30=met[(cut_2j) & (st>0)]/st[(cut_2j) & (st>0)],weight=evtw[(cut_2j) & (st>0)])
        #output['h_mT_ge2AK4j'].fill(mT=mtAK8[cut_2j],weight=evtw[cut_2j])
        #output['h_madHT'].fill(ht=madHT_cut,weight=evtw)
        return output
Example #21
0
    def process(self, events):
        """Fill trijet histograms for events passing the HLT and 3-jet selection.

        Events must fire the year/dataset dependent HLT paths and contain at
        least three jets with ``pt > 30``, ``|eta| < 2.5`` and ``isTight``.
        The first three such jets are used to build pairwise (i, j) and
        one-versus-two (i, jk) quantities, which are histogrammed once per
        entry of ``selection_masks``.

        Parameters:
            events: event chunk exposing ``metadata['dataset']``, ``HLT`` and
                ``Jet``.

        Returns:
            The accumulator created by ``self._accumulator.identity()`` with
            the histograms and the ``total_events`` counter filled.

        Raises:
            ValueError: if the free variable ``year`` (defined outside this
                method) is not "2016", "2017" or "2018".
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # HLT selection
        # Fail early with a clear message; previously an unhandled year left
        # the mask as an empty list that was then AND-ed with the event mask.
        if year not in ("2016", "2017", "2018"):
            raise ValueError(
                "Unsupported year for HLT selection: {!r}".format(year))

        hlt = events.HLT
        is_single_muon = "SingleMuon" in dataset_name
        if year == "2016":
            # NOTE (original author): the SingleMuon check does not work, as
            # the name of the file being processed is unknown.
            if is_single_muon:
                if "2016B2" in dataset_name:
                    HLT_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
                else:
                    HLT_mask = (hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
                                | hlt.TkMu50)
            else:  # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
                elif "2016H" in dataset_name:
                    HLT_mask = (hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
                else:
                    HLT_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
        elif year == "2017":
            if is_single_muon:
                if "2017B" in dataset_name:
                    HLT_mask = hlt.IsoMu27 | hlt.Mu50
                else:
                    HLT_mask = (hlt.IsoMu27 | hlt.Mu50 | hlt.OldMu100
                                | hlt.TkMu100)
            else:
                HLT_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID
                            | hlt.CaloJet550_NoJetID | hlt.PFJet500)
        else:  # year == "2018"
            if is_single_muon:
                HLT_mask = (hlt.IsoMu24 | hlt.Mu50 | hlt.OldMu100
                            | hlt.TkMu100)
            else:
                HLT_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID
                            | hlt.CaloJet550_NoJetID | hlt.PFJet500)

        # Require 3 good jets and the trigger
        jet_mask = ((events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5)
                    & (events.Jet.isTight))
        event_mask = (awk.sum(jet_mask, axis=1) >= 3) & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # The first three good jets of each event form the trijet candidate
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Jet pairs (i, j); k is the remaining jet of each triplet
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # (0, 1, 2), (1, 2, 0)
        jet_k = [2, 0, 1]

        jets_i = selected_jets[:, jet_i]
        jets_j = selected_jets[:, jet_j]
        jk_system = jets_j + selected_jets[:, jet_k]

        m_ij = (jets_i + jets_j).mass
        dR_ij = jets_i.delta_r(jets_j)
        dEta_ij = abs(jets_i.eta - jets_j.eta)

        dR_i_jk = jets_i.delta_r(jk_system)
        dEta_i_jk = abs(jets_i.eta - jk_system.eta)
        # NOTE(review): plain |phi_i - phi_jk| is not wrapped, so values can
        # exceed pi. Left unchanged to keep the histogram content as before.
        dPhi_i_jk = abs(jets_i.phi - jk_system.phi)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt -
                            (selected_jets[:, 1] +
                             selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt -
                            (selected_jets[:, 2] +
                             selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt -
                            (selected_jets[:, 0] +
                             selected_jets[:, 1]).pt) / m3j

        # Event selection masks (the HLT requirement is already part of
        # event_mask above, so only a pass-through pre-selection is defined).
        selection_masks = {}
        preselection = PackedSelection()
        preselection.add("Dummy", m3j > 0)
        selection_masks["Pre-selection"] = preselection.require(
            **{name: True for name in preselection.names})

        # Fill histograms
        for selection_name, selection_mask in selection_masks.items():
            common = {"dataset": dataset_name, "selection": selection_name}

            output["mjjj"].fill(mjjj=m3j[selection_mask], **common)

            output["m_ij"].fill(m_01=m_ij[:, 0][selection_mask],
                                m_12=m_ij[:, 1][selection_mask],
                                m_20=m_ij[:, 2][selection_mask],
                                **common)

            output["dR_ij"].fill(dR_01=dR_ij[:, 0][selection_mask],
                                 dR_12=dR_ij[:, 1][selection_mask],
                                 dR_20=dR_ij[:, 2][selection_mask],
                                 **common)

            output["dEta_ij"].fill(dEta_01=dEta_ij[:, 0][selection_mask],
                                   dEta_12=dEta_ij[:, 1][selection_mask],
                                   dEta_20=dEta_ij[:, 2][selection_mask],
                                   **common)

            output["moverM_ij"].fill(moverM_01=m_01_overM[selection_mask],
                                     moverM_12=m_12_overM[selection_mask],
                                     moverM_20=m_20_overM[selection_mask],
                                     **common)

            output["pt_i"].fill(pt_0=selected_jets[:, 0][selection_mask].pt,
                                pt_1=selected_jets[:, 1][selection_mask].pt,
                                pt_2=selected_jets[:, 2][selection_mask].pt,
                                **common)

            output["eta_i"].fill(
                eta_0=selected_jets[:, 0][selection_mask].eta,
                eta_1=selected_jets[:, 1][selection_mask].eta,
                eta_2=selected_jets[:, 2][selection_mask].eta,
                **common)

            output["ptoverM_i"].fill(
                ptoverM_0=pt_i_overM[:, 0][selection_mask],
                ptoverM_1=pt_i_overM[:, 1][selection_mask],
                ptoverM_2=pt_i_overM[:, 2][selection_mask],
                **common)

            output["dR_i_jk"].fill(dR_0_12=dR_i_jk[:, 0][selection_mask],
                                   dR_1_20=dR_i_jk[:, 1][selection_mask],
                                   dR_2_01=dR_i_jk[:, 2][selection_mask],
                                   **common)

            output["dEta_i_jk"].fill(
                dEta_0_12=dEta_i_jk[:, 0][selection_mask],
                dEta_1_20=dEta_i_jk[:, 1][selection_mask],
                dEta_2_01=dEta_i_jk[:, 2][selection_mask],
                **common)

            output["dPhi_i_jk"].fill(
                dPhi_0_12=dPhi_i_jk[:, 0][selection_mask],
                dPhi_1_20=dPhi_i_jk[:, 1][selection_mask],
                dPhi_2_01=dPhi_i_jk[:, 2][selection_mask],
                **common)

            dpt_0 = dPtoverM_0_12[selection_mask]
            dpt_1 = dPtoverM_1_20[selection_mask]
            dpt_2 = dPtoverM_2_01[selection_mask]
            output["dPtoverM_i_jk"].fill(dPtoverM_0_12=dpt_0,
                                         dPtoverM_1_20=dpt_1,
                                         dPtoverM_2_01=dpt_2,
                                         **common)

            # Per-event extrema over the three jets / three combinations
            pt_overM_sel = pt_i_overM[selection_mask]
            dR_ij_sel = dR_ij[selection_mask]
            dEta_ij_sel = dEta_ij[selection_mask]
            dR_i_jk_sel = dR_i_jk[selection_mask]
            dEta_i_jk_sel = dEta_i_jk[selection_mask]
            dPhi_i_jk_sel = dPhi_i_jk[selection_mask]
            jets_sel = selected_jets[selection_mask]

            # Each key is both the histogram name and its axis keyword.
            extrema = {
                "max_dR": awk.max(dR_ij_sel, axis=1),
                "max_dEta": awk.max(dEta_ij_sel, axis=1),
                "min_dR": awk.min(dR_ij_sel, axis=1),
                "min_dEta": awk.min(dEta_ij_sel, axis=1),
                "min_pt": awk.min(jets_sel.pt, axis=1),
                "max_eta": awk.max(abs(jets_sel.eta), axis=1),
                "max_ptoverM": awk.max(pt_overM_sel, axis=1),
                "min_ptoverM": awk.min(pt_overM_sel, axis=1),
                "max_dR_j_jj": awk.max(dR_i_jk_sel, axis=1),
                "max_dEta_j_jj": awk.max(dEta_i_jk_sel, axis=1),
                "max_dPhi_j_jj": awk.max(dPhi_i_jk_sel, axis=1),
                "min_dR_j_jj": awk.min(dR_i_jk_sel, axis=1),
                "min_dEta_j_jj": awk.min(dEta_i_jk_sel, axis=1),
                "min_dPhi_j_jj": awk.min(dPhi_i_jk_sel, axis=1),
            }
            for hist_name, values in extrema.items():
                # Reductions may yield None; -99 is the placeholder value.
                output[hist_name].fill(
                    **{hist_name: awk.fill_none(values, -99)}, **common)

            # Element-wise extrema of the three dPt/M arrays, vectorized
            # instead of looping over events in Python.
            max_dPtoverM = np.asarray(
                np.maximum(np.maximum(dpt_0, dpt_1), dpt_2))
            min_dPtoverM = np.asarray(
                np.minimum(np.minimum(dpt_0, dpt_1), dpt_2))
            output["max_dPtoverM_j_jj"].fill(max_dPtoverM_j_jj=max_dPtoverM,
                                             **common)
            output["min_dPtoverM_j_jj"].fill(min_dPtoverM_j_jj=min_dPtoverM,
                                             **common)

        return output
Example #22
0
    def process(self, events):
        """Select e-mu + jets events and fill jet, b-tag and lepton histograms.

        An event is kept when it fires one of the two mu-e triggers, has
        exactly one muon and one electron passing the pt/eta cuts with
        opposite charge, and at least two jets passing the pt/eta cuts.

        Parameters:
            events: event chunk exposing ``metadata['dataset']``,
                ``genWeight``, ``HLT``, ``Muon``, ``Electron`` and ``Jet``.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` with
            ``sumw`` and the histograms filled.
        """
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']
        output['sumw'][dataset] += ak.sum(events.genWeight)

        ##############
        # Trigger level
        triggers = [
            "HLT_Mu12_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ",
            "HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_DZ",
        ]

        # Remove the "HLT_" prefix explicitly. str.strip("HLT_") would strip
        # any of the characters H, L, T and _ from *both* ends of the name.
        prefix = "HLT_"
        trig_arrs = [
            events.HLT[_trig[len(prefix):] if _trig.startswith(prefix) else _trig]
            for _trig in triggers
        ]
        req_trig = np.zeros(len(events), dtype='bool')
        for t in trig_arrs:
            req_trig = req_trig | t

        ############
        # Event level

        ## Muon cuts
        # muon twiki: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2
        # NOTE(review): these are attribute assignments on `events`; whether
        # they carry over to `events[event_level]` depends on the awkward
        # version in use -- confirm. The per-object cuts are re-applied on
        # the selected events below either way.
        # abs() must wrap eta, not the comparison result, so that muons with
        # eta < -2.4 are rejected too. (A tightId requirement is disabled.)
        events.Muon = events.Muon[(events.Muon.pt > 30) & (abs(events.Muon.eta) < 2.4)]
        events.Muon = ak.pad_none(events.Muon, 1, axis=1)
        req_muon = (ak.count(events.Muon.pt, axis=1) == 1)

        ## Electron cuts
        # electron twiki: https://twiki.cern.ch/twiki/bin/viewauth/CMS/CutBasedElectronIdentificationRun2
        events.Electron = events.Electron[(events.Electron.pt > 30) & (abs(events.Electron.eta) < 2.4)]
        events.Electron = ak.pad_none(events.Electron, 1, axis=1)
        req_ele = (ak.count(events.Electron.pt, axis=1) == 1)

        ## Jet cuts
        events.Jet = events.Jet[(events.Jet.pt > 25) & (abs(events.Jet.eta) <= 2.5)]
        req_jets = (ak.count(events.Jet.pt, axis=1) >= 2)

        # The collections were padded with None, so this product is None for
        # events without an electron or without a muon.
        req_opposite_charge = events.Electron[:, 0].charge * events.Muon[:, 0].charge == -1

        event_level = req_trig & req_muon & req_ele & req_opposite_charge & req_jets
        # Treat undefined (None) decisions as "fail" so the mask is a plain
        # boolean array and no None events end up in the selection.
        event_level = ak.fill_none(event_level, False)

        # Selected
        selev = events[event_level]

        #########

        # Per electron
        el_eta   = (abs(selev.Electron.eta) <= 2.4)
        el_pt    = selev.Electron.pt > 30
        el_level = el_eta & el_pt

        # Per muon
        mu_eta   = (abs(selev.Muon.eta) <= 2.4)
        mu_pt    = selev.Muon.pt > 30
        mu_level = mu_eta & mu_pt

        # Per jet
        jet_eta    = (abs(selev.Jet.eta) <= 2.4)
        jet_pt     = selev.Jet.pt > 25
        # puId is only required below 50 in pt
        jet_pu     = ( ((selev.Jet.puId > 6) & (selev.Jet.pt < 50)) | (selev.Jet.pt > 50) )
        jet_id     = selev.Jet.jetId >= 2
        jet_level  = jet_pu & jet_eta & jet_pt & jet_id

        # b-tag twiki : https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
        # btagDeepB thresholds: L=0.0494, M=0.2770, T=0.7264
        bjet_disc_t  = selev.Jet.btagDeepB > 0.7264
        bjet_disc_m  = selev.Jet.btagDeepB > 0.2770
        bjet_disc_l  = selev.Jet.btagDeepB > 0.0494
        bjet_level_t = jet_level & bjet_disc_t
        bjet_level_m = jet_level & bjet_disc_m
        bjet_level_l = jet_level & bjet_disc_l

        sel    = selev.Electron[el_level]
        smu    = selev.Muon[mu_level]
        sjets  = selev.Jet[jet_level]
        sbjets_t = selev.Jet[bjet_level_t]
        sbjets_m = selev.Jet[bjet_level_m]
        sbjets_l = selev.Jet[bjet_level_l]

        # Fill histograms dynamically
        for histname, h in output.items():
            if (histname not in self.jet_hists) and (histname not in self.deepcsv_hists):
                continue
            # Only pass the histogram fields that the selected jets provide
            fields = {k: ak.flatten(sjets[k], axis=None) for k in h.fields if k in dir(sjets)}
            h.fill(dataset=dataset, **fields)

        def flatten(ar):  # flatten awkward into a 1d array to hist
            return ak.flatten(ar, axis=None)

        output['njet'].fill(dataset=dataset,  njet=flatten(ak.num(sjets)))
        output['nbjet_t'].fill(dataset=dataset, nbjet_t=flatten(ak.num(sbjets_t)))
        output['nbjet_m'].fill(dataset=dataset, nbjet_m=flatten(ak.num(sbjets_m)))
        output['nbjet_l'].fill(dataset=dataset, nbjet_l=flatten(ak.num(sbjets_l)))
        output['nel'].fill(dataset=dataset,   nel=flatten(ak.num(sel)))
        output['nmu'].fill(dataset=dataset,   nmu=flatten(ak.num(smu)))

        output['lelpt'].fill(dataset=dataset, lelpt=flatten(selev.Electron[:, 0].pt))
        output['lmupt'].fill(dataset=dataset, lmupt=flatten(selev.Muon[:, 0].pt))
        output['ljpt'].fill(dataset=dataset,  ljpt=flatten(selev.Jet[:, 0].pt))
        output['sljpt'].fill(dataset=dataset,  sljpt=flatten(selev.Jet[:, 1].pt))

        return output
    def process(self, events):
        """Apply the same-sign dilepton selection and fill histograms.

        Events with more than two jets are kept as a loose preselection.
        Leptons, jets and event-level quantities are then built, the
        selection bits are registered in a ``PackedSelection``, a cutflow is
        recorded and the histograms are filled with either the same-sign
        ("ss") or the baseline ("BL") selection.

        Parameters:
            events: event chunk exposing ``metadata['dataset']`` and the
                collections read below (``Jet``, ``GenL``, ``GenPart``,
                ``MET``, ``PV``, ...).

        Returns:
            The accumulator created by ``self.accumulator.identity()`` with
            counters and histograms filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
        n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta) < 2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        jf          = cross(j_fwd, jet)
        mjf         = (jf['0'] + jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

        ## event selectors
        filters   = getFilters(ev, year=self.year, dataset=dataset)

        n_tight_lep = ak.num(electron) + ak.num(muon)
        sum_pdgId   = ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1)

        def n_lep_above(pt_min):
            # number of tight electrons plus muons with pt above pt_min
            return ak.num(electron[(electron.pt > pt_min)]) + ak.num(muon[(muon.pt > pt_min)])

        dilep      = (n_tight_lep == 2)
        pos_charge = (sum_pdgId < 0)
        neg_charge = (sum_pdgId > 0)
        lep0pt     = (n_lep_above(25) > 0)
        lep0pt_40  = (n_lep_above(40) > 0)
        lep1pt     = (n_lep_above(20) > 1)
        lep1pt_30  = (n_lep_above(30) > 1)
        lepveto    = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights( len(ev) )

        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto',       lepveto)
        selection.add('dilep',         dilep )
        selection.add('filter',        (filters) )
        selection.add('p_T(lep0)>25',  lep0pt )
        selection.add('p_T(lep0)>40',  lep0pt_40 )
        selection.add('p_T(lep1)>20',  lep1pt )
        selection.add('p_T(lep1)>30',  lep1pt_30 )
        selection.add('SS',            ( SSlepton | SSelectron | SSmuon) )
        selection.add('pos',           ( pos_charge ) )
        selection.add('neg',           ( neg_charge ) )
        selection.add('N_jet>3',       (ak.num(jet)>=4) )
        selection.add('N_jet>4',       (ak.num(jet)>=5) )
        selection.add('N_central>2',   (ak.num(central)>=3) )
        selection.add('N_central>3',   (ak.num(central)>=4) )
        selection.add('N_btag>0',      (ak.num(btag)>=1) )
        selection.add('MET>50',        (ev.MET.pt>50) )
        selection.add('ST',            (st>600) )
        selection.add('N_fwd>0',       (ak.num(fwd)>=1 ))
        selection.add('delta_eta',     (ak.any(delta_eta>2, axis=1) ) )
        selection.add('fwd_p>500',     (ak.any(j_fwd.p>500, axis=1) ) )

        ss_reqs = ['lepveto', 'dilep', 'SS', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'N_jet>3', 'N_central>2', 'N_btag>0']
        bl_reqs = ss_reqs + ['N_fwd>0', 'N_jet>4', 'N_central>3', 'ST', 'MET>50', 'delta_eta']
        sr_reqs = bl_reqs + ['fwd_p>500', 'p_T(lep0)>40', 'p_T(lep1)>30']

        ss_selection = selection.require(**{ sel: True for sel in ss_reqs })
        BL = selection.require(**{ sel: True for sel in bl_reqs })

        # The cutflow adds the signal-region requirements one at a time.
        cutflow     = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in sr_reqs:
            cutflow_reqs_d[req] = True
            cutflow.addRow( req, selection.require(**cutflow_reqs_d) )

        # Event weights, evaluated once after all components were added
        event_weight = weight.weight()
        w_ss = event_weight[ss_selection]
        w_bl = event_weight[BL]

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvs, weight=w_ss)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvsGood, weight=w_ss)
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[ss_selection], weight=w_ss)
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[ss_selection], weight=w_ss)
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[ss_selection], weight=w_ss)
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=w_ss)
        # Muon multiplicity (was filled with the electron count by mistake)
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[ss_selection], weight=w_ss)
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[ss_selection], weight=w_ss)
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=w_bl)
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=w_bl)
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=w_bl)
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=w_bl)
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=w_bl)
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=w_bl)
        output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[ss_selection], n2=n_nonprompt[ss_selection], n_ele=ak.num(electron)[ss_selection], weight=w_ss)

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[ss_selection].pt,
            phi  = ev.MET[ss_selection].phi,
            weight = w_ss
        )

        # Leading / trailing (gen) lepton kinematics in the baseline selection
        lepton_hists = {
            'lead_gen_lep': leading_gen_lep,
            'trail_gen_lep': trailing_gen_lep,
            'lead_lep': leading_lepton,
            'trail_lep': trailing_lepton,
        }
        for hist_name, candidates in lepton_hists.items():
            selected = candidates[BL]
            output[hist_name].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(selected.pt)),
                eta = ak.to_numpy(ak.flatten(selected.eta)),
                phi = ak.to_numpy(ak.flatten(selected.phi)),
                weight = w_bl
            )

        # Three leading jets; the length-1 slice keeps events with fewer jets
        # as empty lists instead of raising on a missing index.
        for jet_idx, hist_name in enumerate(('j1', 'j2', 'j3')):
            nth_jet = jet[:, jet_idx:jet_idx + 1][BL]
            output[hist_name].fill(
                dataset = dataset,
                pt  = ak.flatten(nth_jet.pt_nom),
                eta = ak.flatten(nth_jet.eta),
                phi = ak.flatten(nth_jet.phi),
                weight = w_bl
            )

        return output
Example #24
0
    def process(self, events):
        """Count three-jet events surviving scans of single-variable cuts.

        Events must pass the year/dataset-dependent HLT selection and contain
        at least three jets with ``pt > 30``, ``|eta| < 2.5`` and ``isTight``.
        The first three such jets of each event are used to build the scan
        variables.  For every threshold of every scan the number of passing
        events is added to ``output["N_<scan><threshold>"][dataset_name]``.

        Args:
            events: event record providing ``metadata['dataset']``, ``HLT``
                and ``Jet``.

        Returns:
            The accumulator created by ``self._accumulator.identity()`` with
            ``total_events`` and all scan counters updated.
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # HLT selection
        # NOTE(review): `year` is not defined in this method; it is read from
        # an enclosing scope.  If it matches none of the branches below,
        # HLT_mask stays an empty list and is still and-ed into the event
        # mask -- confirm that this fallback is intended.
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        elif year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        elif year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns (0, 1, 2) , (1, 2, 0)

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        max_dR = awk.max(dR_ij, axis=1)
        max_dEta = awk.max(dEta_ij, axis=1)
        min_dR = awk.min(dR_ij, axis=1)
        min_dEta = awk.min(dEta_ij, axis=1)
        min_pT = awk.min(selected_jets.pt, axis=1)
        # Largest |eta| among the selected jets.  The absolute value has to be
        # taken per jet *before* the maximum; abs(max(eta)) would ignore jets
        # at large negative eta.
        max_eta = awk.max(abs(selected_jets.eta), axis=1)

        # The quantities from here down to m_20_overM are computed but not
        # consumed by any of the scans below.
        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)
        dPt_i_jk = abs(selected_jets[:, jet_i].pt -
                       (selected_jets[:, jet_j] + selected_jets[:, jet_k]).pt)

        max_dPhi_jjj = awk.max(dPhi_i_jk, axis=1)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        max_pt_overM = awk.max(pt_i_overM, axis=1)
        min_pt_overM = awk.min(pt_i_overM, axis=1)
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j

        def count_passing(label, mask):
            # Number of three-jet events passing a single named cut.
            selection = PackedSelection()
            selection.add(label, mask)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            return len(events_3j[sel_mask])

        # Lower cut on the softest selected jet pt.
        for pt_cut in range(30, 1150, 5):
            output[f"N_min_pT_cut{pt_cut}"][dataset_name] += count_passing(
                "MinJetPt_cut", min_pT > pt_cut)

        # Upper cut on the largest |eta| of the selected jets.
        for eta_cut in np.arange(0, 2.5, 0.05):
            output[f"N_max_eta_cut{eta_cut}"][dataset_name] += count_passing(
                "MaxJetEta_cut", max_eta < eta_cut)

        # Upper cut on the largest pairwise |delta eta|.
        for dEta_max_cut in np.arange(0, 5, 0.1):
            output[f"N_dEta_jj_max_cut{dEta_max_cut}"][
                dataset_name] += count_passing("MaxJJdEta_cut",
                                               max_dEta < dEta_max_cut)

        # Lower cut on the smallest pairwise delta R.
        for dR_min_cut in np.arange(0, 5, 0.1):
            output[f"N_dR_jj_min_cut{dR_min_cut}"][
                dataset_name] += count_passing("MinJJdR_cut",
                                               min_dR > dR_min_cut)

        # TODO: add min/max scans on max_dPhi_jjj (counters
        # "N_dPhi_jjj_max_min_cut<value>"); use np.arange for the float steps.

        return output
Example #25
0
# Masks: electron and muon `count()` results must add up to at least 2.
Lep5 = (Elec5_PT_arr.count() + Muon5_PT_arr.count()) >= 2
Lep6 = (Elec6_PT_arr.count() + Muon6_PT_arr.count()) >= 2
Lep7 = (Elec7_PT_arr.count() + Muon7_PT_arr.count()) >= 2
Lep8 = (Elec8_PT_arr.count() + Muon8_PT_arr.count()) >= 2

# Masks: jet `count()` result must be at least 2.
Jet1 = Jet1_PT_arr.count() >= 2
Jet2 = Jet2_PT_arr.count() >= 2
Jet3 = Jet3_PT_arr.count() >= 2
Jet4 = Jet4_PT_arr.count() >= 2
Jet5 = Jet5_PT_arr.count() >= 2
Jet6 = Jet6_PT_arr.count() >= 2
Jet7 = Jet7_PT_arr.count() >= 2
Jet8 = Jet8_PT_arr.count() >= 2

## scalar sum of lepton pt
# Despite the `_pt_sum` suffix, each name holds the result of the `> 120`
# comparison applied to (electron pt sum + muon pt sum), not the sum itself.
lep1_pt_sum = (ak.sum(Elec1_PT_arr, axis=-1)
               + ak.sum(Muon1_PT_arr, axis=-1)) > 120
lep2_pt_sum = (ak.sum(Elec2_PT_arr, axis=-1)
               + ak.sum(Muon2_PT_arr, axis=-1)) > 120
lep3_pt_sum = (ak.sum(Elec3_PT_arr, axis=-1)
               + ak.sum(Muon3_PT_arr, axis=-1)) > 120
lep4_pt_sum = (ak.sum(Elec4_PT_arr, axis=-1)
               + ak.sum(Muon4_PT_arr, axis=-1)) > 120
lep5_pt_sum = (ak.sum(Elec5_PT_arr, axis=-1)
               + ak.sum(Muon5_PT_arr, axis=-1)) > 120
lep6_pt_sum = (ak.sum(Elec6_PT_arr, axis=-1)
               + ak.sum(Muon6_PT_arr, axis=-1)) > 120
lep7_pt_sum = (ak.sum(Elec7_PT_arr, axis=-1)
               + ak.sum(Muon7_PT_arr, axis=-1)) > 120
lep8_pt_sum = (ak.sum(Elec8_PT_arr, axis=-1)
               + ak.sum(Muon8_PT_arr, axis=-1)) > 120
    def process(self, events):
        """Fill control histograms for an e-mu selection with jets.

        Events with more than two jets are kept.  Lepton and jet collections
        are built, event weights are attached for every dataset except
        ``'MuonEG'``, and two selections are defined:

        * ``os_selection``: lepton veto, exactly one electron and one muon,
          trigger, filters, lepton pt thresholds and opposite charge;
        * ``BL``: ``os_selection`` plus the b-tag, jet, central-jet,
          forward-jet and MET requirements.

        Multiplicity and MET histograms are filled with ``os_selection``,
        object kinematics with ``BL``.  For datasets other than ``'MuonEG'``
        the jet-dependent part is repeated for every entry of
        ``self.variations`` and stored under ``'<name>_<variation>'``.

        Args:
            events: event record providing ``Jet``, ``MET``, ``PV``,
                ``metadata['dataset']`` and the weight branches used below.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` with
            counters, cutflow rows and histograms filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        # ak.argsort sorts ascending by default, so descending order must be
        # requested explicitly for the first two entries to be the highest
        # scores.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB,
                                             ascending=False)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        # 'MuonEG' gets no lumi/PU/b-tag/lepton weights and no variations.
        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_btag=2', (ak.num(btag) == 2))
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('N_central>1', (ak.num(central) >= 2))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))
        selection.add('MET>30', (ev.MET.pt > 30))

        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + [
            'N_btag=2', 'N_jet>2', 'N_central>1', 'N_fwd>0', 'MET>30'
        ]

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        # One cutflow row per requirement, each including all previous ones.
        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # All weights have been added at this point; evaluate them once and
        # reuse the per-selection slices for every fill.
        event_weight = weight.weight()
        os_weight = event_weight[os_selection]
        bl_weight = event_weight[BL]

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=os_weight)
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=os_weight)
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=os_weight)
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[os_selection],
                           weight=os_weight)
        output['N_central'].fill(dataset=dataset,
                                 multiplicity=ak.num(central)[os_selection],
                                 weight=os_weight)
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[os_selection],
                             weight=os_weight)
        # Muon multiplicity must be counted from the muon collection.
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(muon)[os_selection],
                            weight=os_weight)
        output['N_fwd'].fill(dataset=dataset,
                             multiplicity=ak.num(fwd)[os_selection],
                             weight=os_weight)

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=os_weight)

        output['electron'].fill(dataset=dataset,
                                pt=ak.to_numpy(ak.flatten(electron[BL].pt)),
                                eta=ak.to_numpy(ak.flatten(electron[BL].eta)),
                                phi=ak.to_numpy(ak.flatten(electron[BL].phi)),
                                weight=bl_weight)

        output['muon'].fill(dataset=dataset,
                            pt=ak.to_numpy(ak.flatten(muon[BL].pt)),
                            eta=ak.to_numpy(ak.flatten(muon[BL].eta)),
                            phi=ak.to_numpy(ak.flatten(muon[BL].phi)),
                            weight=bl_weight)

        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight=bl_weight)

        output['trail_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight=bl_weight)

        output['fwd_jet'].fill(dataset=dataset,
                               pt=ak.flatten(high_p_fwd[BL].pt_nom),
                               eta=ak.flatten(high_p_fwd[BL].eta),
                               phi=ak.flatten(high_p_fwd[BL].phi),
                               weight=bl_weight)

        output['b1'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
                          weight=bl_weight)

        output['b2'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
                          weight=bl_weight)

        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=bl_weight)

        output['j2'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(jet[:, 1:2][BL].eta),
                          phi=ak.flatten(jet[:, 1:2][BL].phi),
                          weight=bl_weight)

        output['j3'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
                          eta=ak.flatten(jet[:, 2:3][BL].eta),
                          phi=ak.flatten(jet[:, 2:3][BL].phi),
                          weight=bl_weight)

        # Now, take care of systematic uncertainties
        if dataset != 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # `var` is used both as the jet pt variation passed to
                # getPtEtaPhi and as the attribute name read from ev.MET.

                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt > 25)]
                jet = jet[~match(
                    jet, muon,
                    deltaRCut=0.4)]  # remove jets that overlap with muons
                jet = jet[~match(
                    jet, electron,
                    deltaRCut=0.4)]  # remove jets that overlap with electrons

                central = jet[(abs(jet.eta) < 2.4)]
                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd = getFwdJet(light)
                fwd_noPU = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.p, axis=1))]  # highest momentum spectator
                high_pt_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(
                    ak.argmax(abs(fwd.eta), axis=1))]  # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                # descending order, so that [:, :2] keeps the highest scores
                high_score_btag = central[ak.argsort(
                    central.btagDeepFlavB, ascending=False)][:, :2]

                # get the modified selection -> more difficult
                selection.add('N_jet>2_' + var,
                              (ak.num(jet.pt) >= 3))  # stupid bug here...
                selection.add('N_btag=2_' + var, (ak.num(btag) == 2))
                selection.add('N_central>1_' + var, (ak.num(central) >= 2))
                selection.add('N_fwd>0_' + var, (ak.num(fwd) >= 1))
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                ## Don't change the selection for now...
                bl_reqs = os_reqs + [
                    'N_jet>2_' + var, 'MET>30_' + var, 'N_btag=2_' + var,
                    'N_central>1_' + var, 'N_fwd>0_' + var
                ]
                bl_reqs_d = {sel: True for sel in bl_reqs}
                BL = selection.require(**bl_reqs_d)
                bl_weight = event_weight[BL]

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet)[os_selection],
                    weight=os_weight)
                output['N_fwd_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(fwd)[os_selection],
                    weight=os_weight)
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[os_selection],
                    weight=os_weight)
                output['N_central_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(central)[os_selection],
                    weight=os_weight)

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet.eta[:, 0:1][BL]),
                                         phi=ak.flatten(jet.phi[:, 0:1][BL]),
                                         weight=bl_weight)

                output['b1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight=bl_weight)

                output['fwd_jet_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_p_fwd[BL].pt),
                    eta=ak.flatten(high_p_fwd[BL].eta),
                    phi=ak.flatten(high_p_fwd[BL].phi),
                    weight=bl_weight)

                output['MET_' + var].fill(dataset=dataset,
                                          pt=getattr(ev.MET,
                                                     var)[os_selection],
                                          phi=ev.MET[os_selection].phi,
                                          weight=os_weight)

        return output
Example #27
0
    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            ## Generated leptons
            gen_lep = ev.GenL
            leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
            trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton+trailing_lepton).mass
        dilepton_pt = (leading_lepton+trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)
        
        lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True), 0)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
            n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Tau and other stuff
        tau       = getTaus(ev)
        track     = getIsoTracks(ev)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        bl          = cross(lepton, high_score_btag)
        bl_dR       = delta_r(bl['0'], bl['1'])
        min_bl_dR   = ak.min(bl_dR, axis=1)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        
        # define the weight
        weight = Weights( len(ev) )
        

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        

        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )
        
        BL = sel.dilep_baseline(cutflow=cutflow, SS=True)

        weight_BL = weight.weight()[BL]        

        if True:
            # define the inputs to the NN
            # this is super stupid. there must be a better way.
            NN_inputs = np.stack([
                ak.to_numpy(ak.num(jet[BL])),
                ak.to_numpy(ak.num(tau[BL])),
                ak.to_numpy(ak.num(track[BL])),
                ak.to_numpy(st[BL]),
                ak.to_numpy(ev.MET[BL].pt),
                ak.to_numpy(ak.max(mjf[BL], axis=1)),
                ak.to_numpy(pad_and_flatten(delta_eta[BL])),
                ak.to_numpy(pad_and_flatten(leading_lepton[BL].pt)),
                ak.to_numpy(pad_and_flatten(leading_lepton[BL].eta)),
                ak.to_numpy(pad_and_flatten(trailing_lepton[BL].pt)),
                ak.to_numpy(pad_and_flatten(trailing_lepton[BL].eta)),
                ak.to_numpy(pad_and_flatten(dilepton_mass[BL])),
                ak.to_numpy(pad_and_flatten(dilepton_pt[BL])),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].pt)),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].p)),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].eta)),
                ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].pt)),
                ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].pt)),
                ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].eta)),
                ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].eta)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].pt)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].pt)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].eta)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].eta)),
                ak.to_numpy(min_bl_dR[BL]),
                ak.to_numpy(min_mt_lep_met[BL]),
            ])

            NN_inputs = np.moveaxis(NN_inputs, 0, 1)

            model, scaler = load_onnx_model('v8')

            try:
                NN_inputs_scaled = scaler.transform(NN_inputs)

                NN_pred    = predict_onnx(model, NN_inputs_scaled)

                best_score = np.argmax(NN_pred, axis=1)


            except ValueError:
                #print ("Empty NN_inputs")
                NN_pred = np.array([])
                best_score = np.array([])
                NN_inputs_scaled = NN_inputs

            #k.clear_session()

            output['node'].fill(dataset=dataset, multiplicity=best_score, weight=weight_BL)

            output['node0_score_incl'].fill(dataset=dataset, score=NN_pred[:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL)
            output['node0_score'].fill(dataset=dataset, score=NN_pred[best_score==0][:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==0])
            output['node1_score'].fill(dataset=dataset, score=NN_pred[best_score==1][:,1] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==1])
            output['node2_score'].fill(dataset=dataset, score=NN_pred[best_score==2][:,2] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==2])
            output['node3_score'].fill(dataset=dataset, score=NN_pred[best_score==3][:,3] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==3])
            output['node4_score'].fill(dataset=dataset, score=NN_pred[best_score==4][:,4] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==4])

            SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0)))
            SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0)))
            leading_lepton_BL = leading_lepton[BL]

            output['lead_lep_SR_pp'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
                weight = weight_BL[SR_sel_pp]
            )

            output['lead_lep_SR_mm'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
                weight = weight_BL[SR_sel_mm]
            )

            del model
            del scaler
            del NN_inputs, NN_inputs_scaled, NN_pred

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL)
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL)
        output['ST'].fill(dataset=dataset, pt=st[BL], weight=weight_BL)
        output['HT'].fill(dataset=dataset, pt=ht[BL], weight=weight_BL)

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
            output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
            output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
            output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
            output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
            output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
            output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL)
        
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight_BL
        )

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['lead_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
                weight = weight_BL
            )

            output['trail_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
                weight = weight_BL
            )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight_BL
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight_BL
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight_BL
        )
        
        
        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight_BL
        )
            
        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight_BL)
        
        return output
Example #28
0
    def process(self, events):
        """Apply the e/mu channel selections to one chunk of events and fill the cutflow.

        Parameters
        ----------
        events
            Event chunk exposing ``metadata["dataset"]`` and the ``HLT``, ``Flag``,
            ``Muon``, ``Electron``, ``Jet``, ``FatJet`` and ``FatJetLS`` collections
            read below (plus ``genWeight`` and ``Pileup`` when present).

        Returns
        -------
        The accumulator created by ``self.accumulator.identity()`` with ``sumw``
        (only when ``genWeight`` exists) and ``cutflow`` filled.
        """
        # get meta infos
        dataset = events.metadata["dataset"]
        # chunks without a genWeight branch are treated as real data
        isRealData = not hasattr(events, "genWeight")
        n_events = len(events)
        selection = processor.PackedSelection()
        weights = processor.Weights(n_events)
        output = self.accumulator.identity()

        # weights
        if not isRealData:
            output['sumw'][dataset] += awkward1.sum(events.genWeight)

        # trigger: OR of all configured paths per channel
        triggers = {}
        for channel in ["e", "mu"]:
            fired = np.zeros(n_events, dtype='bool')
            for path in self._trigger[channel]:
                try:
                    fired = fired | events.HLT[path]
                except Exception:
                    # A path missing from this chunk is skipped with a warning.
                    # `Exception` (not a bare except) so that KeyboardInterrupt /
                    # SystemExit still propagate.
                    warnings.warn("Missing trigger %s" % path, RuntimeWarning)
            triggers[channel] = fired

        # met filter: AND of all listed flags
        met_filters = ["goodVertices",
                       "globalSuperTightHalo2016Filter",
                       "HBHENoiseFilter",
                       "HBHENoiseIsoFilter",
                       "EcalDeadCellTriggerPrimitiveFilter",
                       "BadPFMuonFilter",
                       ]
        met_filters_mask = np.ones(n_events, dtype='bool')
        for flag in met_filters:
            met_filters_mask = met_filters_mask & events.Flag[flag]
        selection.add("met_filter", awkward1.to_numpy(met_filters_mask))

        # load objects
        muons = events.Muon
        electrons = events.Electron
        jets = events.Jet
        fatjets = events.FatJet
        fatjetsLS = events.FatJetLS

        # muons
        goodmuon = (
            (muons.mediumId)
            & (muons.miniPFRelIso_all <= 0.2)
            & (muons.pt >= 27)
            & (abs(muons.eta) <= 2.4)
            & (abs(muons.dz) < 0.1)
            & (abs(muons.dxy) < 0.05)
            & (muons.sip3d < 4)
        )
        good_muons = muons[goodmuon]
        ngood_muons = awkward1.sum(goodmuon, axis=1)

        # electrons
        goodelectron = (
            (electrons.mvaFall17V2noIso_WP90)
            & (electrons.pt >= 30)
            & (abs(electrons.eta) <= 1.479)
            & (abs(electrons.dz) < 0.1)
            & (abs(electrons.dxy) < 0.05)
            & (electrons.sip3d < 4)
        )
        good_electrons = electrons[goodelectron]
        ngood_electrons = awkward1.sum(goodelectron, axis=1)

        # good leptons, ordered by decreasing pt so that `firsts` returns the
        # hardest lepton (argsort defaults to ascending order, which would pick
        # the softest one instead)
        good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
        good_leptons = good_leptons[awkward1.argsort(good_leptons.pt, ascending=False)]

        # lepton candidate (None for events without any good lepton)
        candidatelep = awkward1.firsts(good_leptons)

        # lepton channel selection
        # not sure if need to require 0 muons or 0 electrons in the other channel
        selection.add("ch_e", awkward1.to_numpy((triggers["e"]) & (ngood_electrons == 1) & (ngood_muons == 0)))
        selection.add("ch_mu", awkward1.to_numpy((triggers["mu"]) & (ngood_electrons == 0) & (ngood_muons == 1)))

        # jets
        ht = awkward1.sum(jets[jets.pt > 30].pt, axis=1)
        selection.add("ht_400", awkward1.to_numpy(ht >= 400))
        goodjet = (
            (jets.isTight)
            & (jets.pt > 30)
            & (abs(jets.eta) <= 2.5)
        )
        good_jets = jets[goodjet]

        # fat jets
        jID = "isTight"
        # TODO: add mass correction

        # a way to get the first two subjets
        # cart = awkward1.cartesian([fatjets, subjets], nested=True)
        # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
        # sj1 = subjets[idxes[:,:,0]]
        # sj2 = subjets[idxes[:,:,1]]

        good_fatjet = (
            (getattr(fatjets, jID))
            & (abs(fatjets.eta) <= 2.4)
            & (fatjets.pt > 50)
            & (fatjets.msoftdrop > 30)
            & (fatjets.msoftdrop < 210)
            # TODO: require exactly 2 subjets? This can probably be done with
            # FatJet_subJetIdx1 / FatJet_subJetIdx2.
            # Reduce over the innermost axis only: without `axis`, `all` collapses
            # the whole chunk to one scalar, so a single failing subjet anywhere
            # would reject every fat jet.
            # NOTE(review): assumes `fatjets.subjets` is nested one level below
            # the fat jets -- confirm against the schema in use.
            & (awkward1.all(fatjets.subjets.pt >= 20, axis=-1))
            & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4, axis=-1))
        )
        good_fatjets = fatjets[good_fatjet]

        # hbb candidate
        mask_hbb = (
            (good_fatjets.pt > 200)
            & (good_fatjets.delta_r(candidatelep) > 2.0)
        )
        candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

        # b-tag #& (good_fatjets.particleNetMD_Xbb > 0.9)
        # score would be larger for tight category (0.97)
        selection.add('hbb_btag', awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8))

        # No AK4 b-tagged jets away from bb jet.
        # The dR mask is built from `good_jets`, so it must index `good_jets`
        # (indexing the unfiltered `jets` mismatches the per-event counts).
        jets_HbbV = good_jets[good_jets.delta_r(candidateHbb) >= 1.2]
        # Veto semantics: the event passes when the highest b-tag score among the
        # away jets is below the medium working point. With mask_identity=False an
        # event without away jets yields -inf and therefore passes the veto.
        selection.add(
            'hbb_vetobtagaway',
            awkward1.to_numpy(
                awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False)
                < BTagEfficiency.btagWPs[self._year]['medium']
            ),
        )

        # fat jets Lepton Subtracted
        # wjj candidate: the lepton-subtracted fat jet closest to the lepton
        mask_wjj = (
            (fatjetsLS.pt > 50)
            & (fatjetsLS.delta_r(candidatelep) > 1.2)
            # need to add 2 subjets w pt > 20 & eta<2.4
            # need to add ID?
        )
        wjj_pool = fatjetsLS[mask_wjj]
        closest = awkward1.argmin(wjj_pool.delta_r(candidatelep), axis=1, keepdims=True)
        candidateWjj = awkward1.firsts(wjj_pool[closest])
        # add t2/t1 <= 0.75 (0.45 HP)
        selection.add('hww_mass', awkward1.to_numpy(candidateWjj.mass >= 10))

        # TODO: wjjlnu reconstruction (HSolverLi-style minimisation using the
        # lepton, MET, the wjj candidate and its soft-drop mass), then
        # add dlvqq <= 11 (2.5 HP).
        # TODO: in the meantime, estimate the neutrino eta from a W/H mass
        # assumption, build candidateHH = candidateWjj + met_p4 + candidateHbb and
        # add the cuts 'hh_mass' (candidateHH.mass >= 700) and
        # 'hh_centrality' (candidateHH.pt / candidateHH.mass >= 0.3).

        channels = {"e": ["met_filter", "ch_e", "ht_400", "hbb_btag", "hbb_vetobtagaway", "hww_mass"],  # ,"hh_mass","hh_centrality"],
                    "mu": ["met_filter", "ch_mu", "ht_400", "hbb_btag", "hbb_vetobtagaway", "hww_mass"]  # ,"hh_mass","hh_centrality"],
                    }

        # need to add gen info

        if not isRealData:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)

        # Cumulative cutflow: bin 0 holds all events, bin i+1 the events passing
        # the first i+1 cuts of the channel.
        event_weight = weights.weight()
        for channel, cuts in channels.items():
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=0, weight=event_weight)
            applied = set()
            for i, cut_name in enumerate(cuts):
                applied.add(cut_name)
                passed = selection.all(*applied)
                output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1, weight=event_weight[passed])

        return output
Example #29
0
    def process(self, events):
        """Select events with at least three good jets and store trijet observables.

        For every event passing the HLT requirement and having at least three
        jets with pt > 30, |eta| < 2.5 and ``isTight``, the three leading selected
        jets are used to compute single-jet, jet-pair and jet-vs-dijet quantities.
        Each quantity is appended to the matching
        ``output["<name>_final"][dataset]`` column accumulator.

        Parameters
        ----------
        events
            Event chunk exposing ``metadata['dataset']``, ``HLT`` and ``Jet``.

        Returns
        -------
        The accumulator created by ``self._accumulator.identity()`` with the event
        counters and all ``*_final`` columns updated.

        Raises
        ------
        ValueError
            If ``year`` is not one of "2016", "2017" or "2018".
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        singles = (0, 1, 2)
        pairs = [(0, 1), (1, 2), (2, 0)]
        triplets = [(0, 1, 2), (1, 2, 0), (2, 0, 1)]

        # Initialize dict accumulators, if have not been initialized
        column_keys = []
        for i in singles:
            column_keys += [f"eta_{i}_final", f"ptoverM_{i}_final"]
        for a, b in pairs:
            column_keys += [f"dEta_{a}{b}_final", f"dR_{a}{b}_final", f"moverM_{a}{b}_final"]
        for i, j, k in triplets:
            column_keys += [
                f"dR_{i}_{j}{k}_final",
                f"dEta_{i}_{j}{k}_final",
                f"Phi_{i}_{j}{k}_final",
                f"dPtoverM_{i}_{j}{k}_final",
            ]
        column_keys += [
            "ptoverM_max_final", "ptoverM_min_final",
            "eta_max_final",
            "dR_max_final", "dR_min_final",
            "dEta_max_final", "dEta_min_final",
            "dR_j_jj_max_final", "dR_j_jj_min_final",
            "dEta_j_jj_max_final", "dEta_j_jj_min_final",
            "dPhi_j_jj_max_final", "dPhi_j_jj_min_final",
            "dPtoverM_j_jj_max_final", "dPtoverM_j_jj_min_final",
        ]
        for key in column_keys:
            if dataset_name not in output[key].keys():
                output[key][dataset_name] = processor.column_accumulator(np.array([]))

        # HLT selection
        # NOTE(review): `year` is not defined in this method; it is presumably a
        # module-level name -- confirm.
        # NOTE: the original author remarks that the dataset-name matching below
        # does not work because the name of the file being processed is unknown.
        hlt = events.HLT
        is_single_muon = "SingleMuon" in dataset_name
        if year == "2016":
            if is_single_muon:
                if "2016B2" in dataset_name:
                    HLT_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
                else:
                    HLT_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50 | hlt.TkMu50
            else:  # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = hlt.PFHT800 | hlt.PFHT900 | hlt.PFJet500 | hlt.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = hlt.PFHT900 | hlt.AK8PFJet450 | hlt.AK8PFJet500 | hlt.PFJet500 | hlt.CaloJet500_NoJetID
                else:
                    HLT_mask = hlt.PFHT800 | hlt.PFHT900 | hlt.AK8PFJet450 | hlt.AK8PFJet500 | hlt.PFJet500 | hlt.CaloJet500_NoJetID
        elif year == "2017":
            if is_single_muon:
                if "2017B" in dataset_name:
                    HLT_mask = hlt.IsoMu27 | hlt.Mu50
                else:
                    HLT_mask = hlt.IsoMu27 | hlt.Mu50 | hlt.OldMu100 | hlt.TkMu100
            else:
                HLT_mask = hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550 | hlt.CaloJet500_NoJetID | hlt.CaloJet550_NoJetID | hlt.PFJet500
        elif year == "2018":
            if is_single_muon:
                HLT_mask = hlt.IsoMu24 | hlt.Mu50 | hlt.OldMu100 | hlt.TkMu100
            else:
                HLT_mask = hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550 | hlt.CaloJet500_NoJetID | hlt.CaloJet550_NoJetID | hlt.PFJet500
        else:
            # Fail early with a clear message instead of carrying an empty
            # placeholder mask into the event selection.
            raise ValueError(f"Unsupported year {year!r}; expected '2016', '2017' or '2018'")

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3) & HLT_mask
        events_3j = events[event_mask]

        # Array of the jets to consider for trijet resonance: the first three
        # good jets of each surviving event
        selected_jets = events_3j.Jet[jet_mask[event_mask]][:, :3]

        # Pairs of jets: column n of the *_ij arrays corresponds to pairs[n]
        jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]
        jet_k = [2, 0, 1]

        jets_i = selected_jets[:, jet_i]
        jets_j = selected_jets[:, jet_j]
        m_ij = (jets_i + jets_j).mass
        dR_ij = jets_i.delta_r(jets_j)
        dEta_ij = abs(jets_i.eta - jets_j.eta)

        # Jet i versus the system formed by the two other jets
        jets_jk = jets_j + selected_jets[:, jet_k]
        dR_i_jk = jets_i.delta_r(jets_jk)
        dEta_i_jk = abs(jets_i.eta - jets_jk.eta)
        # NOTE(review): plain |phi_i - phi_jk|, not wrapped into [0, pi] -- confirm
        # this is intended.
        dPhi_i_jk = abs(jets_i.phi - jets_jk.phi)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        dPtoverM = {
            (i, j, k): abs(selected_jets[:, i].pt - (selected_jets[:, j] + selected_jets[:, k]).pt) / m3j
            for i, j, k in triplets
        }

        # Pre-selection (currently a single placeholder cut)
        selection = PackedSelection()
        selection.add("Dummy", m3j > 0)
        sel_mask = selection.require(**{name: True for name in selection.names})

        output["selected_events"][dataset_name] += len(events_3j[sel_mask])

        def fill_column(key, values):
            # Append `values` to this dataset's column of accumulator `key`.
            output[key][dataset_name] += processor.column_accumulator(np.array(values))

        def fill_extreme(key, reducer, values):
            # Per-event max/min over the three entries; None is stored as -99.
            fill_column(key, awk.fill_none(reducer(values[sel_mask], axis=1), -99))

        for i in singles:
            fill_column(f"eta_{i}_final", selected_jets[:, i][sel_mask].eta)
            fill_column(f"ptoverM_{i}_final", pt_i_overM[:, i][sel_mask])

        for col, (a, b) in enumerate(pairs):
            fill_column(f"dEta_{a}{b}_final", dEta_ij[:, col][sel_mask])
            fill_column(f"dR_{a}{b}_final", dR_ij[:, col][sel_mask])
            fill_column(f"moverM_{a}{b}_final", (m_ij[:, col] / m3j)[sel_mask])

        for col, (i, j, k) in enumerate(triplets):
            fill_column(f"dR_{i}_{j}{k}_final", dR_i_jk[:, col][sel_mask])
            fill_column(f"dEta_{i}_{j}{k}_final", dEta_i_jk[:, col][sel_mask])
            fill_column(f"Phi_{i}_{j}{k}_final", dPhi_i_jk[:, col][sel_mask])
            fill_column(f"dPtoverM_{i}_{j}{k}_final", dPtoverM[(i, j, k)][sel_mask])

        fill_extreme("ptoverM_max_final", awk.max, pt_i_overM)
        fill_extreme("ptoverM_min_final", awk.min, pt_i_overM)
        fill_extreme("eta_max_final", awk.max, abs(selected_jets.eta))
        fill_extreme("dR_max_final", awk.max, dR_ij)
        fill_extreme("dR_min_final", awk.min, dR_ij)
        fill_extreme("dEta_max_final", awk.max, dEta_ij)
        fill_extreme("dEta_min_final", awk.min, dEta_ij)
        fill_extreme("dR_j_jj_max_final", awk.max, dR_i_jk)
        fill_extreme("dR_j_jj_min_final", awk.min, dR_i_jk)
        fill_extreme("dEta_j_jj_max_final", awk.max, dEta_i_jk)
        fill_extreme("dEta_j_jj_min_final", awk.min, dEta_i_jk)
        fill_extreme("dPhi_j_jj_max_final", awk.max, dPhi_i_jk)
        fill_extreme("dPhi_j_jj_min_final", awk.min, dPhi_i_jk)

        # Element-wise extrema of the three dPt/M columns, computed array-at-a-time
        # rather than with a per-event Python loop.
        d0, d1, d2 = (dPtoverM[t][sel_mask] for t in triplets)
        fill_column("dPtoverM_j_jj_max_final", np.maximum(np.maximum(d0, d1), d2))
        fill_column("dPtoverM_j_jj_min_final", np.minimum(np.minimum(d0, d1), d2))

        return output
    def process(self, events):
        """Fill the accumulator histograms for one chunk of events.

        The method applies a loose preselection (more than two jets), builds
        the lepton and jet collections, assembles the event weights (skipped
        for the 'MuonEG' dataset), evaluates the trilepton baseline selection
        and fills the histograms stored in the accumulator.

        Parameters
        ----------
        events
            Event array providing (at least) the `Jet`, `GenL`, `MET` and `PV`
            collections and a `metadata['dataset']` entry.

        Returns
        -------
        The accumulator obtained from `self.accumulator.identity()` with the
        counters and histograms filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)

        # opposite-sign same-flavour pairs: product of the charges is negative
        dimuon = choose(muon, 2)
        OS_dimuon = dimuon[(dimuon['0'].charge*dimuon['1'].charge < 0)]

        # pass the pair size explicitly, consistent with the muon case above
        dielectron = choose(electron, 2)
        OS_dielectron = dielectron[(dielectron['0'].charge*dielectron['1'].charge < 0)]

        # per flavour, keep the pair whose mass is closest to 91.2
        OS_dimuon_bestZmumu = OS_dimuon[ak.singletons(ak.argmin(abs(OS_dimuon.mass-91.2), axis=1))]
        OS_dielectron_bestZee = OS_dielectron[ak.singletons(ak.argmin(abs(OS_dielectron.mass-91.2), axis=1))]
        # exactly one mass entry per event: pad/clip to length 1, missing values become -1
        OS_dilepton_mass = ak.fill_none(ak.pad_none(ak.concatenate([OS_dimuon_bestZmumu.mass, OS_dielectron_bestZee.mass], axis=1), 1, clip=True), -1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        # NOTE(review): several quantities computed above and here (leading_muon,
        # leading_electron, dilepton, fwd_noPU, delta_eta, st, ...) are not
        # consumed by any histogram below; kept as in the original.
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

        # define the weight
        weight = Weights( len(ev) )

        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            ## lepton SFs
            #weight.add("lepton", self.leptonSF.get(electron, muon))

        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.trilep_baseline(cutflow=cutflow)

        # evaluate the per-event weights once; every fill below reuses them
        event_weight = weight.weight()
        weight_BL = event_weight[BL]

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL)
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        # muon multiplicity must be counted from the muon collection
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight_BL)
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL)
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)

        # make a plot of the dilepton mass, but without applying the cut on the dilepton mass itself (N-1 plot)
        BL_no_offZ = sel.trilep_baseline(omit=['offZ'])
        output['dilep_mass'].fill(dataset=dataset, mass=ak.flatten(OS_dilepton_mass[BL_no_offZ]), weight=event_weight[BL_no_offZ])

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight_BL
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight_BL
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight_BL
        )

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight_BL
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight_BL
        )

        # jets are sorted by descending pt_nom above, so slot 0 is the leading jet
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )

        #output['j3'].fill(
        #    dataset = dataset,
        #    pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
        #    eta = ak.flatten(jet[:, 2:3][BL].eta),
        #    phi = ak.flatten(jet[:, 2:3][BL].phi),
        #    weight = weight.weight()[BL]
        #)


        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight_BL
        )

        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight_BL)

        return output