def test_IndexedOptionArray():
    """Check min/argmin on option-type arrays along several axes.

    The first part builds an ``IndexedOptionArray64`` by hand (the test
    expects entries with index ``-1`` to come back as ``None``) and reduces
    it along axis 0.  The remaining parts reduce jagged arrays containing
    ``None`` along the last axis and along axis 0.
    """
    values = awkward1.Array([1.1, 2.2, 3.3, 4.4, 5.5]).layout
    pointers = awkward1.layout.Index64(numpy.array([4, 2, -1, -1, 1, 0, 1]))
    option_layout = awkward1.layout.IndexedOptionArray64(pointers, values)
    array = awkward1.Array(option_layout)
    assert array.tolist() == [5.5, 3.3, None, None, 2.2, 1.1, 2.2]
    assert awkward1.min(array, axis=0) == 1.1
    assert awkward1.argmin(array, axis=0) == 5

    # (input, expected argmin) pairs reduced along the innermost axis.
    last_axis_cases = [
        ([[2.2, 1.1], [None, 3.3], [2.2, 1.1]], [1, 1, 1]),
        ([[2.2, 1.1], [None, 3.3], [2.2, None, 1.1]], [1, 1, 2]),
        ([[2.2, 1.1], [3.3, None], [2.2, None, 1.1]], [1, 0, 2]),
    ]
    for data, expected in last_axis_cases:
        result = awkward1.argmin(awkward1.Array(data), axis=-1)
        assert result.tolist() == expected

    # (input, expected argmin) pairs reduced across the outer lists.
    first_axis_cases = [
        ([[2.2, 1.1, 0.0], [], [None, 0.5], [2, 1]], [3, 2, 0]),
        ([[2.2, 1.1, 0.0], [], [0.5, None], [2, 1]], [2, 3, 0]),
        ([[2.2, 1.1, 0.0], [0.5, None], [], [2, 1]], [1, 3, 0]),
    ]
    for data, expected in first_axis_cases:
        result = awkward1.argmin(awkward1.Array(data), axis=0)
        assert result.tolist() == expected
def test_jagged_axis0():
    """Check min and argmin along axis 0 of a jagged (ragged) array."""
    jagged = [[1.1, 5.5], [4.4], [2.2, 3.3, 0.0, -10]]

    smallest = awkward1.min(awkward1.Array(jagged), axis=0)
    assert smallest.tolist() == [1.1, 3.3, 0, -10]

    positions = awkward1.argmin(awkward1.Array(jagged), axis=0)
    assert positions.tolist() == [0, 2, 2, 2]
Ejemplo n.º 3
0
def embed_crossref(source, idx_name, dest, dest_name):
    """Embed a cross-reference

    The per-event indices stored in ``source[idx_name]`` are turned into
    positions in the flattened ``dest`` array, the referenced ``dest``
    records are gathered, and the result is attached to ``source`` under
    ``dest_name`` using the list offsets of ``source``.

    ``source`` is modified in place; nothing is returned.

    Parameters
    ----------
        source : ak.Array
            any array with shape N * var * {record}
        idx_name : str
            A field in the source record
        dest : ak.Array
            any array with shape N * var * {record}, where:
            ``ak.max(source[idx_name], axis=1) < ak.num(dest)`` and
            ``ak.min(source[idx_name], axis=1) >= 0``
        dest_name : str
            Name of the field assigned on ``source`` that will hold the
            referenced ``dest`` records

    Raises
    ------
        AssertionError
            If any index in ``source[idx_name]`` is negative or not smaller
            than the number of ``dest`` entries in the same event.
    """
    local_idx = source[idx_name]

    # Every local index must point inside the matching event of ``dest``.
    assert ak.all(ak.max(local_idx, axis=1) < ak.num(dest))
    assert ak.all(ak.min(local_idx, axis=1) >= 0)

    # Shift each event's local indices by the start offset of that event in
    # ``dest`` so that they address the flattened ``dest`` content.
    id_global = ak.flatten(
        local_idx + np.asarray(dest.layout.starts), axis=None
    )

    # Re-wrap the gathered records with the inner and outer offsets of
    # ``source`` so the new field has the same nesting as the index field.
    gathered = ak.flatten(dest)[id_global].layout
    inner = ak.layout.ListOffsetArray64(
        source.layout.content.offsets,
        gathered,
    )
    outer = ak.layout.ListOffsetArray64(source.layout.offsets, inner)
    source[dest_name] = ak.Array(outer)
Ejemplo n.º 4
0
    def process(self, events):
        """Run the same-sign dilepton selection, NN evaluation and histogram filling.

        Steps, in order: loose jet-multiplicity preselection, construction of
        lepton / jet / forward-jet collections and derived variables, event
        weights, the ``dilep_baseline`` selection, evaluation of the ONNX
        model on the selected events, and filling of the output histograms.

        Parameters
        ----------
        events :
            Event array exposing at least ``Jet``, ``MET``, ``PV`` and a
            ``metadata['dataset']`` entry; generator-level fields (``GenL``,
            ``GenPart``, ``weight``, ...) are read only when the dataset name
            does not match the pattern checked below.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` after
        all counters and histograms have been filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # The generator-level and weight blocks below run only when the
        # dataset name does NOT match this pattern (presumably the names of
        # collision-data streams -- confirm).  Evaluated once and reused.
        run_gen_blocks = not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset)

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        if run_gen_blocks:
            ## Generated leptons
            gen_lep = ev.GenL
            leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
            trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton+trailing_lepton).mass
        dilepton_pt = (leading_lepton+trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)

        lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True), 0)

        if run_gen_blocks:
            n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
            n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Tau and other stuff
        tau       = getTaus(ev)
        track     = getIsoTracks(ev)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)

        # NOTE(review): unlike the other sorts in this method, ak.argsort is
        # called here without ascending=False, so [:, :2] keeps the first two
        # entries of the default ordering -- confirm this matches the
        # "high score" intent before changing it (the NN inputs depend on it).
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        bl          = cross(lepton, high_score_btag)
        bl_dR       = delta_r(bl['0'], bl['1'])
        min_bl_dR   = ak.min(bl_dR, axis=1)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

        # define the weight
        weight = Weights( len(ev) )

        if run_gen_blocks:
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        # Per-event selection from Selection.dilep_baseline, used below to
        # index every per-event quantity.
        BL = sel.dilep_baseline(cutflow=cutflow, SS=True)

        weight_BL = weight.weight()[BL]

        if True:
            # define the inputs to the NN
            # this is super stupid. there must be a better way.
            NN_inputs = np.stack([
                ak.to_numpy(ak.num(jet[BL])),
                ak.to_numpy(ak.num(tau[BL])),
                ak.to_numpy(ak.num(track[BL])),
                ak.to_numpy(st[BL]),
                ak.to_numpy(ev.MET[BL].pt),
                ak.to_numpy(ak.max(mjf[BL], axis=1)),
                ak.to_numpy(pad_and_flatten(delta_eta[BL])),
                ak.to_numpy(pad_and_flatten(leading_lepton[BL].pt)),
                ak.to_numpy(pad_and_flatten(leading_lepton[BL].eta)),
                ak.to_numpy(pad_and_flatten(trailing_lepton[BL].pt)),
                ak.to_numpy(pad_and_flatten(trailing_lepton[BL].eta)),
                ak.to_numpy(pad_and_flatten(dilepton_mass[BL])),
                ak.to_numpy(pad_and_flatten(dilepton_pt[BL])),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].pt)),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].p)),
                ak.to_numpy(pad_and_flatten(j_fwd[BL].eta)),
                ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].pt)),
                ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].pt)),
                ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].eta)),
                ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].eta)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].pt)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].pt)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].eta)),
                ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].eta)),
                ak.to_numpy(min_bl_dR[BL]),
                ak.to_numpy(min_mt_lep_met[BL]),
            ])

            # np.stack put the variables on axis 0; move them to axis 1 so
            # that each row is one event.
            NN_inputs = np.moveaxis(NN_inputs, 0, 1)

            model, scaler = load_onnx_model('v8')

            try:
                NN_inputs_scaled = scaler.transform(NN_inputs)

                NN_pred    = predict_onnx(model, NN_inputs_scaled)

                best_score = np.argmax(NN_pred, axis=1)

            except ValueError:
                # Fallback with empty predictions; per the original note this
                # is the "Empty NN_inputs" case.
                NN_pred = np.array([])
                best_score = np.array([])
                NN_inputs_scaled = NN_inputs

            #k.clear_session()

            output['node'].fill(dataset=dataset, multiplicity=best_score, weight=weight_BL)

            output['node0_score_incl'].fill(dataset=dataset, score=NN_pred[:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL)
            output['node0_score'].fill(dataset=dataset, score=NN_pred[best_score==0][:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==0])
            output['node1_score'].fill(dataset=dataset, score=NN_pred[best_score==1][:,1] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==1])
            output['node2_score'].fill(dataset=dataset, score=NN_pred[best_score==2][:,2] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==2])
            output['node3_score'].fill(dataset=dataset, score=NN_pred[best_score==3][:,3] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==3])
            output['node4_score'].fill(dataset=dataset, score=NN_pred[best_score==4][:,4] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==4])

            # Events whose best node is 0, split by the sign of the leading
            # lepton's pdgId.
            SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0)))
            SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0)))
            leading_lepton_BL = leading_lepton[BL]

            output['lead_lep_SR_pp'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
                weight = weight_BL[SR_sel_pp]
            )

            output['lead_lep_SR_mm'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
                weight = weight_BL[SR_sel_mm]
            )

            del model
            del scaler
            del NN_inputs, NN_inputs_scaled, NN_pred

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL)
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        # Muon multiplicity: count muons here (this histogram was previously
        # filled with the electron count by mistake).
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight_BL)
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL)
        output['ST'].fill(dataset=dataset, pt=st[BL], weight=weight_BL)
        output['HT'].fill(dataset=dataset, pt=ht[BL], weight=weight_BL)

        if run_gen_blocks:
            output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
            output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
            output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
            output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
            output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
            output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
            output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL)

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight_BL
        )

        if run_gen_blocks:
            output['lead_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
                weight = weight_BL
            )

            output['trail_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
                weight = weight_BL
            )

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight_BL
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight_BL
        )

        # The 0:1 / 1:2 / 2:3 slices keep the list structure, so events
        # with fewer jets contribute nothing after ak.flatten.
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )

        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight_BL
        )

        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight_BL
        )

        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight_BL)

        return output
def test_jagged_axis1():
    """Check min/argmin along axis 1 for pairs of doubly-jagged lists.

    Every case is a two-element array ``[first, second]``.  Three layouts
    of ``first`` are each combined with nine layouts of ``second``; the
    layouts differ in where empty sublists sit and in how long the
    sublists are.  The minimum of ``first`` is the same in every case;
    only its argmin shifts with the position of the empty sublist.
    """
    # (first element, expected argmin of the first element)
    first_layouts = [
        # first is [[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]]
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [4, 3, 2]),
        # first is [[], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]]
        ([[], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [5, 4, 3]),
        # first is [[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [], [999, 2.0], [1.0]]
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [], [999, 2.0], [1.0]],
         [5, 4, 2]),
    ]

    # (second element, expected min, expected argmin of the second element)
    second_layouts = [
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [4, 3, 2]),
        ([[], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [5, 4, 3]),
        ([[], [], [1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [6, 5, 4]),
        ([[1.1], [1.1, 2.2], [], [1.1, 2.2, 3.3], [999, 2.0], [1.0]],
         [1, 2, 3.3], [5, 4, 3]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [], [999, 2.0], [1.0]],
         [1, 2, 3.3], [5, 4, 2]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [], [1.0]],
         [1, 2, 3.3], [5, 3, 2]),
        ([[1.1], [1.1, 2.2], [1.1, 2.2, 3.3], [999, 2.0], [1.0], []],
         [1, 2, 3.3], [4, 3, 2]),
        ([[1.1, 999, 999], [1.1, 2.2, 999], [1.1, 2.2, 3.3], [999, 2.0],
          [1.0]],
         [1, 2, 3.3], [4, 3, 2]),
        ([[1.1, 999, 999, 999], [1.1, 2.2, 999], [1.1, 2.2, 3.3],
          [999, 2.0], [1.0]],
         [1, 2, 3.3, 999], [4, 3, 2, 0]),
    ]

    for first, first_argmin in first_layouts:
        for second, second_min, second_argmin in second_layouts:
            array = awkward1.Array([first, second])
            assert awkward1.min(array, axis=1).tolist() == [[1, 2, 3.3],
                                                            second_min]
            assert awkward1.argmin(array, axis=1).tolist() == [first_argmin,
                                                               second_argmin]
Ejemplo n.º 6
0
    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()

        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name:
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else:  #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500

        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) <
                                            2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # Returns [0, 1, 2] , [1, 2, 0]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        max_dR = awk.max(dR_ij, axis=1)
        max_dEta = awk.max(dEta_ij, axis=1)
        min_dR = awk.min(dR_ij, axis=1)
        min_dEta = awk.min(dEta_ij, axis=1)
        min_pT = awk.min(selected_jets.pt, axis=1)
        max_eta = abs(awk.max(selected_jets.eta, axis=1))

        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)
        dPt_i_jk = abs(selected_jets[:, jet_i].pt -
                       (selected_jets[:, jet_j] + selected_jets[:, jet_k]).pt)

        max_dPhi_jjj = awk.max(dPhi_i_jk, axis=1)

        m3j = selected_jets.sum().mass

        pt_i_overM = selected_jets.pt / m3j
        max_pt_overM = awk.max(pt_i_overM, axis=1)
        min_pt_overM = awk.min(pt_i_overM, axis=1)
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j

        for pt_cut in range(30, 1150, 5):
            cut_name = f"min_pT_cut{pt_cut}".format(pt_cut)
            selection = PackedSelection()
            selection.add("MinJetPt_cut", min_pT > pt_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_min_pT_cut{pt_cut}".format(
                pt_cut)][dataset_name] += events_3j[sel_mask].__len__()

        for eta_cut in np.arange(0, 2.5, 0.05):
            cut_name = f"max_eta_cut{eta_cut}".format(eta_cut)
            selection = PackedSelection()
            selection.add("MaxJetEta_cut", max_eta < eta_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_max_eta_cut{eta_cut}".format(
                eta_cut)][dataset_name] += events_3j[sel_mask].__len__()

        for dEta_max_cut in np.arange(0, 5, 0.1):
            cut_name = f"dEta_max_cut{dEta_max_cut}".format(dEta_max_cut)
            selection = PackedSelection()
            selection.add("MaxJJdEta_cut", max_dEta < dEta_max_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_dEta_jj_max_cut{dEta_max_cut}".format(
                dEta_max_cut)][dataset_name] += events_3j[sel_mask].__len__()

        for dR_min_cut in np.arange(0, 5, 0.1):
            cut_name = f"dR_min_cut{dR_min_cut}".format(dR_min_cut)
            selection = PackedSelection()
            selection.add("MinJJdR_cut", min_dR > dR_min_cut)
            sel_mask = selection.require(
                **{name: True
                   for name in selection.names})
            output[f"N_dR_jj_min_cut{dR_min_cut}".format(
                dR_min_cut)][dataset_name] += events_3j[sel_mask].__len__()

        #min cut for the variable dPhi_jjj_max
        # for dPhi_jjj_max_min_cut in range(0,6,0.1):
        # cut_name = f"dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)
        # selections[cut_name] = PackedSelection()
        # selections[cut_name].add("j_jj_dPhi_max_cut", min_dR > dPhi_jjj_max_min_cut)
        # selection_items[cut_name] = []
        # selection_items[cut_name].append("j_jj_dPhi_max_cut")
        # sel_mask = HLT_mask & selections[cut_name].require(**{name: True for name in selection_items[cut_name]})
        # output[f"N_dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)][dataset_name] += events_3j[sel_mask].__len__()

        # for dPhi_jjj_min_max_cut in range(0,6,0.1):
        # cut_name = f"dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)
        # selections[cut_name] = PackedSelection()
        # selections[cut_name].add("j_jj_dPhi_max_cut", min_dR > dPhi_jjj_max_min_cut)
        # selection_items[cut_name] = []
        # selection_items[cut_name].append("j_jj_dPhi_max_cut")
        # sel_mask = HLT_mask & selections[cut_name].require(**{name: True for name in selection_items[cut_name]})
        # output[f"N_dPhi_jjj_max_min_cut{dPhi_jjj_max_min_cut}".format(dPhi_jjj_max_min_cut)][dataset_name] += events_3j[sel_mask].__len__()

        return output
Ejemplo n.º 7
0
    def process(self, events):
        """Fill the trijet kinematic histograms for one chunk of events.

        An event is kept when it passes the HLT paths chosen for the
        dataset/year and has at least three jets satisfying the jet
        requirements below. The first three such jets of each event are
        then used to build pair (i, j) and jet-vs-dijet (i, jk) quantities,
        which are filled into the histograms held by the accumulator.

        Parameters
        ----------
        events
            Event collection providing ``metadata['dataset']``, ``HLT`` and
            ``Jet`` as accessed in the body.

        Returns
        -------
        The accumulator obtained from ``self._accumulator.identity()``,
        with ``total_events`` incremented and all histograms filled.

        Notes
        -----
        ``year`` is not defined in this method; it is read from the
        enclosing scope.
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # HLT selection.
        # Trigger paths follow
        # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
        # NOTE (from the original author): the "SingleMuon" test does not
        # work, as the name of the file being processed is unknown.
        # NOTE(review): when ``year`` matches none of the branches the mask
        # stays an empty list and the ``&`` with event_mask below is
        # unlikely to work -- confirm ``year`` is always 2016/2017/2018.
        hlt = events.HLT
        is_single_muon = "SingleMuon" in dataset_name
        HLT_mask = []
        if year == "2016":
            if is_single_muon:
                if "2016B2" in dataset_name:
                    HLT_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
                else:
                    HLT_mask = (hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
                                | hlt.TkMu50)
            else:
                if "2016B2" in dataset_name:
                    HLT_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
                elif "2016H" in dataset_name:
                    HLT_mask = (hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
                else:
                    HLT_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
        elif year == "2017":
            if is_single_muon:
                if "2017B" in dataset_name:
                    HLT_mask = hlt.IsoMu27 | hlt.Mu50
                else:
                    HLT_mask = (hlt.IsoMu27 | hlt.Mu50 | hlt.OldMu100
                                | hlt.TkMu100)
            else:
                HLT_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID
                            | hlt.CaloJet550_NoJetID | hlt.PFJet500)
        elif year == "2018":
            if is_single_muon:
                HLT_mask = (hlt.IsoMu24 | hlt.Mu50 | hlt.OldMu100
                            | hlt.TkMu100)
            else:
                HLT_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID
                            | hlt.CaloJet550_NoJetID | hlt.PFJet500)

        # Require 3 jets
        jet_mask = ((events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5)
                    & (events.Jet.isTight))
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]

        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Index triplets: for each position, jet i is compared with jet j
        # (pair quantities) or with the j+k system (jet-vs-dijet quantities).
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # (0, 1, 2) and (1, 2, 0)
        jet_k = [2, 0, 1]

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] +
                                                  selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi -
                        (selected_jets[:, jet_j] +
                         selected_jets[:, jet_k]).phi)

        m3j = selected_jets.sum().mass

        # Quantities normalised to the trijet mass
        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:, 0] / m3j
        m_12_overM = m_ij[:, 1] / m3j
        m_20_overM = m_ij[:, 2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt -
                            (selected_jets[:, 1] +
                             selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt -
                            (selected_jets[:, 2] +
                             selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt -
                            (selected_jets[:, 0] +
                             selected_jets[:, 1]).pt) / m3j

        # Event selection masks, keyed by the label used on the histograms'
        # "selection" axis.
        selection_masks = {}
        # Pre-selection: a single placeholder cut on the trijet mass.
        # (The HLT requirement is already part of event_mask above.)
        preselection = PackedSelection()
        preselection.add("Dummy", m3j > 0)
        selection_masks["Pre-selection"] = preselection.require(
            **{name: True
               for name in preselection.names})

        # Fill histograms
        for selection_name, mask in selection_masks.items():
            output["mjjj"].fill(dataset=dataset_name,
                                selection=selection_name,
                                mjjj=m3j[mask])

            output["m_ij"].fill(dataset=dataset_name,
                                selection=selection_name,
                                m_01=m_ij[:, 0][mask],
                                m_12=m_ij[:, 1][mask],
                                m_20=m_ij[:, 2][mask])

            output["dR_ij"].fill(dataset=dataset_name,
                                 selection=selection_name,
                                 dR_01=dR_ij[:, 0][mask],
                                 dR_12=dR_ij[:, 1][mask],
                                 dR_20=dR_ij[:, 2][mask])

            output["dEta_ij"].fill(dataset=dataset_name,
                                   selection=selection_name,
                                   dEta_01=dEta_ij[:, 0][mask],
                                   dEta_12=dEta_ij[:, 1][mask],
                                   dEta_20=dEta_ij[:, 2][mask])

            output["moverM_ij"].fill(dataset=dataset_name,
                                     selection=selection_name,
                                     moverM_01=m_01_overM[mask],
                                     moverM_12=m_12_overM[mask],
                                     moverM_20=m_20_overM[mask])

            output["pt_i"].fill(dataset=dataset_name,
                                selection=selection_name,
                                pt_0=selected_jets[:, 0][mask].pt,
                                pt_1=selected_jets[:, 1][mask].pt,
                                pt_2=selected_jets[:, 2][mask].pt)

            output["eta_i"].fill(dataset=dataset_name,
                                 selection=selection_name,
                                 eta_0=selected_jets[:, 0][mask].eta,
                                 eta_1=selected_jets[:, 1][mask].eta,
                                 eta_2=selected_jets[:, 2][mask].eta)

            output["ptoverM_i"].fill(dataset=dataset_name,
                                     selection=selection_name,
                                     ptoverM_0=pt_i_overM[:, 0][mask],
                                     ptoverM_1=pt_i_overM[:, 1][mask],
                                     ptoverM_2=pt_i_overM[:, 2][mask])

            output["dR_i_jk"].fill(dataset=dataset_name,
                                   selection=selection_name,
                                   dR_0_12=dR_i_jk[:, 0][mask],
                                   dR_1_20=dR_i_jk[:, 1][mask],
                                   dR_2_01=dR_i_jk[:, 2][mask])

            output["dEta_i_jk"].fill(dataset=dataset_name,
                                     selection=selection_name,
                                     dEta_0_12=dEta_i_jk[:, 0][mask],
                                     dEta_1_20=dEta_i_jk[:, 1][mask],
                                     dEta_2_01=dEta_i_jk[:, 2][mask])

            output["dPhi_i_jk"].fill(dataset=dataset_name,
                                     selection=selection_name,
                                     dPhi_0_12=dPhi_i_jk[:, 0][mask],
                                     dPhi_1_20=dPhi_i_jk[:, 1][mask],
                                     dPhi_2_01=dPhi_i_jk[:, 2][mask])

            dPtoverM_0_12_sel = dPtoverM_0_12[mask]
            dPtoverM_1_20_sel = dPtoverM_1_20[mask]
            dPtoverM_2_01_sel = dPtoverM_2_01[mask]
            output["dPtoverM_i_jk"].fill(dataset=dataset_name,
                                         selection=selection_name,
                                         dPtoverM_0_12=dPtoverM_0_12_sel,
                                         dPtoverM_1_20=dPtoverM_1_20_sel,
                                         dPtoverM_2_01=dPtoverM_2_01_sel)

            # Per-event extrema over the three jets / three combinations.
            pt_i_overM_sel = pt_i_overM[mask]
            dR_ij_sel = dR_ij[mask]
            dEta_ij_sel = dEta_ij[mask]
            dR_i_jk_sel = dR_i_jk[mask]
            dEta_i_jk_sel = dEta_i_jk[mask]
            dPhi_i_jk_sel = dPhi_i_jk[mask]
            selected_jets_sel = selected_jets[mask]

            # Histogram name -> per-event value. In every one of these
            # histograms the value axis has the same name as the histogram.
            extrema = {
                "max_dR": awk.max(dR_ij_sel, axis=1),
                "max_dEta": awk.max(dEta_ij_sel, axis=1),
                "min_dR": awk.min(dR_ij_sel, axis=1),
                "min_dEta": awk.min(dEta_ij_sel, axis=1),
                "min_pt": awk.min(selected_jets_sel.pt, axis=1),
                "max_eta": awk.max(abs(selected_jets_sel.eta), axis=1),
                "max_ptoverM": awk.max(pt_i_overM_sel, axis=1),
                "min_ptoverM": awk.min(pt_i_overM_sel, axis=1),
                "max_dR_j_jj": awk.max(dR_i_jk_sel, axis=1),
                "max_dEta_j_jj": awk.max(dEta_i_jk_sel, axis=1),
                "max_dPhi_j_jj": awk.max(dPhi_i_jk_sel, axis=1),
                "min_dR_j_jj": awk.min(dR_i_jk_sel, axis=1),
                "min_dEta_j_jj": awk.min(dEta_i_jk_sel, axis=1),
                "min_dPhi_j_jj": awk.min(dPhi_i_jk_sel, axis=1),
            }
            for hist_name, values in extrema.items():
                # The reducers give None for an empty list; -99 is the
                # placeholder filled in that case.
                output[hist_name].fill(
                    dataset=dataset_name,
                    selection=selection_name,
                    **{hist_name: awk.fill_none(values, -99)})

            # The three dPtoverM quantities are separate per-event arrays,
            # so their extrema are taken element-wise (vectorised) instead
            # of looping over events in Python. Cast to float64 to match
            # what building the array from Python floats produced.
            # NOTE: unlike the builtin max()/min(), np.maximum/np.minimum
            # always propagate NaN.
            max_dPtoverM_i_jk = np.asarray(
                np.maximum(np.maximum(dPtoverM_0_12_sel, dPtoverM_1_20_sel),
                           dPtoverM_2_01_sel),
                dtype=np.float64)
            min_dPtoverM_i_jk = np.asarray(
                np.minimum(np.minimum(dPtoverM_0_12_sel, dPtoverM_1_20_sel),
                           dPtoverM_2_01_sel),
                dtype=np.float64)

            output["max_dPtoverM_j_jj"].fill(
                dataset=dataset_name,
                selection=selection_name,
                max_dPtoverM_j_jj=max_dPtoverM_i_jk)

            output["min_dPtoverM_j_jj"].fill(
                dataset=dataset_name,
                selection=selection_name,
                min_dPtoverM_j_jj=min_dPtoverM_i_jk)

        return output
Ejemplo n.º 8
0
    def process(self, events):

        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += events.__len__()
        
        # Initialize dict accumulators, if have not been initialized
        for jet in [0, 1, 2]:
            if dataset_name not in output[f"eta_{jet}_final"].keys():
                output[f"eta_{jet}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"ptoverM_{jet}_final"].keys():
                output[f"ptoverM_{jet}_final"][dataset_name] = processor.column_accumulator(np.array([]))
        
        for pair in [(0, 1), (1, 2), (2, 0)]:
            if dataset_name not in output[f"dEta_{pair[0]}{pair[1]}_final"].keys():
                output[f"dEta_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dR_{pair[0]}{pair[1]}_final"].keys():
                output[f"dR_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"moverM_{pair[0]}{pair[1]}_final"].keys():
                output[f"moverM_{pair[0]}{pair[1]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            
        for pair in [(0, 1, 2), (1, 2, 0), (2, 0, 1)]:
            if dataset_name not in output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
            if dataset_name not in output[f"dPtoverM_{pair[0]}_{pair[1]}{pair[2]}_final"].keys():
                output[f"dPtoverM_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"ptoverM_max_final"].keys():
            output[f"ptoverM_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"ptoverM_min_final"].keys():
            output[f"ptoverM_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"eta_max_final"].keys():
            output[f"eta_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_max_final"].keys():
            output[f"dR_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_min_final"].keys():
            output[f"dR_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_max_final"].keys():
            output[f"dEta_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_min_final"].keys():
            output[f"dEta_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_j_jj_max_final"].keys():
            output[f"dR_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dR_j_jj_min_final"].keys():
            output[f"dR_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_j_jj_max_final"].keys():
            output[f"dEta_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dEta_j_jj_min_final"].keys():
            output[f"dEta_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPhi_j_jj_max_final"].keys():
            output[f"dPhi_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPhi_j_jj_min_final"].keys():
            output[f"dPhi_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPtoverM_j_jj_max_final"].keys():
            output[f"dPtoverM_j_jj_max_final"][dataset_name] = processor.column_accumulator(np.array([]))
        if dataset_name not in output[f"dPtoverM_j_jj_min_final"].keys():
            output[f"dPtoverM_j_jj_min_final"][dataset_name] = processor.column_accumulator(np.array([]))
        
        # HLT selection
        HLT_mask = []
        if year == "2016":
            if "SingleMuon" in dataset_name: #this does not work, as the name of file which is under processing is unknown
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu24 | events.HLT.IsoTkMu24 | events.HLT.Mu50 | events.HLT.TkMu50
            else: #https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
                if "2016B2" in dataset_name:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                elif "2016H" in dataset_name:
                    HLT_mask = events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
                else:
                    HLT_mask = events.HLT.PFHT800 | events.HLT.PFHT900 | events.HLT.AK8PFJet450 | events.HLT.AK8PFJet500 | events.HLT.PFJet500 | events.HLT.CaloJet500_NoJetID
        if year == "2017":
            if "SingleMuon" in dataset_name:
                if "2017B" in dataset_name:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50
                else:
                    HLT_mask = events.HLT.IsoMu27 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        if year == "2018":
            if "SingleMuon" in dataset_name:
                HLT_mask = events.HLT.IsoMu24 | events.HLT.Mu50 | events.HLT.OldMu100 | events.HLT.TkMu100
            else:
                HLT_mask = events.HLT.PFHT1050 | events.HLT.AK8PFJet500 | events.HLT.AK8PFJet550 | events.HLT.CaloJet500_NoJetID | events.HLT.CaloJet550_NoJetID | events.HLT.PFJet500
        
        # Require 3 jets
        jet_mask = (events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5) & (events.Jet.isTight)
        event_mask = (awk.sum(jet_mask, axis=1) >= 3)
        event_mask = event_mask & HLT_mask
        events_3j = events[event_mask]
        
        # Reduce jet mask to only events with 3 good jets
        jet_mask = jet_mask[event_mask]

        # Array of the jets to consider for trijet resonance
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # Pairs of jets
        #pairs = awk.argcombinations(selected_jets, 2)
        #jet_i, jet_j = awk.unzip(pairs)
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs) # Returns [0, 1, 2] , [1, 2, 0]
        
        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta - selected_jets[:, jet_j].eta)
        
        jet_k = [2, 0, 1]
        dR_i_jk = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j] + selected_jets[:, jet_k])
        dEta_i_jk = abs(selected_jets[:, jet_i].eta - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).eta)
        dPhi_i_jk = abs(selected_jets[:, jet_i].phi - (selected_jets[:, jet_j] + selected_jets[:, jet_k]).phi)

        m3j = selected_jets.sum().mass
        
        pt_i_overM = selected_jets.pt / m3j
        m_01_overM = m_ij[:,0] / m3j
        m_12_overM = m_ij[:,1] / m3j
        m_20_overM = m_ij[:,2] / m3j
        dPtoverM_0_12 = abs(selected_jets[:, 0].pt - (selected_jets[:, 1] + selected_jets[:, 2]).pt) / m3j
        dPtoverM_1_20 = abs(selected_jets[:, 1].pt - (selected_jets[:, 2] + selected_jets[:, 0]).pt) / m3j
        dPtoverM_2_01 = abs(selected_jets[:, 2].pt - (selected_jets[:, 0] + selected_jets[:, 1]).pt) / m3j
        
        # Event selection masks
        # selection_masks = {}
        # Pre-selection
        selection = PackedSelection()
        selection.add("Dummy", m3j > 000)
        sel_mask = selection.require(**{name: True for name in selection.names})
        # selection_masks["Pre-selection"] = sel_mask
        
        output["selected_events"][dataset_name] += events_3j[sel_mask].__len__()
        
        for jet in [0, 1, 2]:
            output[f"eta_{jet}_final"][dataset_name] += processor.column_accumulator(np.array(selected_jets[:, jet][sel_mask].eta))
            output[f"ptoverM_{jet}_final"][dataset_name] += processor.column_accumulator(np.array(pt_i_overM[:, jet][sel_mask]))
        
        for pair in [(0, 1), (1, 2), (2, 0)]:
            output[f"dEta_{pair[0]}{pair[1]}_final"][dataset_name] += processor.column_accumulator(np.array(dEta_ij[:, pair[0]][sel_mask]))
            output[f"dR_{pair[0]}{pair[1]}_final"][dataset_name] += processor.column_accumulator(np.array(dR_ij[:, pair[0]][sel_mask]))
        
        output[f"moverM_01_final"][dataset_name] += processor.column_accumulator(np.array(m_01_overM[sel_mask]))
        output[f"moverM_12_final"][dataset_name] += processor.column_accumulator(np.array(m_12_overM[sel_mask]))
        output[f"moverM_20_final"][dataset_name] += processor.column_accumulator(np.array(m_20_overM[sel_mask]))
            
        for pair in [(0, 1, 2), (1, 2, 0), (2, 0, 1)]:
            output[f"dR_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dR_i_jk[:, pair[0]][sel_mask]))
            output[f"dEta_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dEta_i_jk[:, pair[0]][sel_mask]))
            output[f"Phi_{pair[0]}_{pair[1]}{pair[2]}_final"][dataset_name] += processor.column_accumulator(np.array(dPhi_i_jk[:, pair[0]][sel_mask]))
        
        output[f"dPtoverM_0_12_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_0_12[sel_mask]))
        output[f"dPtoverM_1_20_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_1_20[sel_mask]))
        output[f"dPtoverM_2_01_final"][dataset_name] += processor.column_accumulator(np.array(dPtoverM_2_01[sel_mask]))
        
        max_pt_overM_2fill = awk.max(pt_i_overM[sel_mask], axis=1)
        min_pt_overM_2fill = awk.min(pt_i_overM[sel_mask], axis=1)
        max_dR_2fill   = awk.max(dR_ij[sel_mask], axis=1)
        max_dEta_2fill = awk.max(dEta_ij[sel_mask], axis=1)
        min_dR_2fill   = awk.min(dR_ij[sel_mask], axis=1)
        min_dEta_2fill = awk.min(dEta_ij[sel_mask], axis=1)
        min_pt_2fill   = awk.min(selected_jets[sel_mask].pt, axis=1)
        max_eta_2fill  = awk.max(abs(selected_jets[sel_mask].eta), axis=1)
        max_dR_i_jk_2fill = awk.max(dR_i_jk[sel_mask], axis=1)
        min_dR_i_jk_2fill = awk.min(dR_i_jk[sel_mask], axis=1)
        max_dEta_i_jk_2fill = awk.max(dEta_i_jk[sel_mask], axis=1)
        min_dEta_i_jk_2fill = awk.min(dEta_i_jk[sel_mask], axis=1)
        max_dPhi_i_jk_2fill = awk.max(dPhi_i_jk[sel_mask], axis=1)
        min_dPhi_i_jk_2fill = awk.min(dPhi_i_jk[sel_mask], axis=1)
        max_dPtoverM_i_jk_2fill = []
        min_dPtoverM_i_jk_2fill = []
        dPtoverM_0_12_2fill = dPtoverM_0_12[sel_mask]
        dPtoverM_1_20_2fill = dPtoverM_1_20[sel_mask]
        dPtoverM_2_01_2fill = dPtoverM_2_01[sel_mask]
        for pair in zip(dPtoverM_0_12_2fill, dPtoverM_1_20_2fill, dPtoverM_2_01_2fill):
            max_dPtoverM_i_jk_2fill.append(max(pair))
            min_dPtoverM_i_jk_2fill.append(min(pair))
        max_pt_overM_2fill = awk.fill_none(max_pt_overM_2fill, -99)
        min_pt_overM_2fill = awk.fill_none(min_pt_overM_2fill, -99)
        max_dR_2fill = awk.fill_none(max_dR_2fill, -99)
        max_dEta_2fill = awk.fill_none(max_dEta_2fill, -99)
        min_dR_2fill = awk.fill_none(min_dR_2fill, -99)
        min_dEta_2fill = awk.fill_none(min_dEta_2fill, -99)
        min_pt_2fill = awk.fill_none(min_pt_2fill, -99)
        max_eta_2fill = awk.fill_none(max_eta_2fill, -99)
        max_dR_i_jk_2fill = awk.fill_none(max_dR_i_jk_2fill, -99)
        min_dR_i_jk_2fill = awk.fill_none(min_dR_i_jk_2fill, -99)
        max_dEta_i_jk_2fill = awk.fill_none(max_dEta_i_jk_2fill, -99)
        min_dEta_i_jk_2fill = awk.fill_none(min_dEta_i_jk_2fill, -99)
        max_dPhi_i_jk_2fill = awk.fill_none(max_dPhi_i_jk_2fill, -99)
        min_dPhi_i_jk_2fill = awk.fill_none(min_dPhi_i_jk_2fill, -99)
        
        output[f"ptoverM_max_final"][dataset_name] += processor.column_accumulator(np.array(max_pt_overM_2fill))
        output[f"ptoverM_min_final"][dataset_name] += processor.column_accumulator(np.array(min_pt_overM_2fill))
        output[f"eta_max_final"][dataset_name] += processor.column_accumulator(np.array(max_eta_2fill))
        output[f"dR_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dR_2fill))
        output[f"dR_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dR_2fill))
        output[f"dEta_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dEta_2fill))
        output[f"dEta_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dEta_2fill))
        output[f"dR_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dR_i_jk_2fill))
        output[f"dR_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dR_i_jk_2fill))
        output[f"dEta_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dEta_i_jk_2fill))
        output[f"dEta_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dEta_i_jk_2fill))
        output[f"dPhi_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dPhi_i_jk_2fill))
        output[f"dPhi_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dPhi_i_jk_2fill))
        output[f"dPtoverM_j_jj_max_final"][dataset_name] += processor.column_accumulator(np.array(max_dPtoverM_i_jk_2fill))
        output[f"dPtoverM_j_jj_min_final"][dataset_name] += processor.column_accumulator(np.array(min_dPtoverM_i_jk_2fill))

        return output 
Ejemplo n.º 9
0
    def process(self, events):
        """Fill the trijet kinematic histograms for one chunk of events.

        Events are kept when they fire the HLT paths chosen from ``year``
        and the dataset name, and contain at least three jets with
        pt > 30, |eta| < 2.5 and ``isTight`` set.  The first three such
        jets of each kept event are used to build pairwise and three-jet
        quantities, which are histogrammed once per named selection
        ("pre-selection" and "JetHLT - presel").

        ``year`` is not defined in this method; it is read from the
        enclosing scope.

        Args:
            events: Event collection exposing ``metadata['dataset']``,
                ``HLT`` and ``Jet``.
                NOTE(review): presumably a coffea NanoEvents array --
                confirm against the caller.

        Returns:
            The accumulator obtained from ``self._accumulator.identity()``
            with ``total_events`` incremented and the histograms filled.

        Raises:
            ValueError: If ``year`` is not "2016", "2017" or "2018".
                Previously the trigger mask silently stayed an empty
                list in that case and was then AND-ed with the event
                mask.
        """
        output = self._accumulator.identity()
        dataset_name = events.metadata['dataset']
        output["total_events"][dataset_name] += len(events)

        # --- HLT selection ---------------------------------------------
        # Jet-trigger paths per year/era, see
        # https://twiki.cern.ch/twiki/bin/view/CMS/HLTPathsRunIIList
        # The same mask is reused below for the "JetHLT_fired" cut, so it
        # is built exactly once.
        hlt = events.HLT
        if year == "2016":
            if "2016B2" in dataset_name:
                jet_hlt_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
            elif "2016H" in dataset_name:
                jet_hlt_mask = (hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
            else:
                jet_hlt_mask = (hlt.PFHT800 | hlt.PFHT900 | hlt.AK8PFJet450
                                | hlt.AK8PFJet500 | hlt.PFJet500
                                | hlt.CaloJet500_NoJetID)
        elif year in ("2017", "2018"):
            jet_hlt_mask = (hlt.PFHT1050 | hlt.AK8PFJet500 | hlt.AK8PFJet550
                            | hlt.CaloJet500_NoJetID
                            | hlt.CaloJet550_NoJetID | hlt.PFJet500)
        else:
            raise ValueError(
                f"Unsupported year {year!r}; "
                "expected '2016', '2017' or '2018'")

        # NOTE(review): the original author flagged that the "SingleMuon"
        # test does not work because the name of the file being processed
        # is not known here -- confirm what dataset_name really contains.
        if "SingleMuon" not in dataset_name:
            hlt_mask = jet_hlt_mask
        elif year == "2016":
            hlt_mask = hlt.IsoMu24 | hlt.IsoTkMu24 | hlt.Mu50
            if "2016B2" not in dataset_name:
                hlt_mask = hlt_mask | hlt.TkMu50
        elif year == "2017":
            hlt_mask = hlt.IsoMu27 | hlt.Mu50
            if "2017B" not in dataset_name:
                hlt_mask = hlt_mask | hlt.OldMu100 | hlt.TkMu100
        else:  # year == "2018"
            hlt_mask = hlt.IsoMu24 | hlt.Mu50 | hlt.OldMu100 | hlt.TkMu100

        # --- Require 3 good jets and a fired trigger -------------------
        jet_mask = ((events.Jet.pt > 30.) & (abs(events.Jet.eta) < 2.5)
                    & (events.Jet.isTight))
        event_mask = (awk.sum(jet_mask, axis=1) >= 3) & hlt_mask
        events_3j = events[event_mask]

        # Reduce the jet mask to the surviving events, then keep the first
        # three good jets of each: the candidates for the trijet resonance.
        jet_mask = jet_mask[event_mask]
        selected_jets = events_3j.Jet[jet_mask][:, :3]

        # --- Pairwise quantities ---------------------------------------
        # Column k of every *_ij array corresponds to pairs[k].
        pairs = [(0, 1), (1, 2), (2, 0)]
        jet_i, jet_j = zip(*pairs)  # (0, 1, 2) and (1, 2, 0)

        m_ij = (selected_jets[:, jet_i] + selected_jets[:, jet_j]).mass
        dR_ij = selected_jets[:, jet_i].delta_r(selected_jets[:, jet_j])
        dEta_ij = abs(selected_jets[:, jet_i].eta -
                      selected_jets[:, jet_j].eta)

        jet_pt = selected_jets.pt

        max_dEta = awk.max(dEta_ij, axis=1)
        min_dR = awk.min(dR_ij, axis=1)
        min_pT = awk.min(jet_pt, axis=1)

        # Three-jet invariant mass; the original note gives the intended
        # equivalent as (jet0 + jet1 + jet2).mass.
        m3j = selected_jets.sum().mass

        pt_i_overM = jet_pt / m3j

        # --- Event selections ------------------------------------------
        kinematic_cuts = [
            ("MaxDEta", max_dEta < 1.3),
            ("MinDR", min_dR > 0.4),
            ("MinJetPt", min_pT > 50.),
        ]
        selection_cuts = {
            "pre-selection": kinematic_cuts,
            # Same kinematic cuts plus the jet triggers, restricted to the
            # events that survived event_mask.
            "JetHLT - presel":
            [("JetHLT_fired", jet_hlt_mask[event_mask])] + kinematic_cuts,
        }

        # Evaluate each selection's pass mask once instead of once per
        # histogram fill.
        pass_masks = {}
        for selection_name, cuts in selection_cuts.items():
            selection = PackedSelection()
            for cut_name, cut in cuts:
                selection.add(cut_name, cut)
            pass_masks[selection_name] = selection.require(
                **{cut_name: True
                   for cut_name, _ in cuts})

        # --- Fill histograms -------------------------------------------
        # Each histogram's value axis has the same name as its key in
        # ``output``, so one table drives every fill.
        histogram_values = {
            "mjjj": m3j,
            "m01": m_ij[:, 0],
            "m12": m_ij[:, 1],
            "m20": m_ij[:, 2],
            "dR01": dR_ij[:, 0],
            "dR12": dR_ij[:, 1],
            "dR20": dR_ij[:, 2],
            "dEta01": dEta_ij[:, 0],
            "dEta12": dEta_ij[:, 1],
            "dEta20": dEta_ij[:, 2],
            "m01overM": m_ij[:, 0] / m3j,
            "m12overM": m_ij[:, 1] / m3j,
            "m20overM": m_ij[:, 2] / m3j,
            "pt0": selected_jets[:, 0].pt,
            "pt1": selected_jets[:, 1].pt,
            "pt2": selected_jets[:, 2].pt,
            "eta0": selected_jets[:, 0].eta,
            "eta1": selected_jets[:, 1].eta,
            "eta2": selected_jets[:, 2].eta,
            "ptoverM0": pt_i_overM[:, 0],
            "ptoverM1": pt_i_overM[:, 1],
            "ptoverM2": pt_i_overM[:, 2],
        }

        for selection_name, pass_mask in pass_masks.items():
            for hist_name, values in histogram_values.items():
                output[hist_name].fill(dataset=dataset_name,
                                       selection=selection_name,
                                       **{hist_name: values[pass_mask]})

        return output
Ejemplo n.º 10
0
def runOneFile(filename):
    #print ("filename: ", filename)
    #inputfile=filename
    outputfile = "output/" + inputfile.split("/")[-1]
    #outputfile = "tmp.root"
    mycache = uproot4.LRUArrayCache("1 MB")
    file_ = uproot4.open(inputfile, num_workers=10)
    #print ("root file opened: ", filename)
    nevents = ak.to_list(file_["h_total_mcweight"].values())[2]
    #nevents = 1000000
    print("histogram opened: ", nevents)

    #tree_ = uproot4.open(inputfile, num_workers=10)["outTree"].arrays(array_cache=mycache)
    tree_ = file_["outTree"].arrays(array_cache=mycache)
    print("tree length", len(tree_))

    #tree_ = uproot4.open(inputfile)[trees[0]].arrays()
    #tree_ = uproot4.open(inputfile)["outTree"].arrays(array_cache=mycache)
    #tree_ = uproot4.open("Merged_WJetsInclusiveSkim.root")["outTree"].arrays(array_cache=mycache)
    #tree_ = uproot4.open("/eos/cms/store/group/phys_exotica/bbMET/2016_SkimmedFiles/skim_setup_2016_v16_07-00/crab_DYJetsToLL_M-50_HT-400to600_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_200918_215129_0000_0.root")["outTree"].arrays(array_cache=mycache)
    #tree_ = uproot4.open("/eos/cms/store/group/phys_exotica/bbMET/2016_SkimmedFiles/skim_setup_2016_v16_07-00/crab_ttHTobb_M125_13TeV_powheg_pythia8_200918_215950_0000_0.root")["outTree"].arrays(array_cache=mycache)
    #print ((tree_))

    cms_events = ak.zip(
        {
            "run":
            tree_["st_runId"],
            "lumi":
            tree_["st_lumiSection"],
            "event":
            tree_["st_eventId"],
            "jetpx":
            tree_["st_THINjetPx"],
            "jetpy":
            tree_["st_THINjetPy"],
            "jetpz":
            tree_["st_THINjetPz"],
            "jete":
            tree_["st_THINjetEnergy"],
            "jetpt":
            getpt(tree_["st_THINjetPx"], tree_["st_THINjetPy"]),
            "jeteta":
            geteta(tree_["st_THINjetPx"], tree_["st_THINjetPy"],
                   tree_["st_THINjetPz"]),
            "jetphi":
            getphi(tree_["st_THINjetPx"], tree_["st_THINjetPy"]),
            "jetcsv":
            tree_["st_THINjetDeepCSV"],
            "jetflav":
            tree_["st_THINjetHadronFlavor"],
            "metpt":
            tree_["st_pfMetCorrPt"],
            "metphi":
            tree_["st_pfMetCorrPhi"],
            "mettrig":
            tree_["st_mettrigdecision"],
            "elepx":
            tree_["st_elePx"],
            "elepy":
            tree_["st_elePy"],
            "elepz":
            tree_["st_elePz"],
            "elee":
            tree_["st_eleEnergy"],
            "eleidL":
            tree_["st_eleIsPassLoose"],
            "eleidT":
            tree_["st_eleIsPassTight"],
            "eleq":
            tree_["st_eleCharge"],
            "elept":
            getpt(tree_["st_elePx"], tree_["st_elePy"]),
            "eleeta":
            geteta(tree_["st_elePx"], tree_["st_elePy"], tree_["st_elePz"]),
            "elephi":
            getphi(tree_["st_elePx"], tree_["st_elePy"]),
            "mupx":
            tree_["st_muPx"],
            "mupy":
            tree_["st_muPy"],
            "mupz":
            tree_["st_muPz"],
            "mue":
            tree_["st_muEnergy"],
            "muidT":
            tree_["st_isTightMuon"],
            "muq":
            tree_["st_muCharge"],
            "mupt":
            getpt(tree_["st_muPx"], tree_["st_muPy"]),
            "mueta":
            geteta(tree_["st_muPx"], tree_["st_muPy"], tree_["st_muPz"]),
            "muphi":
            getphi(tree_["st_muPx"], tree_["st_muPy"]),
            "ntau":
            tree_["st_nTau_discBased_TightEleTightMuVeto"],
            "npho":
            tree_["st_nPho"],
            "phopx":
            tree_["st_phoPx"],
            "phopy":
            tree_["st_phoPy"],
            "phopz":
            tree_["st_phoPz"],
            "phoe":
            tree_["st_phoEnergy"],
            "phopt":
            getpt(tree_["st_phoPx"], tree_["st_phoPy"]),
            "phoeta":
            geteta(tree_["st_phoPx"], tree_["st_phoPy"], tree_["st_phoPz"]),
            "nTrueInt":
            tree_["st_pu_nTrueInt"],
            "nPUVert":
            tree_["st_pu_nPUVert"],
            "genpt":
            tree_["st_genParPt"]
        },
        depth_limit=1)

    out_events = ak.zip(
        {
            "run": tree_["st_runId"],
            "lumi": tree_["st_lumiSection"],
            "event": tree_["st_eventId"]
        },
        depth_limit=1)

    print("event loading done")
    print("# of events: ", len(cms_events))

    ## add more columns/properties to the event
    cms_events["mu_sel_tight"] = (cms_events.mupt > 30) & (
        cms_events.muidT == True) & (numpy.abs(cms_events.mueta) < 2.4)
    cms_events["mu_sel_tight0"] = ak.Array(getN(cms_events.mu_sel_tight, 0))
    cms_events["nMuTight"] = ak.sum(cms_events.mu_sel_tight, axis=-1)
    cms_events["nMuLoose"] = ak.sum((cms_events.mupt > 10), axis=-1)
    cms_events["mu_q0"] = ak.Array(getN(cms_events.muq, 0))
    cms_events["mu_q1"] = ak.Array(getN(cms_events.muq, 1))

    cms_events["ele_sel_tight"] = (cms_events.eleidT == True) & (
        cms_events.elept > 30) & (numpy.abs(cms_events.eleeta) < 2.5)
    cms_events["ele_sel_tight0"] = ak.Array(getN(cms_events.ele_sel_tight, 0))
    cms_events["nEleTight"] = ak.sum(cms_events.ele_sel_tight, axis=-1)
    cms_events["nEleLoose"] = ak.sum((cms_events.elept > 10), axis=-1)
    cms_events["ele_q0"] = ak.Array(getN(cms_events.eleq, 0))
    cms_events["ele_q1"] = ak.Array(getN(cms_events.eleq, 1))

    cms_events["recoil_Wmunu"] = getrecoil(cms_events.nMuTight,
                                           cms_events.mupt, cms_events.muphi,
                                           cms_events.mupx, cms_events.mupy,
                                           cms_events.metpt, cms_events.metphi)
    cms_events["recoil_Wmunu0"] = ak.firsts(cms_events.recoil_Wmunu)
    cms_events["recoil_Wenu"] = getrecoil(cms_events.nEleTight,
                                          cms_events.elept, cms_events.elephi,
                                          cms_events.elepx, cms_events.elepy,
                                          cms_events.metpt, cms_events.metphi)
    cms_events["recoil_Wenu0"] = ak.firsts(cms_events.recoil_Wenu)

    elepx0 = ak.Array(getN(cms_events.elepx, 0))
    elepx1 = ak.Array(getN(cms_events.elepx, 1))

    elepy0 = ak.Array(getN(cms_events.elepy, 0))
    elepy1 = ak.Array(getN(cms_events.elepy, 1))

    elepz0 = ak.Array(getN(cms_events.elepz, 0))
    elepz1 = ak.Array(getN(cms_events.elepz, 1))

    elee0 = ak.Array(getN(cms_events.elee, 0))
    elee1 = ak.Array(getN(cms_events.elee, 1))

    cms_events["Zee_mass"] = numpy.sqrt((elee0 + elee1)**2 -
                                        (elepx0 + elepx1)**2 -
                                        (elepy0 + elepy1)**2 -
                                        (elepz0 + elepz1)**2)
    cms_events["Zee_pt"] = numpy.sqrt((elepx0 + elepx1)**2 +
                                      (elepy0 + elepy1)**2)
    cms_events["Zee_recoil"] = getrecoil1((elepx0 + elepx1), (elepy0 + elepy1),
                                          cms_events.metpt, cms_events.metphi)

    mupx0 = ak.Array(getN(cms_events.mupx, 0))
    mupx1 = ak.Array(getN(cms_events.mupx, 1))

    mupy0 = ak.Array(getN(cms_events.mupy, 0))
    mupy1 = ak.Array(getN(cms_events.mupy, 1))

    mupz0 = ak.Array(getN(cms_events.mupz, 0))
    mupz1 = ak.Array(getN(cms_events.mupz, 1))

    mue0 = ak.Array(getN(cms_events.mue, 0))
    mue1 = ak.Array(getN(cms_events.mue, 1))

    cms_events["Zmumu_mass"] = numpy.sqrt((mue0 + mue1)**2 -
                                          (mupx0 + mupx1)**2 -
                                          (mupy0 + mupy1)**2 -
                                          (mupz0 + mupz1)**2)
    cms_events["Zmumu_pt"] = numpy.sqrt((mupx0 + mupx1)**2 +
                                        (mupy0 + mupy1)**2)
    cms_events["Zmumu_recoil"] = getrecoil1(
        (mupx0 + mupx1), (mupy0 + mupy1), cms_events.metpt, cms_events.metphi)

    #cms_events["recoil_Zmumu"] = getrecoil
    cms_events["recoil_WmunuPhi"] = getRecoilPhi(
        cms_events.nMuTight, cms_events.mupt, cms_events.muphi,
        cms_events.mupx, cms_events.mupy, cms_events.metpt, cms_events.metphi)
    cms_events["recoil_WmunuPhi0"] = ak.firsts(cms_events.recoil_WmunuPhi)

    cms_events["recoil_WenuPhi"] = getRecoilPhi(
        cms_events.nEleTight, cms_events.elept, cms_events.elephi,
        cms_events.elepx, cms_events.elepy, cms_events.metpt,
        cms_events.metphi)
    cms_events["recoil_WenuPhi0"] = ak.firsts(cms_events.recoil_WenuPhi)

    cms_events["mt_Wmunu"] = getMT(cms_events.nMuTight, cms_events.mupt,
                                   cms_events.muphi, cms_events.mupx,
                                   cms_events.mupy, cms_events.metpt,
                                   cms_events.metphi)
    cms_events["mt_Wmunu0"] = ak.firsts(cms_events.mt_Wmunu)

    cms_events["mt_Wenu"] = getMT(cms_events.nEleTight, cms_events.elept,
                                  cms_events.elephi, cms_events.elepx,
                                  cms_events.elepy, cms_events.metpt,
                                  cms_events.metphi)
    cms_events["mt_Wenu0"] = ak.firsts(cms_events.mt_Wenu)

    cms_events["jet_sel_loose"] = (cms_events.jetpt > 30.0) & (numpy.abs(
        cms_events.jeteta) < 2.5)
    cms_events["jet_sel_tight"] = (cms_events.jetpt > 50.0) & (numpy.abs(
        cms_events.jeteta) < 2.5)
    #cms_events["jet_sel_b"]     = (cms_events.jetcsv > 0.6321) & (numpy.abs(cms_events.jeteta)<2.4)
    cms_events["jet_sel_b"] = (
        cms_events.jetcsv[cms_events.jet_sel_loose == True] > 0.6321
    ) & (numpy.abs(cms_events.jeteta[cms_events.jet_sel_loose == True]) < 2.4)

    cms_events["jetptTight"] = cms_events.jetpt[cms_events.jet_sel_tight ==
                                                True]
    cms_events["jetetaTight"] = cms_events.jeteta[cms_events.jet_sel_tight ==
                                                  True]
    cms_events["jetphiTight"] = cms_events.jetphi[cms_events.jet_sel_tight ==
                                                  True]

    cms_events["jetptLoose"] = cms_events.jetpt[cms_events.jet_sel_loose ==
                                                True]
    cms_events["jetetaLoose"] = cms_events.jeteta[cms_events.jet_sel_loose ==
                                                  True]
    cms_events["jetphiLoose"] = cms_events.jetphi[cms_events.jet_sel_loose ==
                                                  True]

    cms_events["jet_sel_tight0"] = ak.Array(
        getN(cms_events.jet_sel_tight[cms_events.jet_sel_loose == True], 0))
    cms_events["jet_sel_b_0"] = ak.Array(getN(cms_events.jet_sel_b, 0))
    cms_events["jet_sel_b_1"] = ak.Array(getN(cms_events.jet_sel_b, 1))

    cms_events["nJetLoose"] = ak.sum(cms_events.jet_sel_loose, axis=-1)
    cms_events["nJetTight"] = ak.sum(cms_events.jet_sel_tight, axis=-1)
    cms_events["nJetb"] = ak.sum(cms_events.jet_sel_b, axis=-1)

    cms_events["dphi_jet_met"] = DeltaPhi(
        cms_events.jetphi[cms_events.jet_sel_loose == True], cms_events.metphi)
    cms_events["min_dphi_jet_met"] = ak.min(cms_events.dphi_jet_met, axis=-1)

    #--------------------------------------------------------------------------------------------------
    ## W --> lepton + nu
    #--------------------------------------------------------------------------------------------------
    from regions import get_mask_wmunu1b, get_mask_wmunu2b, get_mask_wenu1b, get_mask_wenu2b, get_mask_topmunu1b, get_mask_topmunu2b, get_mask_topenu1b, get_mask_topenu2b, get_mask_Zmumu1b, get_mask_Zmumu2b, get_mask_Zee1b, get_mask_Zee2b, get_mask_SR1b, get_mask_SR2b

    cms_events["mask_wmunu1b"] = get_mask_wmunu1b(cms_events)
    cms_events["mask_wmunu2b"] = get_mask_wmunu2b(cms_events)
    cms_events["mask_wenu1b"] = get_mask_wenu1b(cms_events)
    cms_events["mask_wenu2b"] = get_mask_wenu2b(cms_events)
    cms_events["mask_topmunu1b"] = get_mask_topmunu1b(cms_events)
    cms_events["mask_topmunu2b"] = get_mask_topmunu2b(cms_events)
    cms_events["mask_topenu1b"] = get_mask_topenu1b(cms_events)
    cms_events["mask_topenu2b"] = get_mask_topenu2b(cms_events)
    cms_events["mask_Zmumu1b"] = get_mask_Zmumu1b(cms_events)
    cms_events["mask_Zmumu2b"] = get_mask_Zmumu2b(cms_events)
    cms_events["mask_Zee1b"] = get_mask_Zee1b(cms_events)
    cms_events["mask_Zee2b"] = get_mask_Zee2b(cms_events)
    cms_events["mask_SR1b"] = get_mask_SR1b(cms_events)
    cms_events["mask_SR2b"] = get_mask_SR2b(cms_events)
    '''
    wm = cms_events.event[mask_SR2b]
    wm[~ak.is_none(wm)]
    '''

    ###############
    out_events["metpt"] = cms_events["metpt"]
    out_events["metphi"] = cms_events["metphi"]
    out_events["nTrueInt"] = cms_events["nTrueInt"]
    out_events["nJetLoose"] = cms_events["nJetLoose"]

    out_events["mu_sel_tight0"] = cms_events["mu_sel_tight0"]
    out_events["nMuTight"] = cms_events["nMuTight"]
    out_events["nMuLoose"] = cms_events["nMuLoose"]
    out_events["mu_q0"] = cms_events["mu_q0"]
    out_events["mu_q1"] = cms_events["mu_q1"]
    out_events["mupt0"] = ak.Array(getN(cms_events.mupt, 0))
    out_events["mupt1"] = ak.Array(getN(cms_events.mupt, 1))
    out_events["mueta0"] = ak.Array(getN(cms_events.mueta, 0))
    out_events["mueta1"] = ak.Array(getN(cms_events.mueta, 1))
    out_events["muphi0"] = ak.Array(getN(cms_events.muphi, 0))
    out_events["muphi1"] = ak.Array(getN(cms_events.muphi, 1))

    out_events["ele_sel_tight0"] = cms_events["ele_sel_tight0"]
    out_events["nEleTight"] = cms_events["nEleTight"]
    out_events["nEleLoose"] = cms_events["nEleLoose"]
    out_events["ele_q0"] = cms_events["ele_q0"]
    out_events["ele_q1"] = cms_events["ele_q1"]
    out_events["elept0"] = ak.Array(getN(cms_events.elept, 0))
    out_events["elept1"] = ak.Array(getN(cms_events.elept, 1))
    out_events["eleeta0"] = ak.Array(getN(cms_events.eleeta, 0))
    out_events["eleeta1"] = ak.Array(getN(cms_events.eleeta, 1))
    out_events["elephi0"] = ak.Array(getN(cms_events.elephi, 0))
    out_events["elephi1"] = ak.Array(getN(cms_events.elephi, 1))

    out_events["recoil_Wmunu0"] = cms_events["recoil_Wmunu0"]
    out_events["recoil_Wenu0"] = cms_events["recoil_Wenu0"]
    out_events["recoil_WmunuPhi0"] = cms_events["recoil_WmunuPhi0"]
    out_events["recoil_WenuPhi0"] = cms_events["recoil_WenuPhi0"]
    out_events["mt_Wmunu0"] = cms_events["mt_Wmunu0"]
    out_events["mt_Wenu0"] = cms_events["mt_Wenu0"]

    out_events["Zee_mass"] = cms_events["Zee_mass"]
    out_events["Zee_pt"] = cms_events["Zee_pt"]
    out_events["Zee_recoil"] = cms_events["Zee_recoil"]
    out_events["Zmumu_mass"] = cms_events["Zmumu_mass"]
    out_events["Zmumu_pt"] = cms_events["Zmumu_pt"]
    out_events["Zmumu_recoil"] = cms_events["Zmumu_recoil"]

    out_events["nJetLoose"] = cms_events["nJetLoose"]
    out_events["nJetTight"] = cms_events["nJetTight"]
    out_events["nJetb"] = cms_events["nJetb"]
    out_events["min_dphi_jet_met"] = cms_events["min_dphi_jet_met"]
    cms_events["jet_sel_tight0"] = cms_events["jet_sel_tight0"]
    cms_events["jet_sel_b_0"] = cms_events["jet_sel_b_0"]
    cms_events["jet_sel_b_1"] = cms_events["jet_sel_b_1"]

    out_events["jetpt0"] = ak.Array(getN(cms_events.jetptTight, 0))
    out_events["jetpt1"] = ak.Array(getN(cms_events.jetptLoose, 1))
    out_events["jetpt2"] = ak.Array(getN(cms_events.jetptLoose, 2))
    out_events["jetpt3"] = ak.Array(getN(cms_events.jetptLoose, 3))
    out_events["jetpt4"] = ak.Array(getN(cms_events.jetptLoose, 4))
    out_events["jetpt5"] = ak.Array(getN(cms_events.jetptLoose, 5))
    out_events["jetpt6"] = ak.Array(getN(cms_events.jetptLoose, 6))

    out_events["jeteta0"] = ak.Array(getN(cms_events.jetetaTight, 0))
    out_events["jeteta1"] = ak.Array(getN(cms_events.jetetaLoose, 1))
    out_events["jeteta2"] = ak.Array(getN(cms_events.jetetaLoose, 2))
    out_events["jeteta3"] = ak.Array(getN(cms_events.jetetaLoose, 3))
    out_events["jeteta4"] = ak.Array(getN(cms_events.jetetaLoose, 4))
    out_events["jeteta5"] = ak.Array(getN(cms_events.jetetaLoose, 5))
    out_events["jeteta6"] = ak.Array(getN(cms_events.jetetaLoose, 6))

    out_events["jetphi0"] = ak.Array(getN(cms_events.jetphiTight, 0))
    out_events["jetphi1"] = ak.Array(getN(cms_events.jetphiLoose, 1))
    out_events["jetphi2"] = ak.Array(getN(cms_events.jetphiLoose, 2))

    out_events["jetflav0"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_tight == True], 0))
    out_events["jetflav1"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 1))
    out_events["jetflav2"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 2))
    out_events["jetflav3"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 3))
    out_events["jetflav4"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 4))
    out_events["jetflav5"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 5))
    out_events["jetflav6"] = ak.Array(
        getN(cms_events.jetflav[cms_events.jet_sel_loose == True], 6))

    out_events["csv0"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_tight == True], 0))
    out_events["csv1"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_loose == True], 1))
    out_events["csv2"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_loose == True], 2))
    out_events["csv3"] = ak.Array(
        getN(cms_events.jetcsv[cms_events.jet_sel_loose == True], 3))

    out_events["SR_2b"] = cms_events["mask_SR2b"]
    out_events["SR_1b"] = cms_events["mask_SR1b"]
    out_events["ZeeCR_2b"] = cms_events["mask_Zee2b"]
    out_events["ZeeCR_1b"] = cms_events["mask_Zee1b"]
    out_events["ZmumuCR_2b"] = cms_events["mask_Zmumu2b"]
    out_events["ZmumuCR_1b"] = cms_events["mask_Zmumu1b"]
    out_events["TopenuCR_2b"] = cms_events["mask_topenu2b"]
    out_events["TopenuCR_1b"] = cms_events["mask_topenu1b"]
    out_events["TopmunuCR_2b"] = cms_events["mask_topmunu2b"]
    out_events["TopmunuCR_1b"] = cms_events["mask_topmunu1b"]
    out_events["WenuCR_1b"] = cms_events["mask_wenu1b"]
    out_events["WenuCR_2b"] = cms_events["mask_wenu2b"]
    out_events["WmunuCR_1b"] = cms_events["mask_wmunu1b"]
    out_events["WmunuCR_2b"] = cms_events["mask_wmunu2b"]

    ## btagging SFs
    from read_sfs import btag_sf
    from read_sfs import evaluator

    out_events["btagsf0"] = btag_sf.eval("central", out_events.jetflav0,
                                         abs(out_events.jeteta0),
                                         out_events.jetpt0)
    out_events["btagsf1"] = btag_sf.eval("central", out_events.jetflav1,
                                         abs(out_events.jeteta1),
                                         out_events.jetpt1)
    out_events["btagsf2"] = btag_sf.eval("central", out_events.jetflav2,
                                         abs(out_events.jeteta2),
                                         out_events.jetpt2)
    out_events["btagsf3"] = btag_sf.eval("central", out_events.jetflav3,
                                         abs(out_events.jeteta3),
                                         out_events.jetpt3)
    out_events["btagsf4"] = btag_sf.eval("central", out_events.jetflav4,
                                         abs(out_events.jeteta4),
                                         out_events.jetpt4)
    out_events["btagsf5"] = btag_sf.eval("central", out_events.jetflav5,
                                         abs(out_events.jeteta5),
                                         out_events.jetpt5)
    out_events["btagsf6"] = btag_sf.eval("central", out_events.jetflav6,
                                         abs(out_events.jeteta6),
                                         out_events.jetpt6)

    ## btag efficiency
    out_events["btag_eff_lwp_0"] = evaluator["btag_eff_lwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["btag_eff_lwp_1"] = evaluator["btag_eff_lwp"](
        out_events.jeteta1, out_events.jetpt1)

    out_events["ctag_eff_lwp_0"] = evaluator["ctag_eff_lwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ctag_eff_lwp_1"] = evaluator["ctag_eff_lwp"](
        out_events.jeteta1, out_events.jetpt1)

    out_events["ltag_eff_lwp_0"] = evaluator["ltag_eff_lwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ltag_eff_lwp_1"] = evaluator["ltag_eff_lwp"](
        out_events.jeteta1, out_events.jetpt1)

    out_events["btag_eff_mwp_0"] = evaluator["btag_eff_mwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["btag_eff_mwp_1"] = evaluator["btag_eff_mwp"](
        out_events.jeteta1, out_events.jetpt1)

    out_events["ctag_eff_mwp_0"] = evaluator["ctag_eff_mwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ctag_eff_mwp_1"] = evaluator["ctag_eff_mwp"](
        out_events.jeteta1, out_events.jetpt1)

    out_events["ltag_eff_mwp_0"] = evaluator["ltag_eff_mwp"](
        out_events.jeteta0, out_events.jetpt0)
    out_events["ltag_eff_mwp_1"] = evaluator["ltag_eff_mwp"](
        out_events.jeteta1, out_events.jetpt1)

    ## ele sfs
    out_events["eleTightSF0"] = evaluator["EGamma_SF2D_T"](out_events.eleeta0,
                                                           out_events.elept0)
    out_events["eleLooseSF1"] = evaluator["EGamma_SF2D_L"](out_events.eleeta1,
                                                           out_events.elept1)
    out_events["eleTrigSF0"] = evaluator["EGamma_SF2D_Trig"](
        out_events.eleeta0, out_events.elept0)
    out_events["eleRecoSF0"] = evaluator["EGamma_SF2D_Reco"](
        out_events.eleeta0, out_events.elept0)

    eleRecoSF1_hi = evaluator["EGamma_SF2D_Reco"](out_events.eleeta1,
                                                  out_events.elept1)
    eleRecoSF1_lo = evaluator["EGamma_SF2D_Reco_lowpt"](out_events.eleeta1,
                                                        out_events.elept1)

    eleRecoSF1_hi_ = ak.fill_none(
        ak.mask(eleRecoSF1_hi, out_events.elept1 > 20.), 0)
    eleRecoSF1_lo_ = ak.fill_none(
        ak.mask(eleRecoSF1_lo, out_events.elept1 > 20.), 0)
    out_events["eleRecoSF1"] = eleRecoSF1_hi_ + eleRecoSF1_lo_

    ## muon sfs
    bcdef_lumi = 19.554725529
    gh_lumi = 16.224846377
    total_lumi = bcdef_lumi + gh_lumi

    ##--------low pt Loose
    muonLooseIDSF_lowpt1 = (
        (bcdef_lumi * evaluator["muon_lowpt_BCDEF_LooseID"]
         (out_events.mupt1, abs(out_events.mueta1))) +
        (gh_lumi * evaluator["muon_lowpt_GH_LooseID"]
         (out_events.mupt1, abs(out_events.mueta1)))) / total_lumi

    ##----------- medium pt Loose
    muonLooseIDSF1 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_LooseID"]
                       (out_events.mueta1, out_events.mupt1)) +
                      (gh_lumi * evaluator["muon_highpt_GH_LooseID"]
                       (out_events.mueta1, out_events.mupt1))) / total_lumi

    muonLooseISOSF1 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_LooseISO"]
                        (out_events.mueta1, out_events.mupt1)) +
                       (gh_lumi * evaluator["muon_highpt_GH_LooseISO"]
                        (out_events.mueta1, out_events.mupt1))) / total_lumi

    muon_loose_ID_low_SF_1 = ak.fill_none(
        ak.mask(muonLooseIDSF_lowpt1, out_events.mupt1 < 20.), 0)
    muon_loose_ID_high_SF_1 = ak.fill_none(
        ak.mask(muonLooseIDSF1, out_events.mupt1 > 20.), 0)

    muon_loose_ID_SF_1 = muon_loose_ID_low_SF_1 + muon_loose_ID_high_SF_1

    out_events["muLooseSF1"] = muon_loose_ID_SF_1 * muonLooseISOSF1

    ##------------medium pt tight
    muonTightIDSF0 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_TightID"]
                       (out_events.mueta0, out_events.mupt0)) +
                      (gh_lumi * evaluator["muon_highpt_GH_TightID"]
                       (out_events.mueta0, out_events.mupt0))) / total_lumi

    muonTightISOSF0 = ((bcdef_lumi * evaluator["muon_highpt_BCDEF_TightISO"]
                        (out_events.mueta0, out_events.mupt0)) +
                       (gh_lumi * evaluator["muon_highpt_GH_TightISO"]
                        (out_events.mueta0, out_events.mupt0))) / total_lumi

    out_events["muTightSF0"] = muonTightIDSF0 * muonTightISOSF0

    out_events["puweight"] = evaluator["pu_weight"](cms_events.nTrueInt)

    ## trigger sfs
    out_events["mettrigWeight"] = evaluator["met_trig"](cms_events.metpt)
    out_events["recoilWmunutrigWeight"] = evaluator["met_trig"](
        cms_events.recoil_Wmunu0)
    out_events["recoilWenutrigWeight"] = evaluator["met_trig"](
        cms_events.recoil_Wenu0)
    out_events["recoilZmumutrigWeight"] = evaluator["met_trig"](
        cms_events.Zmumu_recoil)
    out_events["recoilZeetrigWeight"] = evaluator["met_trig"](
        cms_events.Zee_recoil)

    ## Fill weights for each CR so that we don't need to worry later

    out_events["weight_SR_2b"] = out_events.puweight * out_events.mettrigWeight
    out_events["weight_SR_1b"] = out_events.puweight * out_events.mettrigWeight

    out_events["weight_ZeeCR_2b"] = out_events.puweight * out_events.eleTrigSF0
    out_events["weight_ZeeCR_1b"] = out_events.puweight * out_events.eleTrigSF0

    out_events[
        "weight_ZmumuCR_2b"] = out_events.puweight * out_events.recoilZmumutrigWeight
    out_events[
        "weight_ZmumuCR_1b"] = out_events.puweight * out_events.recoilZmumutrigWeight

    out_events[
        "weight_TopenuCR_2b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_TopenuCR_1b"] = out_events.puweight * out_events.eleTrigSF0

    out_events[
        "weight_TopmunuCR_2b"] = out_events.puweight * out_events.recoilWmunutrigWeight
    out_events[
        "weight_TopmunuCR_1b"] = out_events.puweight * out_events.recoilWmunutrigWeight

    out_events[
        "weight_WenuCR_1b"] = out_events.puweight * out_events.eleTrigSF0
    out_events[
        "weight_WenuCR_2b"] = out_events.puweight * out_events.eleTrigSF0

    out_events[
        "weight_WmunuCR_1b"] = out_events.puweight * out_events.recoilWmunutrigWeight
    out_events[
        "weight_WmunuCR_2b"] = out_events.puweight * out_events.recoilWmunutrigWeight

    ## Fill Histograms
    from variables import vardict, regions, variables_common
    from binning import binning

    f = TFile(outputfile, "RECREATE")
    for ireg in regions:
        thisregion = out_events[out_events[ireg] == True]
        thisregion_ = thisregion[~(ak.is_none(thisregion))]
        weight_ = "weight_" + ireg

        for ivar in variables_common[ireg]:
            hist_name_ = "h_reg_" + ireg + "_" + vardict[ivar]
            h = VarToHist(thisregion_[ivar], thisregion_[weight_], hist_name_,
                          binning[ireg][ivar])
            f.cd()
            h.Write()

    h_total = TH1F("h_total_mcweight", "h_total_mcweight", 2, 0, 2)
    h_total.SetBinContent(1, nevents)
    f.cd()
    h_total.Write()

    write_parquet = False
    if write_parquet:
        ak.to_parquet(out_events, "analysis_wjets_allevents.parquet")
Ejemplo n.º 11
0
    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            ## Generated leptons
            gen_lep = ev.GenL
            leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
            trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        # Get the leptons. This has changed a couple of times now, but we are using fakeable objects as baseline leptons.
        # The added p4 instance has the corrected pt (conePt for fakeable) and should be used for any following selection or calculation
        # Any additional correction (if we choose to do so) should be added here, e.g. Rochester corrections, ...
        ## Muons
        mu_v     = Collections(ev, "Muon", "vetoTTH", year=year).get()  # these include all muons, tight and fakeable
        mu_t     = Collections(ev, "Muon", "tightSSTTH", year=year).get()
        mu_f     = Collections(ev, "Muon", "fakeableSSTTH", year=year).get()
        muon     = ak.concatenate([mu_t, mu_f], axis=1)
        muon['p4'] = get_four_vec_fromPtEtaPhiM(muon, get_pt(muon), muon.eta, muon.phi, muon.mass, copy=False) #FIXME new
        
        ## Electrons
        el_v        = Collections(ev, "Electron", "vetoTTH", year=year).get()
        el_t        = Collections(ev, "Electron", "tightSSTTH", year=year).get()
        el_f        = Collections(ev, "Electron", "fakeableSSTTH", year=year).get()
        electron    = ak.concatenate([el_t, el_f], axis=1)
        electron['p4'] = get_four_vec_fromPtEtaPhiM(electron, get_pt(electron), electron.eta, electron.phi, electron.mass, copy=False) #FIXME new
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            el_t_p  = prompt(el_t)
            el_t_np = nonprompt(el_t)
            el_f_p  = prompt(el_f)
            el_f_np = nonprompt(el_f)
            mu_t_p  = prompt(mu_t)
            mu_t_np = nonprompt(mu_t)
            mu_f_p  = prompt(mu_f)
            mu_f_np = nonprompt(mu_f)

            is_flipped = ( (el_t_p.matched_gen.pdgId*(-1) == el_t_p.pdgId) & (abs(el_t_p.pdgId) == 11) )
            el_t_p_cc  = el_t_p[~is_flipped]  # this is tight, prompt, and charge consistent
            el_t_p_cf  = el_t_p[is_flipped]  # this is tight, prompt, and charge flipped


        ## Merge electrons and muons. These are fakeable leptons now
        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.p4.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.p4.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton.p4 + trailing_lepton.p4).mass
        dilepton_pt = (leading_lepton.p4 + trailing_lepton.p4).pt
        #dilepton_dR = delta_r(leading_lepton, trailing_lepton)
        dilepton_dR = leading_lepton.p4.delta_r(trailing_lepton.p4)
        
        lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.p4.pt, ascending=False)].pdgId, 2, clip=True), 0)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
            n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)
            gp = ev.GenPart
            gp_e = gp[((abs(gp.pdgId)==11)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))]
            gp_m = gp[((abs(gp.pdgId)==13)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))]
            n_gen_lep = ak.num(gp_e) + ak.num(gp_m)
        else:
            n_gen_lep = np.zeros(len(ev))

        LL = (n_gen_lep > 2)  # this is the classifier for LL events (should mainly be ttZ/tZ/WZ...)

        mt_lep_met = mt(lepton.p4.pt, lepton.p4.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Tau and other stuff
        tau       = getTaus(ev)
        tau       = tau[~match(tau, muon, deltaRCut=0.4)] 
        tau       = tau[~match(tau, electron, deltaRCut=0.4)]

        track     = getIsoTracks(ev)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        bl          = cross(lepton, high_score_btag)
        bl_dR       = delta_r(bl['0'], bl['1'])
        min_bl_dR   = ak.min(bl_dR, axis=1)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        # try to get either the most forward light jet, or if there's more than one with eta>1.7, the highest pt one
        most_fwd = light[ak.argsort(abs(light.eta))][:,0:1]
        #most_fwd = light[ak.singletons(ak.argmax(abs(light.eta)))]
        best_fwd = ak.concatenate([j_fwd, most_fwd], axis=1)[:,0:1]
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        #st = met_pt + ht + ak.sum(get_pt(muon), axis=1) + ak.sum(get_pt(electron), axis=1)
        st = met_pt + ht + ak.sum(lepton.p4.pt, axis=1)
        
        # define the weight
        weight = Weights( len(ev) )

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            
            # PU weight
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        

        cutflow     = Cutflow(output, ev, weight=weight)

        # slightly restructured
        # calculate everything from loose, require two tights on top
        # since n_tight == n_loose == 2, the tight and loose leptons are the same in the end

        # in this selection we'll get events with exactly two fakeable+tight and two loose leptons.
        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = el_v,
            mu = muon,
            mu_veto = mu_v,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            jet_light = light,
            met = ev.MET,
        )
        
        baseline = sel.dilep_baseline(cutflow=cutflow, SS=True, omit=['N_fwd>0'])
        baseline_OS = sel.dilep_baseline(cutflow=cutflow, SS=False, omit=['N_fwd>0'])  # this is for charge flip estimation
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):

            BL = (baseline & ((ak.num(el_t_p_cc)+ak.num(mu_t_p))==2))  # this is the MC baseline for events with two tight prompt leptons
            BL_incl = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2)) # this is the MC baseline for events with two tight leptons

            np_est_sel_mc = (baseline & \
                ((((ak.num(el_t_p_cc)+ak.num(mu_t_p))==1) & ((ak.num(el_f_np)+ak.num(mu_f_np))==1)) | (((ak.num(el_t_p_cc)+ak.num(mu_t_p))==0) & ((ak.num(el_f_np)+ak.num(mu_f_np))==2)) ))  # no overlap between tight and nonprompt, and veto on additional leptons. this should be enough
            np_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_np)+ak.num(mu_t_np))>=1) )  # two tight leptons, at least one nonprompt
            np_est_sel_data = (baseline & ~baseline)  # this has to be false

            cf_est_sel_mc = (baseline_OS & ((ak.num(el_t_p)+ak.num(mu_t_p))==2))
            cf_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_p_cf))>=1) )  # two tight leptons, at least one electron charge flip
            cf_est_sel_data = (baseline & ~baseline)  # this has to be false

            weight_np_mc = self.nonpromptWeight.get(el_f_np, mu_f_np, meas='TT')
            weight_cf_mc = self.chargeflipWeight.flip_weight(el_t_p)

        else:
            BL = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2))

            BL_incl = BL

            np_est_sel_mc = (baseline & ~baseline)
            np_obs_sel_mc = (baseline & ~baseline)
            np_est_sel_data = (baseline & (ak.num(el_t)+ak.num(mu_t)==1) & (ak.num(el_f)+ak.num(mu_f)==1) )

            cf_est_sel_mc = (baseline & ~baseline)
            cf_obs_sel_mc = (baseline & ~baseline)
            cf_est_sel_data = (baseline_OS & ((ak.num(el_t)+ak.num(mu_t))==2) )

            weight_np_mc = np.zeros(len(ev))
            weight_cf_mc = np.zeros(len(ev))

            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)

            if False:
                output['%s_run'%dataset] += processor.column_accumulator(run_[BL])
                output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL])
                output['%s_event'%dataset] += processor.column_accumulator(event_[BL])

        weight_BL = weight.weight()[BL]  # this is just a shortened weight list for the two prompt selection
        weight_np_data = self.nonpromptWeight.get(el_f, mu_f, meas='data')
        weight_cf_data = self.chargeflipWeight.flip_weight(el_t)

        out_sel = (BL | np_est_sel_mc | cf_est_sel_mc)

        dummy = (np.ones(len(ev))==1)
        def fill_multiple_np(hist, arrays, add_sel=dummy):
            """Fill ``hist`` for every prompt / nonprompt / charge-flip category.

            Thin wrapper around ``fill_multiple`` that pairs each of the eight
            dataset labels below with its event selection and event weights,
            all taken from the enclosing scope.

            Args:
                hist: histogram object forwarded unchanged to ``fill_multiple``.
                arrays: dict of axis name -> per-event values, forwarded
                    unchanged to ``fill_multiple``.
                add_sel: extra per-event boolean mask that is AND-ed with every
                    category selection. Defaults to ``dummy``, the all-True
                    mask, i.e. no additional requirement.
            """
            # One selection per dataset label passed below, in the same order.
            # NOTE: this used to end in a stray trailing comma, which wrapped
            # the list in a 1-tuple and forced ``reg_sel[0]`` indexing
            # everywhere ("the additional dimension").
            reg_sel = [
                BL & add_sel,
                BL_incl & add_sel,
                np_est_sel_mc & add_sel,
                np_obs_sel_mc & add_sel,
                np_est_sel_data & add_sel,
                cf_est_sel_mc & add_sel,
                cf_obs_sel_mc & add_sel,
                cf_est_sel_data & add_sel,
            ]

            # Fetch the nominal per-event weights once instead of once per
            # category.
            event_weight = weight.weight()

            fill_multiple(
                hist,
                datasets=[
                    dataset, # only prompt contribution from process
                    dataset+"_incl", # everything from process (inclusive MC truth)
                    "np_est_mc", # MC based NP estimate
                    "np_obs_mc", # MC based NP observation
                    "np_est_data",
                    "cf_est_mc",
                    "cf_obs_mc",
                    "cf_est_data",
                ],
                arrays=arrays,
                selections=reg_sel,
                # The "*_est_*" categories additionally carry the matching
                # nonprompt / charge-flip weight; the others use the nominal
                # event weight only.
                weights=[
                    event_weight[reg_sel[0]],
                    event_weight[reg_sel[1]],
                    event_weight[reg_sel[2]]*weight_np_mc[reg_sel[2]],
                    event_weight[reg_sel[3]],
                    event_weight[reg_sel[4]]*weight_np_data[reg_sel[4]],
                    event_weight[reg_sel[5]]*weight_cf_mc[reg_sel[5]],
                    event_weight[reg_sel[6]],
                    event_weight[reg_sel[7]]*weight_cf_data[reg_sel[7]],
                ],
            )

        if self.evaluate or self.dump:
            # define the inputs to the NN
            # this is super stupid. there must be a better way.
            # used a np.stack which is ok performance wise. pandas data frame seems to be slow and memory inefficient
            #FIXME no n_b, n_fwd back in v13/v14 of the DNN

            NN_inputs_d = {
                'n_jet':            ak.to_numpy(ak.num(jet)),
                'n_fwd':            ak.to_numpy(ak.num(fwd)),
                'n_b':              ak.to_numpy(ak.num(btag)),
                'n_tau':            ak.to_numpy(ak.num(tau)),
                #'n_track':          ak.to_numpy(ak.num(track)),
                'st':               ak.to_numpy(st),
                'met':              ak.to_numpy(ev.MET.pt),
                'mjj_max':          ak.to_numpy(ak.fill_none(ak.max(mjf, axis=1),0)),
                'delta_eta_jj':     ak.to_numpy(pad_and_flatten(delta_eta)),
                'lead_lep_pt':      ak.to_numpy(pad_and_flatten(leading_lepton.p4.pt)),
                'lead_lep_eta':     ak.to_numpy(pad_and_flatten(leading_lepton.p4.eta)),
                'sublead_lep_pt':   ak.to_numpy(pad_and_flatten(trailing_lepton.p4.pt)),
                'sublead_lep_eta':  ak.to_numpy(pad_and_flatten(trailing_lepton.p4.eta)),
                'dilepton_mass':    ak.to_numpy(pad_and_flatten(dilepton_mass)),
                'dilepton_pt':      ak.to_numpy(pad_and_flatten(dilepton_pt)),
                'fwd_jet_pt':       ak.to_numpy(pad_and_flatten(best_fwd.pt)),
                'fwd_jet_p':        ak.to_numpy(pad_and_flatten(best_fwd.p)),
                'fwd_jet_eta':      ak.to_numpy(pad_and_flatten(best_fwd.eta)),
                'lead_jet_pt':      ak.to_numpy(pad_and_flatten(jet[:, 0:1].pt)),
                'sublead_jet_pt':   ak.to_numpy(pad_and_flatten(jet[:, 1:2].pt)),
                'lead_jet_eta':     ak.to_numpy(pad_and_flatten(jet[:, 0:1].eta)),
                'sublead_jet_eta':  ak.to_numpy(pad_and_flatten(jet[:, 1:2].eta)),
                'lead_btag_pt':     ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].pt)),
                'sublead_btag_pt':  ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].pt)),
                'lead_btag_eta':    ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].eta)),
                'sublead_btag_eta': ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].eta)),
                'min_bl_dR':        ak.to_numpy(ak.fill_none(min_bl_dR, 0)),
                'min_mt_lep_met':   ak.to_numpy(ak.fill_none(min_mt_lep_met, 0)),
            }

            if self.dump:
                for k in NN_inputs_d.keys():
                    output[k] += processor.column_accumulator(NN_inputs_d[k][out_sel])

            if self.evaluate:
            
                NN_inputs = np.stack( [NN_inputs_d[k] for k in NN_inputs_d.keys()] )

                NN_inputs = np.nan_to_num(NN_inputs, 0, posinf=1e5, neginf=-1e5)  # events with posinf/neginf/nan will not pass the BL selection anyway

                NN_inputs = np.moveaxis(NN_inputs, 0, 1)  # this is needed for a np.stack (old version)

                model, scaler = load_onnx_model(self.training)

                try:
                    NN_inputs_scaled = scaler.transform(NN_inputs)

                    NN_pred    = predict_onnx(model, NN_inputs_scaled)

                    best_score = np.argmax(NN_pred, axis=1)


                except ValueError:
                    print ("Problem with prediction. Showing the shapes here:")
                    print (np.shape(NN_inputs))
                    print (np.shape(weight_BL))
                    NN_pred = np.array([])
                    best_score = np.array([])
                    NN_inputs_scaled = NN_inputs
                    raise

                ##k.clear_session()

                #FIXME below needs to be fixed again with changed NN evaluation. Should work now

                fill_multiple_np(output['node'], {'multiplicity':best_score})
                fill_multiple_np(output['node0_score_incl'], {'score':NN_pred[:,0]})
                fill_multiple_np(output['node1_score_incl'], {'score':NN_pred[:,1]})
                fill_multiple_np(output['node2_score_incl'], {'score':NN_pred[:,2]})
                fill_multiple_np(output['node3_score_incl'], {'score':NN_pred[:,3]})
                fill_multiple_np(output['node4_score_incl'], {'score':NN_pred[:,4]})
                
                fill_multiple_np(output['node0_score'], {'score':NN_pred[:,0]}, add_sel=(best_score==0))
                fill_multiple_np(output['node1_score'], {'score':NN_pred[:,1]}, add_sel=(best_score==1))
                fill_multiple_np(output['node2_score'], {'score':NN_pred[:,2]}, add_sel=(best_score==2))
                fill_multiple_np(output['node3_score'], {'score':NN_pred[:,3]}, add_sel=(best_score==3))
                fill_multiple_np(output['node4_score'], {'score':NN_pred[:,4]}, add_sel=(best_score==4))

                #SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0)))
                #SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0)))
                #leading_lepton_BL = leading_lepton[BL]

                #output['lead_lep_SR_pp'].fill(
                #    dataset = dataset,
                #    pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
                #    weight = weight_BL[SR_sel_pp]
                #)

                #output['lead_lep_SR_mm'].fill(
                #    dataset = dataset,
                #    pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
                #    weight = weight_BL[SR_sel_mm]
                #)

                del model
                del scaler
                del NN_inputs, NN_inputs_scaled, NN_pred

        labels = {'topW_v3': 0, 'TTW':1, 'TTZ': 2, 'TTH': 3, 'ttbar': 4, 'rare':5, 'diboson':6}  # these should be all?
        if dataset in labels:
            label_mult = labels[dataset]
        else:
            label_mult = 7  # data or anything else

        if self.dump:
            output['label']     += processor.column_accumulator(np.ones(len(ev[out_sel])) * label_mult)
            output['SS']        += processor.column_accumulator(ak.to_numpy(BL[out_sel]))
            output['OS']        += processor.column_accumulator(ak.to_numpy(cf_est_sel_mc[out_sel]))
            output['AR']        += processor.column_accumulator(ak.to_numpy(np_est_sel_mc[out_sel]))
            output['LL']        += processor.column_accumulator(ak.to_numpy(LL[out_sel]))
            output['weight']    += processor.column_accumulator(ak.to_numpy(weight.weight()[out_sel]))
            output['weight_np'] += processor.column_accumulator(ak.to_numpy(weight_np_mc[out_sel]))
            output['weight_cf'] += processor.column_accumulator(ak.to_numpy(weight_cf_mc[out_sel]))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        fill_multiple_np(output['N_jet'],     {'multiplicity': ak.num(jet)})
        fill_multiple_np(output['N_b'],       {'multiplicity': ak.num(btag)})
        fill_multiple_np(output['N_central'], {'multiplicity': ak.num(central)})
        fill_multiple_np(output['N_ele'],     {'multiplicity':ak.num(electron)})
        fill_multiple_np(output['N_mu'],      {'multiplicity':ak.num(muon)})
        fill_multiple_np(output['N_fwd'],     {'multiplicity':ak.num(fwd)})
        fill_multiple_np(output['ST'],        {'ht': st})
        fill_multiple_np(output['HT'],        {'ht': ht})

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
            output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
            output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
            output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
            output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
            output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
            output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL)

        fill_multiple_np(output['MET'], {'pt':ev.MET.pt, 'phi':ev.MET.phi})

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['lead_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
                weight = weight_BL
            )

            output['trail_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
                weight = weight_BL
            )
        
        fill_multiple_np(
            output['lead_lep'],
            {
                'pt':  pad_and_flatten(leading_lepton.p4.pt),
                'eta': pad_and_flatten(leading_lepton.eta),
                'phi': pad_and_flatten(leading_lepton.phi),
            },
        )

        fill_multiple_np(
            output['trail_lep'],
            {
                'pt':  pad_and_flatten(trailing_lepton.p4.pt),
                'eta': pad_and_flatten(trailing_lepton.eta),
                'phi': pad_and_flatten(trailing_lepton.phi),
            },
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight_BL
        )
        
        fill_multiple_np(
            output['fwd_jet'],
            {
                'pt':  pad_and_flatten(best_fwd.pt),
                'eta': pad_and_flatten(best_fwd.eta),
                'phi': pad_and_flatten(best_fwd.phi),
            },
        )
        
        #output['fwd_jet'].fill(
        #    dataset = dataset,
        #    pt  = ak.flatten(j_fwd[BL].pt),
        #    eta = ak.flatten(j_fwd[BL].eta),
        #    phi = ak.flatten(j_fwd[BL].phi),
        #    weight = weight_BL
        #)
            
        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(best_fwd[BL].p), weight = weight_BL)
        
        return output
Ejemplo n.º 12
0
    def process(self, events):
        """Fill the analysis histograms for one chunk of events.

        The method applies a loose preselection (more than two jets), builds
        the lepton and jet collections, sets up the event weights, evaluates
        the dilepton baseline selection (``dilep_baseline(SS=False)``) and
        fills the histograms and column accumulators of the output.  For
        datasets that are not matched by the data pattern, the jet-related
        histograms are filled once more for every entry of
        ``self.variations``.

        Args:
            events: Event array of one chunk.  It must provide
                ``metadata['dataset']`` and the branches read below
                (``Jet``, ``MET``, ``PV``, ...).

        Returns:
            The accumulator obtained from ``self.accumulator.identity()``
            after all fills.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # Datasets matching this pattern are handled as data below: they get
        # no simulation weights and no systematic variations, and their
        # run/lumi/event numbers are stored.  Evaluated once, used three times.
        is_data = re.search('MuonEG|DoubleMuon|DoubleEG|EGamma',
                            dataset) is not None

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton + trailing_lepton).mass
        dilepton_pt = (leading_lepton + trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        # need to sort wrt smeared and recorrected jet pt
        jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
        # remove jets that overlap with muons
        jet = jet[~match(jet, muon, deltaRCut=0.4)]
        # remove jets that overlap with electrons
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        central = jet[(abs(jet.eta) < 2.4)]
        # should study working point for DeepJet
        btag = getBTagsDeepFlavB(jet, year=self.year)
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        # highest momentum spectator
        high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]
        # highest transverse momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(fwd.pt_nom, axis=1))]
        # most forward spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))]

        ## Get the two leading b-jets in terms of btag score
        # NOTE(review): ak.argsort sorts ascending by default, so this keeps
        # the two LOWEST-score central jets.  If the highest scores are
        # intended, pass ascending=False - confirm before changing, since it
        # alters the b-jet histograms.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        tau = getTaus(ev)
        track = getIsoTracks(ev)

        bl = cross(lepton, high_score_btag)
        bl_dR = delta_r(bl['0'], bl['1'])
        min_bl_dR = ak.min(bl_dR, axis=1)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        # define the weight
        weight = Weights(len(ev))

        if not is_data:
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

            # TODO: trigger SFs are not applied
            # (self.triggerSF.get(electron, muon))

        cutflow = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset=dataset,
            events=ev,
            year=self.year,
            ele=electron,
            ele_veto=vetoelectron,
            mu=muon,
            mu_veto=vetomuon,
            jet_all=jet,
            jet_central=central,
            jet_btag=btag,
            jet_fwd=fwd,
            met=ev.MET,
        )

        BL = sel.dilep_baseline(cutflow=cutflow, SS=False)

        # No weights are added past this point, so the per-event weight is
        # evaluated once and only re-masked for the different selections.
        event_weight = weight.weight()
        weight_BL = event_weight[BL]

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[BL].npvs,
                               weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[BL].npvsGood,
                                   weight=weight_BL)
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[BL],
                             weight=weight_BL)
        output['N_tau'].fill(dataset=dataset,
                             multiplicity=ak.num(tau)[BL],
                             weight=weight_BL)

        output['N_track'].fill(dataset=dataset,
                               multiplicity=ak.num(track)[BL],
                               weight=weight_BL)

        # "N-1" style plot: baseline without the b-tag requirement
        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[BL_minusNb],
                           weight=event_weight[BL_minusNb])

        output['N_central'].fill(dataset=dataset,
                                 multiplicity=ak.num(central)[BL],
                                 weight=weight_BL)
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[BL],
                             weight=weight_BL)
        # Fixed: this histogram used to be filled with the electron
        # multiplicity (copy-paste of the N_ele fill).
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(muon)[BL],
                            weight=weight_BL)

        # baseline without the forward-jet requirement
        BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
        output['N_fwd'].fill(dataset=dataset,
                             multiplicity=ak.num(fwd)[BL_minusFwd],
                             weight=event_weight[BL_minusFwd])

        output['dilep_pt'].fill(dataset=dataset,
                                pt=ak.flatten(dilepton_pt[BL]),
                                weight=weight_BL)
        output['dilep_mass'].fill(dataset=dataset,
                                  mass=ak.flatten(dilepton_mass[BL]),
                                  weight=weight_BL)
        output['mjf_max'].fill(dataset=dataset,
                               mass=mjf_max[BL],
                               weight=weight_BL)
        output['deltaEta'].fill(dataset=dataset,
                                eta=ak.flatten(deltaEta[BL]),
                                weight=weight_BL)
        output['min_bl_dR'].fill(dataset=dataset,
                                 eta=min_bl_dR[BL],
                                 weight=weight_BL)
        output['min_mt_lep_met'].fill(dataset=dataset,
                                      pt=min_mt_lep_met[BL],
                                      weight=weight_BL)

        # Length-0-or-1 slices keep events with too few jets in the array
        # (as empty lists), so ak.flatten simply drops them.
        lead_jet = jet[:, 0:1][BL]
        sublead_jet = jet[:, 1:2][BL]
        third_jet = jet[:, 2:3][BL]
        lead_b = high_score_btag[:, 0:1][BL]
        sublead_b = high_score_btag[:, 1:2][BL]

        output['leading_jet_pt'].fill(dataset=dataset,
                                      pt=ak.flatten(lead_jet.pt),
                                      weight=weight_BL)
        output['subleading_jet_pt'].fill(dataset=dataset,
                                         pt=ak.flatten(sublead_jet.pt),
                                         weight=weight_BL)
        output['leading_jet_eta'].fill(dataset=dataset,
                                       eta=ak.flatten(lead_jet.eta),
                                       weight=weight_BL)
        output['subleading_jet_eta'].fill(dataset=dataset,
                                          eta=ak.flatten(sublead_jet.eta),
                                          weight=weight_BL)

        output['leading_btag_pt'].fill(dataset=dataset,
                                       pt=ak.flatten(lead_b.pt),
                                       weight=weight_BL)
        output['subleading_btag_pt'].fill(dataset=dataset,
                                          pt=ak.flatten(sublead_b.pt),
                                          weight=weight_BL)
        output['leading_btag_eta'].fill(dataset=dataset,
                                        eta=ak.flatten(lead_b.eta),
                                        weight=weight_BL)
        output['subleading_btag_eta'].fill(dataset=dataset,
                                           eta=ak.flatten(sublead_b.eta),
                                           weight=weight_BL)

        # baseline without the MET requirement
        BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[BL_minusMET].pt,
                           phi=ev.MET[BL_minusMET].phi,
                           weight=event_weight[BL_minusMET])

        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight=weight_BL)

        output['trail_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight=weight_BL)

        output['fwd_jet'].fill(dataset=dataset,
                               pt=ak.flatten(high_p_fwd[BL].pt_nom),
                               eta=ak.flatten(high_p_fwd[BL].eta),
                               phi=ak.flatten(high_p_fwd[BL].phi),
                               weight=weight_BL)

        output['b1'].fill(dataset=dataset,
                          pt=ak.flatten(lead_b.pt_nom),
                          eta=ak.flatten(lead_b.eta),
                          phi=ak.flatten(lead_b.phi),
                          weight=weight_BL)

        output['b2'].fill(dataset=dataset,
                          pt=ak.flatten(sublead_b.pt_nom),
                          eta=ak.flatten(sublead_b.eta),
                          phi=ak.flatten(sublead_b.phi),
                          weight=weight_BL)

        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(lead_jet.pt_nom),
                          eta=ak.flatten(lead_jet.eta),
                          phi=ak.flatten(lead_jet.phi),
                          weight=weight_BL)

        output['j2'].fill(dataset=dataset,
                          pt=ak.flatten(sublead_jet.pt_nom),
                          eta=ak.flatten(sublead_jet.eta),
                          phi=ak.flatten(sublead_jet.phi),
                          weight=weight_BL)

        output['j3'].fill(dataset=dataset,
                          pt=ak.flatten(third_jet.pt_nom),
                          eta=ak.flatten(third_jet.eta),
                          phi=ak.flatten(third_jet.phi),
                          weight=weight_BL)

        if is_data:
            # keep run / lumi block / event number of the selected events
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run' % dataset] += processor.column_accumulator(
                run_[BL])
            output['%s_lumi' % dataset] += processor.column_accumulator(
                lumi_[BL])
            output['%s_event' % dataset] += processor.column_accumulator(
                event_[BL])

        # Now, take care of systematic uncertainties
        if not is_data:
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt > 25)]
                # remove jets that overlap with muons
                jet = jet[~match(jet, muon, deltaRCut=0.4)]
                # remove jets that overlap with electrons
                jet = jet[~match(jet, electron, deltaRCut=0.4)]

                central = jet[(abs(jet.eta) < 2.4)]
                # should study working point for DeepJet
                btag = getBTagsDeepFlavB(jet, year=self.year)
                light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd = getFwdJet(light)
                fwd_noPU = getFwdJet(light, puId=False)

                ## forward jets
                # highest momentum spectator
                high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]
                # highest transverse momentum spectator
                high_pt_fwd = fwd[ak.singletons(ak.argmax(fwd.pt, axis=1))]
                # most forward spectator
                high_eta_fwd = fwd[ak.singletons(
                    ak.argmax(abs(fwd.eta), axis=1))]

                ## Get the two leading b-jets in terms of btag score
                # NOTE(review): same ascending-sort caveat as for the
                # nominal collection above.
                high_score_btag = central[ak.argsort(
                    central.btagDeepFlavB)][:, :2]

                # MET itself is not varied yet; only the jet collections are.
                met = ev.MET

                sel = Selection(
                    dataset=dataset,
                    events=ev,
                    year=self.year,
                    ele=electron,
                    ele_veto=vetoelectron,
                    mu=muon,
                    mu_veto=vetomuon,
                    jet_all=jet,
                    jet_central=central,
                    jet_btag=btag,
                    jet_fwd=fwd,
                    met=met,
                )

                # The selection is re-evaluated with the varied jets; the
                # event weights stay the nominal ones.
                BL = sel.dilep_baseline(SS=False)
                weight_BL = event_weight[BL]

                output['N_jet_' + var].fill(dataset=dataset,
                                            multiplicity=ak.num(jet)[BL],
                                            weight=weight_BL)
                BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
                output['N_fwd_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(fwd)[BL_minusFwd],
                    weight=event_weight[BL_minusFwd])
                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[BL_minusNb],
                    weight=event_weight[BL_minusNb])
                output['N_central_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(central)[BL],
                    weight=weight_BL)

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet.eta[:, 0:1][BL]),
                                         phi=ak.flatten(jet.phi[:, 0:1][BL]),
                                         weight=weight_BL)

                output['b1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight=weight_BL)

                output['fwd_jet_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_p_fwd[BL].pt),
                    eta=ak.flatten(high_p_fwd[BL].eta),
                    phi=ak.flatten(high_p_fwd[BL].phi),
                    weight=weight_BL)

                BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
                output['MET_' + var].fill(
                    dataset=dataset,
                    pt=ev.MET[BL_minusMET].pt,
                    phi=ev.MET[BL_minusMET].phi,
                    weight=event_weight[BL_minusMET])

        return output
Ejemplo n.º 13
0
# Kinematics of the jets flagged by the loose selection.
cms_events["jetptLoose"] = cms_events["jetpt"][
    cms_events["jet_sel_loose"] == True]
cms_events["jetetaLoose"] = cms_events["jeteta"][
    cms_events["jet_sel_loose"] == True]
cms_events["jetphiLoose"] = cms_events["jetphi"][
    cms_events["jet_sel_loose"] == True]

# Per-event flags picked out with getN: the tight flag of the first loose
# jet, and the b flag of the first and second jet.
cms_events["jet_sel_tight0"] = ak.Array(
    getN(cms_events["jet_sel_tight"][cms_events["jet_sel_loose"] == True],
         0))
cms_events["jet_sel_b_0"] = ak.Array(getN(cms_events["jet_sel_b"], 0))
cms_events["jet_sel_b_1"] = ak.Array(getN(cms_events["jet_sel_b"], 1))

# Jet multiplicities: sum of the selection flags along the innermost axis.
cms_events["nJetLoose"] = ak.sum(cms_events["jet_sel_loose"], axis=-1)
cms_events["nJetTight"] = ak.sum(cms_events["jet_sel_tight"], axis=-1)
cms_events["nJetb"] = ak.sum(cms_events["jet_sel_b"], axis=-1)

# Delta-phi between every loose jet and the MET, and its per-event minimum.
cms_events["dphi_jet_met"] = DeltaPhi(
    cms_events["jetphi"][cms_events["jet_sel_loose"] == True],
    cms_events["metphi"])
cms_events["min_dphi_jet_met"] = ak.min(cms_events["dphi_jet_met"], axis=-1)

#--------------------------------------------------------------------------------------------------
## W --> lepton + nu
#--------------------------------------------------------------------------------------------------

# The W(mu nu) + 1b mask is built one requirement at a time, in the same
# left-to-right order as a single chained `&` expression.
# no loose electron, no photon, no tau
mask_wmunu1b = (cms_events.nEleLoose == 0) & (cms_events.npho == 0)
mask_wmunu1b = mask_wmunu1b & (cms_events.ntau == 0)
# exactly one loose muon, which is also tight
mask_wmunu1b = mask_wmunu1b & (cms_events.nMuLoose == 1)
mask_wmunu1b = mask_wmunu1b & (cms_events.nMuTight == 1)
# recoil above 200 and jets well separated from the MET in phi
mask_wmunu1b = mask_wmunu1b & (cms_events.recoil_Wmunu0 > 200.)
mask_wmunu1b = mask_wmunu1b & (cms_events.min_dphi_jet_met > 0.5)
# exactly one loose jet, which is also tight and b-flagged
mask_wmunu1b = mask_wmunu1b & (cms_events.nJetLoose == 1)
mask_wmunu1b = mask_wmunu1b & (cms_events.nJetTight == 1)
mask_wmunu1b = mask_wmunu1b & (cms_events.nJetb == 1)
# transverse-mass window 0 < mT < 160
mask_wmunu1b = mask_wmunu1b & (cms_events.mt_Wmunu0 > 0)
mask_wmunu1b = mask_wmunu1b & (cms_events.mt_Wmunu0 < 160)

cms_events["mask_wmunu1b"] = mask_wmunu1b
'''