def setupNPArray(self, cuts, variables):
    """Replace the accumulator with one empty column per kept variable and per cut.

    A variable is kept when the entry at index 4 of its descriptor is
    greater than zero; every key of ``cuts`` gets a column regardless.
    Also sets ``self.setupNPArr`` to True.
    """
    def empty_column():
        # Each key needs its own, independent zero-length column.
        return processor.column_accumulator(np.zeros(shape=(0)))

    columns = {
        var_name: empty_column()
        for var_name, descriptor in variables.items()
        if descriptor[4] > 0
    }
    # Cut names are added after the variables, overwriting any clash.
    columns.update({cut_name: empty_column() for cut_name in cuts.keys()})

    self._accumulator = processor.dict_accumulator(columns)
    self.setupNPArr = True
 def __init__(self):
     """Create the accumulator: per-key float counters plus per-key column stores."""
     def keyed_columns(template):
         # The bound `identity` method of a column accumulator built around
         # `template` is used as the default factory for unseen keys.
         return processor.defaultdict_accumulator(
             processor.column_accumulator(template).identity)

     # Zero-row 2D templates; the number of inner lists fixes the column count.
     variables_template = np.transpose(np.array([[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]))
     merged_template = np.transpose(np.array([[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]))

     content = {
         'sumw': processor.defaultdict_accumulator(float),
         'nevents': processor.defaultdict_accumulator(float),
         'variables': keyed_columns(variables_template),
         'variables_merged': keyed_columns(merged_template),
         'weights': keyed_columns(np.array([])),
         'weights_merged': keyed_columns(np.array([])),
     }
     self._accumulator = processor.dict_accumulator(content)
Beispiel #3
0
def test_accumulators():
    """Exercise the value, list, set, dict, defaultdict and column accumulators."""
    # Value accumulator built from a plain type.
    scalar = processor.value_accumulator(float)
    scalar += 3.
    assert scalar.value == 3.
    assert scalar.identity().value == 0.

    # Value accumulator whose starting value comes from a factory callable.
    array_valued = processor.value_accumulator(partial(np.array, [2.]))
    array_valued += 3.
    assert np.array_equal(array_valued.value, np.array([5.]))
    assert np.array_equal(array_valued.identity().value, np.array([2.]))

    numbers = processor.list_accumulator(range(4))
    numbers += [3]
    numbers += processor.list_accumulator([1, 2])
    assert numbers == [0, 1, 2, 3, 3, 1, 2]

    fruits = processor.set_accumulator({'apples', 'oranges'})
    fruits += {'pears'}
    fruits += 'grapes'
    assert fruits == {'apples', 'oranges', 'pears', 'grapes'}

    # Dict accumulator: shared keys are accumulated, new keys are adopted.
    combined = processor.dict_accumulator({'num': array_valued, 'fruit': fruits})
    combined['num'] += 2.
    combined += processor.dict_accumulator({
        'num2': processor.value_accumulator(int),
        'fruit': processor.set_accumulator({'apples', 'cherries'}),
    })
    assert combined['num2'].value == 0
    assert np.array_equal(combined['num'].value, np.array([7.]))
    assert combined['fruit'] == {'apples', 'oranges', 'pears', 'grapes', 'cherries'}

    totals = processor.defaultdict_accumulator(float)
    totals['x'] = 0.
    totals['x'] += 4.
    totals['y'] += 5.
    totals['z'] += totals['x']
    totals['x'] += totals['y']
    assert totals['x'] == 9.
    assert totals['y'] == 5.
    assert totals['z'] == 4.
    assert totals['w'] == 0.

    # Mixed addition is only executed; its result is not asserted on.
    _ = totals + combined

    seeded = processor.defaultdict_accumulator(lambda: 2.)
    seeded['x'] += 4.
    assert seeded['x'] == 6.

    seeded += seeded
    assert seeded['x'] == 12.
    assert seeded['y'] == 2.

    upper = processor.column_accumulator(np.arange(6).reshape(2,3))
    lower = processor.column_accumulator(np.arange(12).reshape(4,3))
    upper += lower
    assert upper.value.sum() == 81
Beispiel #4
0
def test_accumulators():
    """Check in-place accumulation for each accumulator flavour in turn."""
    # --- value accumulators -------------------------------------------------
    plain_value = processor.value_accumulator(float)
    plain_value += 3.0
    assert plain_value.value == 3.0
    assert plain_value.identity().value == 0.0

    seeded_value = processor.value_accumulator(partial(np.array, [2.0]))
    seeded_value += 3.0
    assert np.array_equal(seeded_value.value, np.array([5.0]))
    assert np.array_equal(seeded_value.identity().value, np.array([2.0]))

    # --- list and set accumulators -------------------------------------------
    sequence = processor.list_accumulator(range(4))
    sequence += [3]
    sequence += processor.list_accumulator([1, 2])
    assert sequence == [0, 1, 2, 3, 3, 1, 2]

    fruit_set = processor.set_accumulator({"apples", "oranges"})
    fruit_set += {"pears"}
    fruit_set += "grapes"
    assert fruit_set == {"apples", "oranges", "pears", "grapes"}

    # --- dict accumulator ----------------------------------------------------
    bundle = processor.dict_accumulator({"num": seeded_value, "fruit": fruit_set})
    bundle["num"] += 2.0
    addition = processor.dict_accumulator({
        "num2": processor.value_accumulator(int),
        "fruit": processor.set_accumulator({"apples", "cherries"}),
    })
    bundle += addition
    assert bundle["num2"].value == 0
    assert np.array_equal(bundle["num"].value, np.array([7.0]))
    assert bundle["fruit"] == {"apples", "oranges", "pears", "grapes", "cherries"}

    # --- defaultdict accumulators --------------------------------------------
    counters = processor.defaultdict_accumulator(float)
    counters["x"] = 0.0
    counters["x"] += 4.0
    counters["y"] += 5.0
    counters["z"] += counters["x"]
    counters["x"] += counters["y"]
    assert counters["x"] == 9.0
    assert counters["y"] == 5.0
    assert counters["z"] == 4.0
    assert counters["w"] == 0.0

    offset_counters = processor.defaultdict_accumulator(lambda: 2.0)
    offset_counters["x"] += 4.0
    assert offset_counters["x"] == 6.0

    offset_counters += offset_counters
    assert offset_counters["x"] == 12.0
    assert offset_counters["y"] == 2.0

    # --- column accumulator --------------------------------------------------
    first_rows = processor.column_accumulator(np.arange(6).reshape(2, 3))
    more_rows = processor.column_accumulator(np.arange(12).reshape(4, 3))
    first_rows += more_rows
    assert first_rows.value.sum() == 81
    def __init__(self, data_type='data'):
        """Store the data type, create empty output columns and load weight functions.

        Args:
            data_type: label stored unchanged on ``self.data_type``.
        """
        self.data_type = data_type

        # One zero-length column per name, in this insertion order.
        column_names = (
            'run_1', 'lumi_1', 'event_1',
            'run_2', 'lumi_2', 'event_2',
            'era_1', 'era_2',
        )
        self._accumulator = processor.dict_accumulator({
            column: processor.column_accumulator(np.zeros(shape=(0, )))
            for column in column_names
        })

        self.pucorrs = get_pu_weights_function()
        ## NOT applied for now
        self.nlo_w = get_nlo_weight_function('w')
        self.nlo_z = get_nlo_weight_function('z')
 def __init__(self, category='00'):
     """Remember the category and create an accumulator with one empty 'dphi' column.

     Args:
         category: label stored unchanged on ``self.category``.
     """
     self.category = category
     # NOTE(review): this axis is constructed but not used anywhere in the
     # constructor; kept so the behaviour is unchanged.
     dataset_axis = hist.Cat('dataset', 'dataset')
     empty_dphi = processor.column_accumulator(np.zeros(shape=(0, )))
     self._accumulator = processor.dict_accumulator({'dphi': empty_dphi})
Beispiel #7
0
 def fill_tree(variable, values):
     """Append ``values`` to the tree column of ``variable`` for the current dataset.

     Relies on ``region``, ``dataset`` and ``output`` from the enclosing scope.
     The first fill for a dataset stores the column; later fills accumulate
     onto it.
     """
     column = processor.column_accumulator(values)
     tree_key = f'tree_{region}_{variable}'
     if dataset not in output[tree_key].keys():
         output[tree_key][dataset] = column
         return
     output[tree_key][dataset] += column
 def __init__(self, cols=None):  #, name="Bcands", outputfile=None, reuseoutputfile=None):
     """Create one empty column accumulator per requested column name.

     Args:
         cols: iterable of column names. When omitted (``None``) the default
             set of candidate columns listed below is used. A fresh list is
             built on every call so that instances never share (and cannot
             accidentally mutate) a single default list object.
     """
     if cols is None:
         cols = [
             "pt", "eta", "y", "phi", "mass", "l_xy", "l_xy_unc",
             "sv_prob", "cos2D"
         ]
     self._cols = cols
     #self._name = name
     for col in cols:
         # Each column gets its own, independent empty array.
         self[col] = processor.column_accumulator(np.array([]))
Beispiel #9
0
        'DoubleMuon': fileset_all['DoubleMuon_Run2018'],
        'EGamma': fileset_all['EGamma_Run2018'],
        'diboson': fileset_all['diboson'],
        'TTXnoW': fileset_all['TTXnoW'],
        'TTW': fileset_all['TTW'],
        #'WZ': fileset_all['WZ'],
        'DY': fileset_all['DY'],
    }

    fileset = make_small(fileset, small, 1)

    add_processes_to_output(fileset, desired_output)
    for rle in ['run', 'lumi', 'event']:
        desired_output.update({
            'MuonEG_%s' % rle:
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'EGamma_%s' % rle:
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'DoubleMuon_%s' % rle:
            processor.column_accumulator(np.zeros(shape=(0, ))),
            "M_ll":
            hist.Hist("Counts", dataset_axis, mass_axis),
            "M3l":
            hist.Hist("Counts", dataset_axis, mass_axis),
            "ST":
            hist.Hist("Counts", dataset_axis, ht_axis),
            "HT":
            hist.Hist("Counts", dataset_axis, ht_axis),
            "LT":
            hist.Hist("Counts", dataset_axis, ht_axis),
            "onZ_pt":
Beispiel #10
0
def test_new_accumulators():
    """Check ``processor.accumulate`` on plain Python/NumPy containers."""
    # Scalars and arrays.
    total = processor.accumulate((0.0, 3.0))
    assert total == 3.0

    total = processor.accumulate((np.array([2.0]), 3.0))
    assert np.array_equal(total, np.array([5.0]))

    # Lists are concatenated in order.
    merged_list = processor.accumulate((list(range(4)), [3], [1, 2]))
    assert merged_list == [0, 1, 2, 3, 3, 1, 2]

    # Sets are unioned.
    fruit = processor.accumulate(({"apples", "oranges"}, {"pears"}, {"grapes"}))
    assert fruit == {"apples", "oranges", "pears", "grapes"}

    # Dicts are merged key by key.
    dict_parts = (
        {"num": total, "fruit": fruit},
        {"num": 2.0},
        {"num2": 0, "fruit": {"apples", "cherries"}},
    )
    merged_dict = processor.accumulate(dict_parts)
    assert merged_dict["num2"] == 0
    assert np.array_equal(merged_dict["num"], np.array([7.0]))
    assert merged_dict["fruit"] == {"apples", "oranges", "pears", "grapes", "cherries"}

    float_parts = (
        defaultdict(float),
        {"x": 4.0, "y": 5.0},
        {"z": 4.0, "x": 5.0},
    )
    sums = processor.accumulate(float_parts)
    assert sums["x"] == 9.0
    assert sums["y"] == 5.0
    assert sums["z"] == 4.0
    # this is different than old style!
    with pytest.raises(KeyError):
        sums["w"]

    seeded = processor.accumulate((
        defaultdict(lambda: 2.0),
        defaultdict(lambda: 2, {"x": 4.0}),
    ))
    assert seeded["x"] == 4.0
    assert seeded["y"] == 2.0

    # this is different than old style!
    seeded = processor.accumulate([seeded], seeded)
    assert seeded["x"] == 8.0
    assert seeded["y"] == 4.0
    assert seeded["z"] == 2.0

    # Column accumulators are still supported as items.
    stacked = processor.accumulate((
        processor.column_accumulator(np.arange(6).reshape(2, 3)),
        processor.column_accumulator(np.arange(12).reshape(4, 3)),
    ))
    assert stacked.value.sum() == 81
 def __init__(self):
     """Create the accumulator with one empty column per output name, then print "done"."""
     # Column names in their original insertion order.
     column_names = (
         "j1pt", "j1phi", "j1eta", "j1mass",
         "j2pt", "j2phi", "j2eta", "j2mass",
         "j3pt", "j3phi", "j3eta", "j3mass",
         "dR12", "dR13", "dR23",
         "j1btag", "j2btag", "j3btag",
         "j1area", "j2area", "j3area",
         "j12deta", "j23deta", "j13deta",
         "j12dphi", "j23dphi", "j13dphi",
         "j1j2mass", "j2j3mass", "j1j3mass",
         "event", "truth",
     )
     # Every name gets its own empty array so columns never share storage.
     self._accumulator = processor.dict_accumulator({
         column: processor.column_accumulator(np.array([]))
         for column in column_names
     })
     print("done")
Beispiel #12
0
        'MuonEG': fileset_2018['MuonEG'],
        'DoubleMuon': fileset_2018['DoubleMuon'],
        'EGamma': fileset_2018['EGamma'],
        'diboson': fileset_2018['diboson'],
        'TTXnoW': fileset_2018['TTXnoW'],
        'TTW': fileset_2018['TTW'],
        #'WZ': fileset_2018['WZ'],
        'DY': fileset_2018['DY'],
    }

    fileset = make_small(fileset, small, 1)

    add_processes_to_output(fileset, desired_output)
    for rle in ['run', 'lumi', 'event']:
        desired_output.update({
                'MuonEG_%s'%rle: processor.column_accumulator(np.zeros(shape=(0,))),
                'EGamma_%s'%rle: processor.column_accumulator(np.zeros(shape=(0,))),
                'DoubleMuon_%s'%rle: processor.column_accumulator(np.zeros(shape=(0,))),
             })

    histograms = sorted(list(desired_output.keys()))

    
    if not overwrite:
        cache.load()
    
    
    if local:
        exe_args = {
            'workers': 12,
            'function_args': {'flatten': False},
Beispiel #13
0
    def process(self, events):
        """Fill the dilepton-baseline histograms and their systematic variations.

        Events with more than two jets are kept, lepton and jet collections are
        built, event weights are assembled for simulated samples, and the
        baseline selection (``sel.dilep_baseline(SS=False)``) is used to fill
        multiplicity, MET, lepton and jet histograms. For data samples the
        run/lumi/event numbers of selected events are stored; for simulated
        samples the jet-related histograms are refilled for every entry of
        ``self.variations``.

        Args:
            events: event collection providing ``Jet``, ``MET``, ``PV`` and
                ``metadata['dataset']``.

        Returns:
            The filled accumulator (a fresh identity of ``self.accumulator``).
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # Data samples are recognised by their dataset name. Evaluate the
        # pattern once instead of recompiling it at every decision point.
        is_data = re.search('MuonEG|DoubleMuon|DoubleEG|EGamma', dataset) is not None

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        OSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)<0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        OSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)<0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton   = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)<0, axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        jf          = cross(high_p_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        deltaEta    = abs(high_p_fwd.eta - jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max     = ak.max(mjf, axis=1)

        jj          = choose(jet, 2)
        mjj_max     = ak.max((jj['0']+jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        # define the weight
        weight = Weights( len(ev) )

        if not is_data:
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))


        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.dilep_baseline(cutflow=cutflow, SS=False)

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])

        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])

        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        # Muon multiplicity must count muons (it previously counted electrons).
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight.weight()[BL])

        BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])

        BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL_minusMET].pt,
            phi  = ev.MET[BL_minusMET].phi,
            weight = weight.weight()[BL_minusMET]
        )

        #output['electron'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(electron[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(electron[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(electron[BL].phi)),
        #    weight = weight.weight()[BL]
        #)
        #
        #output['muon'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(muon[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(muon[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(muon[BL].phi)),
        #    weight = weight.weight()[BL]
        #)

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_p_fwd[BL].pt_nom),
            eta = ak.flatten(high_p_fwd[BL].eta),
            phi = ak.flatten(high_p_fwd[BL].phi),
            weight = weight.weight()[BL]
        )

        output['b1'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 0:1][BL].eta),
            phi = ak.flatten(high_score_btag[:, 0:1][BL].phi),
            weight = weight.weight()[BL]
        )

        output['b2'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 1:2][BL].eta),
            phi = ak.flatten(high_score_btag[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )

        if is_data:
            # Store run/lumi/event of the selected data events.
            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run'%dataset] += processor.column_accumulator(run_[BL])
            output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL])
            output['%s_event'%dataset] += processor.column_accumulator(event_[BL])

        # Now, take care of systematic uncertainties
        if not is_data:
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId>1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt>25)]
                jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
                jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

                central   = jet[(abs(jet.eta)<2.4)]
                btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
                light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd       = getFwdJet(light)
                fwd_noPU  = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
                high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

                # NOTE(review): this assigns the varied value onto the object
                # obtained from ev.MET; whether that modifies ev.MET for later
                # iterations depends on awkward's semantics - confirm.
                met = ev.MET
                met['pt'] = getattr(met, var)

                sel = Selection(
                    dataset = dataset,
                    events = ev,
                    year = self.year,
                    ele = electron,
                    ele_veto = vetoelectron,
                    mu = muon,
                    mu_veto = vetomuon,
                    jet_all = jet,
                    jet_central = central,
                    jet_btag = btag,
                    jet_fwd = fwd,
                    met = met,
                )

                BL = sel.dilep_baseline(SS=False)

                # get the modified selection -> more difficult
                #selection.add('N_jet>2_'+var, (ak.num(jet.pt)>=3)) # stupid bug here...
                #selection.add('N_btag=2_'+var,      (ak.num(btag)==2) ) 
                #selection.add('N_central>1_'+var,   (ak.num(central)>=2) )
                #selection.add('N_fwd>0_'+var,       (ak.num(fwd)>=1) )
                #selection.add('MET>30_'+var, (getattr(ev.MET, var)>30) )

                ### Don't change the selection for now...
                #bl_reqs = os_reqs + ['N_jet>2_'+var, 'MET>30_'+var, 'N_btag=2_'+var, 'N_central>1_'+var, 'N_fwd>0_'+var]
                #bl_reqs_d = { sel: True for sel in bl_reqs }
                #BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_'+var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])
                BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
                output['N_fwd_'+var].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])
                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_'+var].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])
                output['N_central_'+var].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])


                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(jet.pt[:, 0:1][BL]),
                    eta = ak.flatten(jet.eta[:, 0:1][BL]),
                    phi = ak.flatten(jet.phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )

                output['b1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta = ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi = ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )

                output['fwd_jet_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_p_fwd[BL].pt),
                    #p   = ak.flatten(high_p_fwd[BL].p),
                    eta = ak.flatten(high_p_fwd[BL].eta),
                    phi = ak.flatten(high_p_fwd[BL].phi),
                    weight = weight.weight()[BL]
                )

                BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
                output['MET_'+var].fill(
                    dataset = dataset,
                    pt  = getattr(ev.MET, var)[BL_minusMET],
                    phi  = ev.MET[BL_minusMET].phi,
                    weight = weight.weight()[BL_minusMET]
                )

        return output
 def process(self, events):
     """Produce per-trijet-combination columns together with a gen-matching truth flag.

     Events are required to have exactly one tight muon or exactly one tight
     electron, at least three tight jets and at least one tight jet with
     ``btagDeepFlavB > 0.642``. The generator-level b quark and the two quarks
     of a hadronically decaying W from a top decay are matched to gen jets and
     then to reconstructed jets. Every 3-jet combination of the tight jets is
     written out, flagged as truth when all three jets carry one of the matched
     ``genJetIdx`` values.

     Args:
         events: event collection providing ``Jet``, ``Muon``, ``Electron``,
             ``GenPart``, ``GenJet`` and ``event``.

     Returns:
         The filled accumulator (a fresh identity of ``self._accumulator``).
     """
     output = self._accumulator.identity()
     jets=events.Jet
     jetSel = (jets.pt>30) & (abs(jets.eta)<2.4)
     tightJet = jets[jetSel]
     bJet = tightJet[tightJet.btagDeepFlavB > 0.642]
     muons = events.Muon
     muonSel = (muons.pt>30) & (abs(muons.eta)<2.4)
     tightMuon = muons[muonSel]
     ele = events.Electron
     eleSel = (ele.pt>35)&(abs(ele.eta)<2.4)
     tightEle = ele[eleSel]
     eventSel = (((ak.num(tightMuon)==1) | (ak.num(tightEle)==1)) &
         (ak.num(tightJet)>= 3) & (ak.num(bJet)>=1)
                )
     final = events[eventSel]


     #####GENPART MATCHING ######

     genPart = final.GenPart
     tops = genPart[abs(genPart.pdgId)==6]
     #The isLastCopy Flag filters out copy Genparticles:
     tops = tops[tops.hasFlags('isLastCopy')]
     tDecay = tops.distinctChildren
     tDecay = tDecay[tDecay.hasFlags('isLastCopy')]
     t_Events=tDecay[abs(tDecay.pdgId)==5]
     W = tDecay[abs(tDecay.pdgId)==24]
     W = W[W.hasFlags('isLastCopy')]
     WDecay = W.distinctChildren
     WDecay = WDecay[WDecay.hasFlags('isLastCopy')]
     #t_Events is the lone bottom, W_Events is the -> two jets
     #select the hadronically decaying W
     W_Events=ak.flatten(WDecay[ak.all(abs(WDecay.pdgId)<=8,axis=-1)],axis=3)
     #hadW is the mask for a W boson decaying to quarks
     hadW = ak.num(W_Events,axis=2)==2
     #keep the b quarks whose sibling W boson decays hadronically
     hadB = t_Events[hadW]
     hadB = ak.flatten(hadB,axis=2)
     W_quarks = W_Events[hadW]
     W_quarks = ak.flatten(W_quarks,axis=2)
     #concatenating these two arrays makes an array of events with the correctly decaying GenParticles.
     qqb = ak.concatenate([hadB,W_quarks],axis=1)


     #####GEN JET MATCHING ######
     final=final[(ak.count(qqb.pdgId,axis=1)==3)]
     finaljets=final.Jet
     qqb=qqb[(ak.count(qqb.pdgId,axis=1)==3)]
     #Implementing Tight Jet Cuts on Training Data
     finaljetSel=(abs(finaljets.eta)<2.4)&(finaljets.pt>30)
     finalJets=finaljets[finaljetSel]
     #Match Gen part to gen jet
     matchedGenJets=qqb.nearest(final.GenJet)
     #match gen to reco
     matchedJets=matchedGenJets.nearest(finalJets)

     ### VALIDATION ###
     test=matchedJets.genJetIdx
     combs=ak.combinations(finalJets,3,replacement=False)
     t1=(combs['0'].genJetIdx==test[:,0])|(combs['0'].genJetIdx==test[:,1])|(combs['0'].genJetIdx==test[:,2])
     t2=(combs['1'].genJetIdx==test[:,0])|(combs['1'].genJetIdx==test[:,1])|(combs['1'].genJetIdx==test[:,2])
     t3=(combs['2'].genJetIdx==test[:,0])|(combs['2'].genJetIdx==test[:,1])|(combs['2'].genJetIdx==test[:,2])
     t=t1&t2&t3

     trutharray=ak.flatten(t)
     jetcombos=ak.flatten(combs)
     j1,j2,j3=ak.unzip(jetcombos)

     # One entry per output column; each value has one element per jet combination.
     columns = {
         "dR12": j1.delta_r(j2),
         "dR13": j1.delta_r(j3),
         "dR23": j2.delta_r(j3),
         # Each btag column reads its own jet (j2btag/j3btag previously
         # duplicated the j1 values).
         "j1btag": j1.btagCSVV2,
         "j2btag": j2.btagCSVV2,
         "j3btag": j3.btagCSVV2,
         "j1area": j1.area,
         "j2area": j2.area,
         "j3area": j3.area,
         "j12deta": j1.eta-j2.eta,
         "j23deta": j2.eta-j3.eta,
         "j13deta": j1.eta-j3.eta,
         "j12dphi": j1.phi-j2.phi,
         "j23dphi": j2.phi-j3.phi,
         "j13dphi": j1.phi-j3.phi,
         # NOTE(review): these are sums of the individual jet masses, not the
         # invariant masses of the jet pairs - confirm this is intended.
         "j1j2mass": j1.mass+j2.mass,
         "j2j3mass": j2.mass+j3.mass,
         "j1j3mass": j1.mass+j3.mass,
         "j1pt": j1.pt,
         "j1phi": j1.phi,
         "j1eta": abs(j1.eta),
         "j1mass": j1.mass,
         "j2pt": j2.pt,
         "j2phi": j2.phi,
         "j2eta": abs(j2.eta),
         "j2mass": j2.mass,
         "j3pt": j3.pt,
         "j3phi": j3.phi,
         "j3eta": abs(j3.eta),
         "j3mass": j3.mass,
         # Repeat the event number once per combination of that event.
         "event": ak.flatten(ak.broadcast_arrays(final.event,combs['0'].pt)[0]),
     }
     for column, values in columns.items():
         output[column]+=processor.column_accumulator(ak.to_numpy(values))
     output["truth"]+=processor.column_accumulator(ak.to_numpy(trutharray).astype(int))

     return output
Beispiel #15
0
    def process(self, df):
        if not df.size:
            return self.accumulator.identity()
        self._configure(df)
        dataset = df['dataset']
        df['is_lo_w'] = is_lo_w(dataset)
        df['is_lo_z'] = is_lo_z(dataset)
        df['is_lo_znunu'] = is_lo_znunu(dataset)
        df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
        df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
        df['is_lo_g'] = is_lo_g(dataset)
        df['is_nlo_z'] = is_nlo_z(dataset)
        df['is_nlo_w'] = is_nlo_w(dataset)
        df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
            'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
                'is_lo_w_ewk'] | df['is_lo_z_ewk']
        df['is_data'] = is_data(dataset)

        gen_v_pt = None
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
                'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
            gen = setup_gen_candidates(df)
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            gen = setup_gen_candidates(df)
            all_gen_photons = gen[(gen.pdg == 22)]
            prompt_mask = (all_gen_photons.status
                           == 1) & (all_gen_photons.flag & 1 == 1)
            stat1_mask = (all_gen_photons.status == 1)
            gen_photons = all_gen_photons[prompt_mask |
                                          (~prompt_mask.any()) & stat1_mask]
            gen_photon = gen_photons[gen_photons.pt.argmax()]

            gen_v_pt = gen_photon.pt.max()

        # Generator-level leading dijet mass
        if df['has_lhe_v_pt']:
            genjets = setup_lhe_cleaned_genjets(df)
            digenjet = genjets[:, :2].distincts()
            df['mjj_gen'] = digenjet.mass.max()
            df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0)

        # Candidates
        # Already pre-filtered!
        # All leptons are at least loose
        # Check out setup_candidates for filtering details
        met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
            df, cfg)

        # Remove jets in accordance with the noise recipe
        if df['year'] == 2017:
            ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) |
                      (ak4.abseta > 3.139)]
            bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) |
                          (bjets.abseta > 3.139)]

        # Filtering ak4 jets according to pileup ID
        ak4 = ak4[ak4.puid]

        # Muons
        df['is_tight_muon'] = muons.tightId \
                      & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                      & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                      & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

        dimuons = muons.distincts()
        dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

        df['MT_mu'] = ((muons.counts == 1) *
                       mt(muons.pt, muons.phi, met_pt, met_phi)).max()

        # Electrons
        df['is_tight_electron'] = electrons.tightId \
                            & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                            & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA)

        dielectrons = electrons.distincts()
        dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

        df['MT_el'] = ((electrons.counts == 1) *
                       mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

        # ak4
        leadak4_index = ak4.pt.argmax()

        elejet_pairs = ak4[:, :1].cross(electrons)
        df['dREleJet'] = np.hypot(
            elejet_pairs.i0.eta - elejet_pairs.i1.eta,
            dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
        muonjet_pairs = ak4[:, :1].cross(muons)
        df['dRMuonJet'] = np.hypot(
            muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
            dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

        # Recoil
        df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                                   muons, photons)

        df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt
        df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]

        df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt

        df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                                  df['recoil_phi'],
                                                  njet=4,
                                                  ptmin=30,
                                                  etamax=5.0)
        df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                               met_phi,
                                               njet=4,
                                               ptmin=30,
                                               etamax=5.0)
        selection = processor.PackedSelection()

        # Triggers
        pass_all = np.ones(df.size) == 1
        selection.add('inclusive', pass_all)
        selection = trigger_selection(selection, df, cfg)

        selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

        # Common selection
        selection.add('veto_ele', electrons.counts == 0)
        selection.add('veto_muo', muons.counts == 0)
        selection.add('veto_photon', photons.counts == 0)
        selection.add('veto_tau', taus.counts == 0)
        selection.add('at_least_one_tau', taus.counts > 0)
        selection.add('veto_b', bjets.counts == 0)
        selection.add('mindphijr',
                      df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
        selection.add('mindphijm',
                      df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)

        selection.add('dpfcalo_sr',
                      np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
        selection.add('dpfcalo_cr',
                      np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)

        selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)
        selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL)

        # AK4 dijet
        diak4 = ak4[:, :2].distincts()
        leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
            np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
        trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
            np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
        hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
        has_track0 = np.abs(diak4.i0.eta) <= 2.5
        has_track1 = np.abs(diak4.i1.eta) <= 2.5

        leadak4_id = diak4.i0.tightId & (has_track0 * (
            (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
            (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
        trailak4_id = has_track1 * (
            (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
            (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

        df['mjj'] = diak4.mass.max()
        df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
        df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

        leading_jet_in_horn = ((diak4.i0.abseta < 3.2) &
                               (diak4.i0.abseta > 2.8)).any()
        trailing_jet_in_horn = ((diak4.i1.abseta < 3.2) &
                                (diak4.i1.abseta > 2.8)).any()

        selection.add('hornveto', (df['dPFTkSR'] < 0.8)
                      | ~(leading_jet_in_horn | trailing_jet_in_horn))

        if df['year'] == 2018:
            if df['is_data']:
                metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) &
                                   (df['run'] > 319077))
            else:
                metphihem_mask = pass_all
            selection.add("metphihemextveto", metphihem_mask)
            selection.add('no_el_in_hem',
                          electrons[electrons_in_hem(electrons)].counts == 0)
        else:
            selection.add("metphihemextveto", pass_all)
            selection.add('no_el_in_hem', pass_all)

        selection.add('two_jets', diak4.counts > 0)
        selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
        selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
        selection.add('hemisphere', hemisphere)
        selection.add('leadak4_id', leadak4_id.any())
        selection.add('trailak4_id', trailak4_id.any())
        selection.add('mjj',
                      df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
        selection.add(
            'dphijj',
            df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
        selection.add(
            'detajj',
            df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

        # Cleaning cuts for signal region
        max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef)
        selection.add('max_neEmEF', (max_neEmEF < 0.7).any())

        vec_b = calculate_vecB(ak4, met_pt, met_phi)
        vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])

        no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta >
                                                         2.5).any()
        no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta <
                                                        3.0).any()

        at_least_one_jet_in_hf = (diak4.i0.abseta >
                                  3.0).any() | (diak4.i1.abseta > 3.0).any()
        at_least_one_jet_in_trk = (diak4.i0.abseta <
                                   2.5).any() | (diak4.i1.abseta < 2.5).any()

        # Categorized cleaning cuts
        eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) &
                        (vec_dphi < 1.0)) | (
                            (no_jet_in_trk & at_least_one_jet_in_hf) &
                            (vec_b < 0.2))

        selection.add('eemitigation', eemitigation)

        # HF-HF veto in SR
        both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0)
        selection.add('veto_hfhf', ~both_jets_in_hf.any())

        # Divide into three categories for trigger study
        if cfg.RUN.TRIGGER_STUDY:
            two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
                diak4.i1.eta) <= 2.4)
            two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
                diak4.i1.eta) > 2.4)
            one_jet_forward_one_jet_central = (~two_central_jets) & (
                ~two_forward_jets)
            selection.add('two_central_jets', two_central_jets.any())
            selection.add('two_forward_jets', two_forward_jets.any())
            selection.add('one_jet_forward_one_jet_central',
                          one_jet_forward_one_jet_central.any())

        # Dimuon CR
        leadmuon_index = muons.pt.argmax()
        selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
        selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                    & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
        selection.add('dimuon_charge', (dimuon_charge == 0).any())
        selection.add('two_muons', muons.counts == 2)

        # Single muon CR
        selection.add('one_muon', muons.counts == 1)
        selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

        # Dielectron CR
        leadelectron_index = electrons.pt.argmax()

        selection.add('one_electron', electrons.counts == 1)
        selection.add('two_electrons', electrons.counts == 2)
        selection.add('at_least_one_tight_el', df['is_tight_electron'].any())


        selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)  \
                                        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
        selection.add('dielectron_charge', (dielectron_charge == 0).any())

        # Single Ele CR
        selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
        selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

        # Photon CR
        leadphoton_index = photons.pt.argmax()

        df['is_tight_photon'] = photons.mediumId & photons.barrel

        selection.add('one_photon', photons.counts == 1)
        selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
        selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
        selection.add('photon_pt_trig',
                      photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

        # Fill histograms
        output = self.accumulator.identity()

        # Gen
        if df['has_lhe_v_pt']:
            output['genvpt_check'].fill(vpt=gen_v_pt,
                                        type="Nano",
                                        dataset=dataset)

        if 'LHE_Njets' in df:
            output['lhe_njets'].fill(dataset=dataset,
                                     multiplicity=df['LHE_Njets'])
        if 'LHE_HT' in df:
            output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
        if 'LHE_HTIncoming' in df:
            output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

        # Weights
        evaluator = evaluator_from_config(cfg)

        weights = processor.Weights(size=df.size, storeIndividual=True)
        if not df['is_data']:
            weights.add('gen', df['Generator_weight'])

            try:
                weights.add('prefire', df['PrefireWeight'])
            except KeyError:
                weights.add('prefire', np.ones(df.size))

            weights = candidate_weights(weights, df, evaluator, muons,
                                        electrons, photons, cfg)
            weights = pileup_weights(weights, df, evaluator, cfg)
            weights = ak4_em_frac_weights(weights, diak4, evaluator)
            if not (gen_v_pt is None):
                weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                             df['mjj_gen'])

        # Save per-event values for synchronization
        if cfg.RUN.KINEMATICS.SAVE:
            for event in cfg.RUN.KINEMATICS.EVENTS:
                mask = df['event'] == event
                if not mask.any():
                    continue
                output['kinematics']['event'] += [event]
                output['kinematics']['met'] += [met_pt[mask]]
                output['kinematics']['met_phi'] += [met_phi[mask]]
                output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
                output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

                output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
                output['kinematics']['ak4eta0'] += [
                    ak4[leadak4_index][mask].eta
                ]
                output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]

                output['kinematics']['nLooseMu'] += [muons.counts[mask]]
                output['kinematics']['nTightMu'] += [
                    muons[df['is_tight_muon']].counts[mask]
                ]
                output['kinematics']['mupt0'] += [
                    muons[leadmuon_index][mask].pt
                ]
                output['kinematics']['mueta0'] += [
                    muons[leadmuon_index][mask].eta
                ]

                output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
                output['kinematics']['nTightEl'] += [
                    electrons[df['is_tight_electron']].counts[mask]
                ]
                output['kinematics']['elpt0'] += [
                    electrons[leadelectron_index][mask].pt
                ]
                output['kinematics']['eleta0'] += [
                    electrons[leadelectron_index][mask].eta
                ]

                output['kinematics']['nLooseGam'] += [photons.counts[mask]]
                output['kinematics']['nTightGam'] += [
                    photons[df['is_tight_photon']].counts[mask]
                ]
                output['kinematics']['gpt0'] += [
                    photons[leadphoton_index][mask].pt
                ]
                output['kinematics']['geta0'] += [
                    photons[leadphoton_index][mask].eta
                ]

        # Sum of all weights to use for normalization
        # TODO: Deal with systematic variations
        output['nevents'][dataset] += df.size
        if not df['is_data']:
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
            output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

        regions = vbfhinv_regions(cfg)

        # Get veto weights (only for MC)
        if not df['is_data']:
            veto_weights = get_veto_weights(df, cfg, evaluator, electrons,
                                            muons, taus)

        for region, cuts in regions.items():
            exclude = [None]
            region_weights = copy.deepcopy(weights)

            if not df['is_data']:
                ### Trigger weights
                if re.match(r'cr_(\d+)e.*', region):
                    p_pass_data = 1 - (1 -
                                       evaluator["trigger_electron_eff_data"]
                                       (electrons.etasc, electrons.pt)).prod()
                    p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"]
                                     (electrons.etasc, electrons.pt)).prod()
                    trigger_weight = p_pass_data / p_pass_mc
                    trigger_weight[np.isnan(trigger_weight)] = 1
                    region_weights.add('trigger', trigger_weight)
                elif re.match(r'cr_(\d+)m.*', region) or re.match(
                        'sr_.*', region):
                    region_weights.add(
                        'trigger_met',
                        evaluator["trigger_met"](df['recoil_pt']))
                elif re.match(r'cr_g.*', region):
                    photon_trigger_sf(region_weights, photons, df)

                # Veto weights
                if re.match('.*no_veto.*', region):
                    exclude = [
                        "muon_id_iso_tight", "muon_id_tight", "muon_iso_tight",
                        "muon_id_loose", "muon_iso_loose", "ele_reco",
                        "ele_id_tight", "ele_id_loose", "tau_id"
                    ]
                    region_weights.add(
                        "veto",
                        veto_weights.partial_weight(include=["nominal"]))

                # HEM-veto weights for signal region MC
                if re.match('^sr_vbf.*', region) and df['year'] == 2018:
                    # Events that lie in the HEM-veto region
                    events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6)
                    # Weight is the "good lumi fraction" for 2018
                    weight = 21.1 / 59.7
                    hem_weight = np.where(events_to_weight_mask, weight, 1.0)

                    region_weights.add("hem_weight", hem_weight)

            # This is the default weight for this region
            rweight = region_weights.partial_weight(exclude=exclude)

            # Blinding
            if (self._blind and df['is_data'] and region.startswith('sr')):
                continue

            # Cutflow plot for signal and control regions
            if any(x in region for x in ["sr", "cr", "tr"]):
                output['cutflow_' + region][dataset]['all'] += df.size
                for icut, cutname in enumerate(cuts):
                    output['cutflow_' +
                           region][dataset][cutname] += selection.all(
                               *cuts[:icut + 1]).sum()

            mask = selection.all(*cuts)

            if cfg.RUN.SAVE.TREE:
                if region in ['cr_1e_vbf', 'cr_1m_vbf']:
                    output['tree_int64'][region][
                        "event"] += processor.column_accumulator(
                            df["event"][mask])
                    output['tree_float16'][region][
                        "gen_v_pt"] += processor.column_accumulator(
                            np.float16(gen_v_pt[mask]))
                    output['tree_float16'][region][
                        "gen_mjj"] += processor.column_accumulator(
                            np.float16(df['mjj_gen'][mask]))
                    output['tree_float16'][region][
                        "recoil_pt"] += processor.column_accumulator(
                            np.float16(df["recoil_pt"][mask]))
                    output['tree_float16'][region][
                        "recoil_phi"] += processor.column_accumulator(
                            np.float16(df["recoil_phi"][mask]))
                    output['tree_float16'][region][
                        "mjj"] += processor.column_accumulator(
                            np.float16(df["mjj"][mask]))

                    output['tree_float16'][region][
                        "leadak4_pt"] += processor.column_accumulator(
                            np.float16(diak4.i0.pt[mask]))
                    output['tree_float16'][region][
                        "leadak4_eta"] += processor.column_accumulator(
                            np.float16(diak4.i0.eta[mask]))
                    output['tree_float16'][region][
                        "leadak4_phi"] += processor.column_accumulator(
                            np.float16(diak4.i0.phi[mask]))

                    output['tree_float16'][region][
                        "trailak4_pt"] += processor.column_accumulator(
                            np.float16(diak4.i1.pt[mask]))
                    output['tree_float16'][region][
                        "trailak4_eta"] += processor.column_accumulator(
                            np.float16(diak4.i1.eta[mask]))
                    output['tree_float16'][region][
                        "trailak4_phi"] += processor.column_accumulator(
                            np.float16(diak4.i1.phi[mask]))

                    output['tree_float16'][region][
                        "minDPhiJetRecoil"] += processor.column_accumulator(
                            np.float16(df["minDPhiJetRecoil"][mask]))
                    if '_1e_' in region:
                        output['tree_float16'][region][
                            "leadlep_pt"] += processor.column_accumulator(
                                np.float16(electrons.pt.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_eta"] += processor.column_accumulator(
                                np.float16(electrons[
                                    electrons.pt.argmax()].eta.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_phi"] += processor.column_accumulator(
                                np.float16(electrons[
                                    electrons.pt.argmax()].phi.max()[mask]))
                    elif '_1m_' in region:
                        output['tree_float16'][region][
                            "leadlep_pt"] += processor.column_accumulator(
                                np.float16(muons.pt.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_eta"] += processor.column_accumulator(
                                np.float16(
                                    muons[muons.pt.argmax()].eta.max()[mask]))
                        output['tree_float16'][region][
                            "leadlep_phi"] += processor.column_accumulator(
                                np.float16(
                                    muons[muons.pt.argmax()].phi.max()[mask]))

                    for name, w in region_weights._weights.items():
                        output['tree_float16'][region][
                            f"weight_{name}"] += processor.column_accumulator(
                                np.float16(w[mask]))
                    output['tree_float16'][region][
                        f"weight_total"] += processor.column_accumulator(
                            np.float16(rweight[mask]))
                if region == 'inclusive':
                    output['tree_int64'][region][
                        "event"] += processor.column_accumulator(
                            df["event"][mask])
                    for name in selection.names:
                        output['tree_bool'][region][
                            name] += processor.column_accumulator(
                                np.bool_(selection.all(*[name])[mask]))
            # Save the event numbers of events passing this selection
            if cfg.RUN.SAVE.PASSING:
                output['selected_events'][region] += list(df['event'][mask])

            # Multiplicities
            def fill_mult(name, candidates):
                """Fill the multiplicity histogram ``name`` for the current region.

                ``candidates`` is restricted to the events selected by the
                enclosing ``mask``; its per-event ``counts`` are filled with
                the matching slice of the region weight ``rweight``.
                """
                histogram = output[name]
                n_per_event = candidates[mask].counts
                event_weight = rweight[mask]
                histogram.fill(dataset=dataset,
                               region=region,
                               multiplicity=n_per_event,
                               weight=event_weight)

            fill_mult('ak4_mult', ak4[ak4.pt > 30])
            fill_mult('bjet_mult', bjets)
            fill_mult('loose_ele_mult', electrons)
            fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
            fill_mult('loose_muo_mult', muons)
            fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
            fill_mult('tau_mult', taus)
            fill_mult('photon_mult', photons)

            def ezfill(name, **kwargs):
                """Fill ``output[name]`` with the enclosing dataset and region.

                All other keyword arguments are forwarded to ``fill``
                unchanged, so callers only pass the axis values and weight.
                """
                histogram = output[name]
                histogram.fill(dataset=dataset, region=region, **kwargs)

            # Monitor weights
            for wname, wvalue in region_weights._weights.items():
                ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
                ezfill("weights_wide",
                       weight_type=wname,
                       weight_value=wvalue[mask])

            # All ak4
            # This is a workaround to create a weight array of the right dimension
            w_alljets = weight_shape(ak4[mask].eta, rweight[mask])
            w_alljets_nopref = weight_shape(
                ak4[mask].eta,
                region_weights.partial_weight(exclude=exclude +
                                              ['prefire'])[mask])

            ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
            ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
            ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

            ezfill('ak4_eta_nopref',
                   jeteta=ak4[mask].eta.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_phi_nopref',
                   jetphi=ak4[mask].phi.flatten(),
                   weight=w_alljets_nopref)
            ezfill('ak4_pt_nopref',
                   jetpt=ak4[mask].pt.flatten(),
                   weight=w_alljets_nopref)

            # Leading ak4
            w_diak4 = weight_shape(diak4.pt[mask], rweight[mask])
            ezfill('ak4_eta0',
                   jeteta=diak4.i0.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi0',
                   jetphi=diak4.i0.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt0',
                   jetpt=diak4.i0.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw0',
                   jetpt=diak4.i0.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf0',
                   frac=diak4.i0.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf0',
                   frac=diak4.i0.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst0',
                   nconst=diak4.i0.nconst[mask].flatten(),
                   weight=w_diak4)

            # Trailing ak4
            ezfill('ak4_eta1',
                   jeteta=diak4.i1.eta[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_phi1',
                   jetphi=diak4.i1.phi[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_pt1',
                   jetpt=diak4.i1.pt[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_ptraw1',
                   jetpt=diak4.i1.ptraw[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_chf1',
                   frac=diak4.i1.chf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nhf1',
                   frac=diak4.i1.nhf[mask].flatten(),
                   weight=w_diak4)
            ezfill('ak4_nconst1',
                   nconst=diak4.i1.nconst[mask].flatten(),
                   weight=w_diak4)

            # B tag discriminator
            btag = getattr(ak4, cfg.BTAG.ALGO)
            w_btag = weight_shape(btag[mask], rweight[mask])
            ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

            # MET
            ezfill('dpfcalo_cr',
                   dpfcalo=df["dPFCaloCR"][mask],
                   weight=rweight[mask])
            ezfill('dpfcalo_sr',
                   dpfcalo=df["dPFCaloSR"][mask],
                   weight=rweight[mask])
            ezfill('met', met=met_pt[mask], weight=rweight[mask])
            ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask])
            ezfill('recoil',
                   recoil=df["recoil_pt"][mask],
                   weight=rweight[mask])
            ezfill('recoil_phi',
                   phi=df["recoil_phi"][mask],
                   weight=rweight[mask])
            ezfill('dphijm',
                   dphi=df["minDPhiJetMet"][mask],
                   weight=rweight[mask])
            ezfill('dphijr',
                   dphi=df["minDPhiJetRecoil"][mask],
                   weight=rweight[mask])

            ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask])
            ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask])
            ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask])

            if gen_v_pt is not None:
                ezfill('gen_vpt',
                       vpt=gen_v_pt[mask],
                       weight=df['Generator_weight'][mask])
                ezfill('gen_mjj',
                       mjj=df['mjj_gen'][mask],
                       weight=df['Generator_weight'][mask])

            # Photon CR data-driven QCD estimate
            if df['is_data'] and re.match("cr_g.*", region) and re.match(
                    "(SinglePhoton|EGamma).*", dataset):
                w_imp = photon_impurity_weights(
                    photons[leadphoton_index].pt.max()[mask], df["year"])
                output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset),
                                   region=region,
                                   mjj=df["mjj"][mask],
                                   weight=rweight[mask] * w_imp)
                output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset),
                                      region=region,
                                      recoil=df["recoil_pt"][mask],
                                      weight=rweight[mask] * w_imp)

            # Uncertainty variations
            if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']:
                theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')]
                for unc in theory_uncs:
                    reweight = evaluator[unc](gen_v_pt)
                    w = (region_weights.weight() * reweight)[mask]
                    ezfill('mjj_unc',
                           mjj=df['mjj'][mask],
                           uncertainty=unc,
                           weight=w)

            # Two dimensional
            ezfill('recoil_mjj',
                   recoil=df["recoil_pt"][mask],
                   mjj=df["mjj"][mask],
                   weight=rweight[mask])

            # Muons
            if '_1m_' in region or '_2m_' in region or 'no_veto' in region:
                w_allmu = weight_shape(muons.pt[mask], rweight[mask])
                ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
                ezfill('muon_pt_abseta',
                       pt=muons.pt[mask].flatten(),
                       abseta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask])
                ezfill('muon_eta',
                       eta=muons.eta[mask].flatten(),
                       weight=w_allmu)
                ezfill('muon_phi',
                       phi=muons.phi[mask].flatten(),
                       weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask], rweight[mask])
                ezfill('muon_pt0',
                       pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1',
                       pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0',
                       eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1',
                       eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0',
                       phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1',
                       phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt',
                       pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta',
                       eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

            # Electrons
            if '_1e_' in region or '_2e_' in region or 'no_veto' in region:
                w_allel = weight_shape(electrons.pt[mask], rweight[mask])
                ezfill('electron_pt',
                       pt=electrons.pt[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_pt_eta',
                       pt=electrons.pt[mask].flatten(),
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_mt',
                       mt=df['MT_el'][mask],
                       weight=rweight[mask])
                ezfill('electron_eta',
                       eta=electrons.eta[mask].flatten(),
                       weight=w_allel)
                ezfill('electron_phi',
                       phi=electrons.phi[mask].flatten(),
                       weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask], rweight[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

            # Photon
            if '_g_' in region:
                w_leading_photon = weight_shape(
                    photons[leadphoton_index].pt[mask], rweight[mask])
                ezfill('photon_pt0',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_eta0',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_phi0',
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)
                ezfill('photon_pt0_recoil',
                       pt=photons[leadphoton_index].pt[mask].flatten(),
                       recoil=df['recoil_pt'][mask
                                              & (leadphoton_index.counts > 0)],
                       weight=w_leading_photon)
                ezfill('photon_eta_phi',
                       eta=photons[leadphoton_index].eta[mask].flatten(),
                       phi=photons[leadphoton_index].phi[mask].flatten(),
                       weight=w_leading_photon)

                # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], rweight[mask])

            # Tau
            if 'no_veto' in region:
                w_all_taus = weight_shape(taus.pt[mask], rweight[mask])
                ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus)

            # PV
            ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask])
            ezfill('npvgood',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=rweight[mask])

            ezfill('npv_nopu',
                   nvtx=df['PV_npvs'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
            ezfill('npvgood_nopu',
                   nvtx=df['PV_npvsGood'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])

            ezfill('rho_all',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=region_weights.partial_weight(exclude=exclude)[mask])
            ezfill('rho_central',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=region_weights.partial_weight(exclude=exclude)[mask])
            ezfill('rho_all_nopu',
                   rho=df['fixedGridRhoFastjetAll'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
            ezfill('rho_central_nopu',
                   rho=df['fixedGridRhoFastjetCentral'][mask],
                   weight=region_weights.partial_weight(exclude=exclude +
                                                        ['pileup'])[mask])
        return output
    def __init__(self, dphi_control=False, data_type='sig'):
        """Store the run options and create the empty output columns.

        Args:
            dphi_control: Stored unchanged on ``self.dphi_control``.
            data_type: Stored unchanged on ``self.data_type``
                (defaults to ``'sig'``).
        """
        self.dphi_control = dphi_control
        self.data_type = data_type

        # Names of the per-entry output columns, in the order they are
        # registered in the accumulator.
        column_names = (
            'all05',
            'nopu05',
            'dbeta',
            'all05w',
            'nopu05w',
            'dbetaw',
            'pt',
            'eta',
            'wgt',
            'ljtype',
            'channel',
        )
        # Every column starts as its own zero-length 1-D array; np.zeros is
        # evaluated once per key, so no buffer is shared between columns.
        self._accumulator = processor.dict_accumulator({
            name: processor.column_accumulator(np.zeros(shape=(0, )))
            for name in column_names
        })

        self.pucorrs = get_pu_weights_function()
        ## NOT applied for now
        self.nlo_w = get_nlo_weight_function('w')
        self.nlo_z = get_nlo_weight_function('z')
    def process_shift(self, events, shift_name):
        dataset = events.metadata['dataset']
        isRealData = not hasattr(events, "genWeight")
        selection = PackedSelection()
        weights = Weights(len(events), storeIndividual=True)
        output = self.make_output()
        if shift_name is None and not isRealData:
            output['sumw'] = ak.sum(events.genWeight)

        if isRealData or self._newTrigger:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._triggers[self._year]:
                if t in events.HLT.fields:
                    trigger = trigger | events.HLT[t]
            selection.add('trigger', trigger)
            del trigger
        else:
            selection.add('trigger', np.ones(len(events), dtype='bool'))

        if isRealData:
            selection.add(
                'lumimask', lumiMasks[self._year](events.run,
                                                  events.luminosityBlock))
        else:
            selection.add('lumimask', np.ones(len(events), dtype='bool'))

        if isRealData and self._skipRunB and self._year == '2017':
            selection.add('dropB', events.run > 299329)
        else:
            selection.add('dropB', np.ones(len(events), dtype='bool'))

        if isRealData:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._muontriggers[self._year]:
                if t in events.HLT.fields:
                    trigger |= np.array(events.HLT[t])
            selection.add('muontrigger', trigger)
            del trigger
        else:
            selection.add('muontrigger', np.ones(len(events), dtype='bool'))

        metfilter = np.ones(len(events), dtype='bool')
        for flag in self._met_filters[
                self._year]['data' if isRealData else 'mc']:
            metfilter &= np.array(events.Flag[flag])
        selection.add('metfilter', metfilter)
        del metfilter

        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            & fatjets.isTight  # this is loose in sampleContainer
        ]

        candidatejet = candidatejet[:, :
                                    2]  # Only consider first two to match generators
        if self._jet_arbitration == 'pt':
            candidatejet = ak.firsts(candidatejet)
        elif self._jet_arbitration == 'mass':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.msdcorr, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'n2':
            candidatejet = ak.firsts(candidatejet[ak.argmin(candidatejet.n2ddt,
                                                            axis=1,
                                                            keepdims=True)])
        elif self._jet_arbitration == 'ddb':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDBvLV2, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'ddc':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDCvLV2, axis=1, keepdims=True)])
        else:
            raise RuntimeError("Unknown candidate jet arbitration")

        if self._tagger == 'v1':
            bvl = candidatejet.btagDDBvL
            cvl = candidatejet.btagDDCvL
            cvb = candidatejet.btagDDCvB
        elif self._tagger == 'v2':
            bvl = candidatejet.btagDDBvLV2
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.btagDDCvBV2
        elif self._tagger == 'v3':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.particleNetMD_Xcc / (
                1 - candidatejet.particleNetMD_Xbb)
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)

        elif self._tagger == 'v4':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)
        else:
            raise ValueError("Not an option")

        selection.add('minjetkin', (candidatejet.pt >= 450)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('_strict_mass', (candidatejet.msdcorr > 85) &
                      (candidatejet.msdcorr < 130))
        selection.add('_high_score', cvl > 0.8)
        selection.add('minjetkinmu', (candidatejet.pt >= 400)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('minjetkinw', (candidatejet.pt >= 200)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('jetid', candidatejet.isTight)
        selection.add('n2ddt', (candidatejet.n2ddt < 0.))
        if not self._tagger == 'v2':
            selection.add('ddbpass', (bvl >= 0.89))
            selection.add('ddcpass', (cvl >= 0.83))
            selection.add('ddcvbpass', (cvb >= 0.2))
        else:
            selection.add('ddbpass', (bvl >= 0.7))
            selection.add('ddcpass', (cvl >= 0.45))
            selection.add('ddcvbpass', (cvb >= 0.03))

        jets = events.Jet
        jets = jets[(jets.pt > 30.) & (abs(jets.eta) < 2.5) & jets.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        dphi = abs(jets.delta_phi(candidatejet))
        selection.add(
            'antiak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) <
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        ak4_away = jets[dphi > 0.8]
        selection.add(
            'ak4btagMedium08',
            ak.max(ak4_away[self._ak4tagBranch], axis=1, mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])

        met = events.MET
        selection.add('met', met.pt < 140.)

        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & events.Muon.looseId)
        nmuons = ak.sum(goodmuon, axis=1)
        leadingmuon = ak.firsts(events.Muon[goodmuon])

        if self._looseTau:
            goodelectron = ((events.Electron.pt > 10)
                            & (abs(events.Electron.eta) < 2.5)
                            &
                            (events.Electron.cutBased >= events.Electron.VETO))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                ((events.Tau.pt > 20)
                 & (abs(events.Tau.eta) < 2.3)
                 & events.Tau.idDecayMode
                 & ((events.Tau.idMVAoldDM2017v2 & 2) != 0)
                 & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                          axis=2)
                 & ak.all(events.Tau.metric_table(
                     events.Electron[goodelectron]) > 0.4,
                          axis=2)),
                axis=1,
            )
        else:
            goodelectron = (
                (events.Electron.pt > 10)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.LOOSE))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                (events.Tau.pt > 20)
                &
                events.Tau.idDecayMode  # bacon iso looser than Nano selection
                & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                         axis=2)
                & ak.all(events.Tau.metric_table(events.Electron[goodelectron])
                         > 0.4,
                         axis=2),
                axis=1,
            )

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin',
                      (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1))
        selection.add('muonDphiAK8',
                      abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3)

        # W-Tag (Tag and Probe)
        # tag side
        selection.add(
            'ak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        selection.add('met40p', met.pt > 40.)
        selection.add('tightMuon',
                      (leadingmuon.tightId) & (leadingmuon.pt > 53.))
        # selection.add('ptrecoW', (leadingmuon + met).pt > 250.)
        selection.add('ptrecoW200', (leadingmuon + met).pt > 200.)
        selection.add(
            'ak4btagNearMu',
            leadingmuon.delta_r(leadingmuon.nearest(ak4_away, axis=None)) <
            2.0)
        _bjets = jets[self._ak4tagBranch] > BTagEfficiency.btagWPs[
            self._ak4tagger][self._year]['medium']
        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(ak.firsts(events.Muon))  < 0.3
        # selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)
        _nearAK8 = jets.delta_r(candidatejet) < 0.8
        _nearMu = jets.delta_r(leadingmuon) < 0.3
        selection.add('ak4btagOld',
                      ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None))  < 0.3
        # selection.add('ak4btagNew', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # probe side
        selection.add('minWjetpteta',
                      (candidatejet.pt >= 200) & (abs(candidatejet.eta) < 2.4))
        # selection.add('noNearMuon', candidatejet.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) > 1.0)
        selection.add('noNearMuon', candidatejet.delta_r(leadingmuon) > 1.0)
        #####

        if isRealData:
            genflavor = ak.zeros_like(candidatejet.pt)
        else:
            if 'HToCC' in dataset or 'HToBB' in dataset:
                if self._ewkHcorr:
                    add_HiggsEW_kFactors(weights, events.GenPart, dataset)

            weights.add('genweight', events.genWeight)
            if "PSWeight" in events.fields:
                add_ps_weight(weights, events.PSWeight)
            else:
                add_ps_weight(weights, None)
            if "LHEPdfWeight" in events.fields:
                add_pdf_weight(weights, events.LHEPdfWeight)
            else:
                add_pdf_weight(weights, None)
            if "LHEScaleWeight" in events.fields:
                add_scalevar_7pt(weights, events.LHEScaleWeight)
                add_scalevar_3pt(weights, events.LHEScaleWeight)
            else:
                add_scalevar_7pt(weights, [])
                add_scalevar_3pt(weights, [])

            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            bosons = getBosons(events.GenPart)
            matchedBoson = candidatejet.nearest(bosons,
                                                axis=None,
                                                threshold=0.8)
            if self._tightMatch:
                match_mask = (
                    (candidatejet.pt - matchedBoson.pt) / matchedBoson.pt <
                    0.5) & ((candidatejet.msdcorr - matchedBoson.mass) /
                            matchedBoson.mass < 0.3)
                selmatchedBoson = ak.mask(matchedBoson, match_mask)
                genflavor = bosonFlavor(selmatchedBoson)
            else:
                genflavor = bosonFlavor(matchedBoson)
            genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
            if self._newVjetsKfactor:
                add_VJets_kFactors(weights, events.GenPart, dataset)
            else:
                add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            if shift_name is None:
                output['btagWeight'].fill(val=self._btagSF.addBtagWeight(
                    weights, ak4_away, self._ak4tagBranch))
            if self._nnlops_rew and dataset in [
                    'GluGluHToCC_M125_13TeV_powheg_pythia8'
            ]:
                weights.add('minlo_rew',
                            powheg_to_nnlops(ak.to_numpy(genBosonPt)))

            if self._newTrigger:
                add_jetTriggerSF(
                    weights, ak.firsts(fatjets),
                    self._year if not self._skipRunB else f'{self._year}CDEF',
                    selection)
            else:
                add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                     candidatejet.pt, self._year)

            add_mutriggerSF(weights, leadingmuon, self._year, selection)
            add_mucorrectionsSF(weights, leadingmuon, self._year, selection)

            if self._year in ("2016", "2017"):
                weights.add("L1Prefiring", events.L1PreFiringWeight.Nom,
                            events.L1PreFiringWeight.Up,
                            events.L1PreFiringWeight.Dn)

            logger.debug("Weight statistics: %r" % weights.weightStatistics)

        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        regions = {
            'signal': [
                'noleptons', 'minjetkin', 'met', 'metfilter', 'jetid',
                'antiak4btagMediumOppHem', 'n2ddt', 'trigger', 'lumimask'
            ],
            'signal_noddt': [
                'noleptons', 'minjetkin', 'met', 'jetid',
                'antiak4btagMediumOppHem', 'trigger', 'lumimask', 'metfilter'
            ],
            # 'muoncontrol': ['minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask', 'metfilter'],
            'muoncontrol': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'n2ddt',
                'muontrigger', 'lumimask'
            ],
            'muoncontrol_noddt': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'jetid', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'muontrigger',
                'lumimask'
            ],
            'wtag': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'met40p',
                'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger',
                'lumimask'
            ],
            'wtag0': [
                'onemuon', 'tightMuon', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'wtag2': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid',
                'ak4btagMediumOppHem', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'noselection': [],
        }

        def normalize(val, cut):
            if cut is None:
                ar = ak.to_numpy(ak.fill_none(val, np.nan))
                return ar
            else:
                ar = ak.to_numpy(ak.fill_none(val[cut], np.nan))
                return ar

        import time
        tic = time.time()
        if shift_name is None:
            for region, cuts in regions.items():
                allcuts = set([])
                cut = selection.all(*allcuts)
                output['cutflow_msd'].fill(region=region,
                                           genflavor=normalize(
                                               genflavor, None),
                                           cut=0,
                                           weight=weights.weight(),
                                           msd=normalize(msd_matched, None))
                output['cutflow_eta'].fill(region=region,
                                           genflavor=normalize(genflavor, cut),
                                           cut=0,
                                           weight=weights.weight()[cut],
                                           eta=normalize(
                                               candidatejet.eta, cut))
                output['cutflow_pt'].fill(region=region,
                                          genflavor=normalize(genflavor, cut),
                                          cut=0,
                                          weight=weights.weight()[cut],
                                          pt=normalize(candidatejet.pt, cut))
                for i, cut in enumerate(cuts + ['ddcvbpass', 'ddcpass']):
                    allcuts.add(cut)
                    cut = selection.all(*allcuts)
                    output['cutflow_msd'].fill(region=region,
                                               genflavor=normalize(
                                                   genflavor, cut),
                                               cut=i + 1,
                                               weight=weights.weight()[cut],
                                               msd=normalize(msd_matched, cut))
                    output['cutflow_eta'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        eta=normalize(candidatejet.eta, cut))
                    output['cutflow_pt'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        pt=normalize(candidatejet.pt, cut))

                    if self._evtVizInfo and 'ddcpass' in allcuts and isRealData and region == 'signal':
                        if 'event' not in events.fields:
                            continue
                        _cut = selection.all(*allcuts, '_strict_mass',
                                             '_high_score')
                        # _cut = selection.all('_strict_mass'')
                        output['to_check'][
                            'mass'] += processor.column_accumulator(
                                normalize(msd_matched, _cut))
                        nfatjet = ak.sum(
                            ((fatjets.pt > 200) &
                             (abs(fatjets.eta) < 2.5) & fatjets.isTight),
                            axis=1)
                        output['to_check'][
                            'njet'] += processor.column_accumulator(
                                normalize(nfatjet, _cut))
                        output['to_check'][
                            'fname'] += processor.column_accumulator(
                                np.array([events.metadata['filename']] *
                                         len(normalize(msd_matched, _cut))))
                        output['to_check'][
                            'event'] += processor.column_accumulator(
                                normalize(events.event, _cut))
                        output['to_check'][
                            'luminosityBlock'] += processor.column_accumulator(
                                normalize(events.luminosityBlock, _cut))
                        output['to_check'][
                            'run'] += processor.column_accumulator(
                                normalize(events.run, _cut))

        if shift_name is None:
            systematics = [None] + list(weights.variations)
        else:
            systematics = [shift_name]

        def fill(region, systematic, wmod=None):
            """Fill the per-region histograms for one systematic variation.

            region: key into ``regions``; the selection names stored there
                are combined with ``selection.all``.
            systematic: variation name, or None for the nominal fill (which
                is stored under the label 'nominal').
            wmod: optional per-event weight modifier. When given, it
                multiplies the nominal event weight and ``systematic`` only
                serves as the histogram label.
            """
            mask = selection.all(*regions[region])
            syst_label = systematic if systematic is not None else 'nominal'

            # Event weight: explicit modifier first, then a named weight
            # variation, otherwise the nominal weight.
            if wmod is not None:
                evt_weight = weights.weight()[mask] * wmod[mask]
            elif systematic in weights.variations:
                evt_weight = weights.weight(modifier=systematic)[mask]
            else:
                evt_weight = weights.weight()[mask]

            # Columns shared by several histograms below.
            flavor = normalize(genflavor, mask)
            jet_pt = normalize(candidatejet.pt, mask)
            jet_msd = normalize(msd_matched, mask)
            score_bvl = normalize(bvl, mask)
            score_cvl = normalize(cvl, mask)
            score_cvb = normalize(cvb, mask)

            output['templates'].fill(
                region=region,
                systematic=syst_label,
                runid=runmap(events.run)[mask],
                genflavor=flavor,
                pt=jet_pt,
                msd=jet_msd,
                ddb=score_bvl,
                ddc=score_cvl,
                ddcvb=score_cvb,
                weight=evt_weight,
            )

            wtag_regions = (
                'wtag', 'wtag0', 'wtag2', 'wtag3', 'wtag4', 'wtag5',
                'wtag6', 'wtag7', 'noselection',
            )
            # A further restriction is currently disabled:
            # and sname in ['nominal', 'pileup_weightDown', 'pileup_weightUp', 'jet_triggerDown', 'jet_triggerUp']
            if region in wtag_regions:
                output['wtag'].fill(
                    region=region,
                    systematic=syst_label,
                    genflavor=flavor,
                    pt=jet_pt,
                    msd=jet_msd,
                    n2ddt=normalize(candidatejet.n2ddt, mask),
                    ddc=score_cvl,
                    ddcvb=score_cvb,
                    weight=evt_weight,
                )
            # if region in ['signal', 'noselection']:
            #     output['etaphi'].fill(
            #         region=region,
            #         systematic=sname,
            #         runid=runmap(events.run)[cut],
            #         genflavor=normalize(genflavor, cut),
            #         pt=normalize(candidatejet.pt, cut),
            #         eta=normalize(candidatejet.eta, cut),
            #         phi=normalize(candidatejet.phi, cut),
            #         ddc=normalize(cvl, cut),
            #         ddcvb=normalize(cvb, cut),
            #     ),

            if not isRealData:
                # The "noweight" response uses unit weights unless a modifier
                # is supplied, in which case it is genWeight * modifier.
                if wmod is None:
                    response_weight = np.ones_like(evt_weight)
                else:
                    response_weight = events.genWeight[mask] * wmod[mask]
                gen_pt = normalize(genBosonPt, mask)
                for hist_name, hist_weight in (
                        ('genresponse_noweight', response_weight),
                        ('genresponse', evt_weight),
                ):
                    output[hist_name].fill(
                        region=region,
                        systematic=syst_label,
                        pt=jet_pt,
                        genpt=gen_pt,
                        weight=hist_weight,
                    )

            # The optimisation histograms have no systematic axis and are
            # filled for the nominal pass only.
            if systematic is None:
                output['signal_opt'].fill(
                    region=region,
                    genflavor=flavor,
                    ddc=score_cvl,
                    ddcvb=score_cvb,
                    msd=jet_msd,
                    weight=evt_weight,
                )
                output['signal_optb'].fill(
                    region=region,
                    genflavor=flavor,
                    ddb=score_bvl,
                    msd=jet_msd,
                    weight=evt_weight,
                )

        for region in regions:
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            if shift_name is None:
                output['nminus1_n2ddt'].fill(
                    region=region,
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    weight=weights.weight()[cut],
                )
            for systematic in systematics:
                if isRealData and systematic is not None:
                    continue
                fill(region, systematic)
            if shift_name is None and 'GluGluH' in dataset and 'LHEWeight' in events.fields:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:,
                                                                          i])
                for c in events.LHEWeight.fields[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        toc = time.time()
        output["filltime"] = toc - tic
        if shift_name is None:
            output["weightStats"] = weights.weightStatistics
        return {dataset: output}
    def process(self, events):
        """Collect flat per-event columns for events passing the baseline selection.

        Events with more than two jets are kept, lepton/jet collections and
        derived kinematic quantities are built, and for every event passing
        the ``bl_reqs`` requirements one entry per output column (lepton,
        jet, b-tag and forward-jet kinematics, multiplicities, label and
        weight) is appended to the matching column accumulator.

        Args:
            events: event chunk; must expose ``metadata['dataset']`` and the
                collections read below (presumably a NanoEvents array -
                TODO confirm against the caller).

        Returns:
            The accumulator filled for this chunk.
        """
        output = self.accumulator.identity()

        output['total']['all'] += len(events)
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # NOTE(review): cfg is never read in this method; the call is kept
        # only in case loadConfig has side effects - confirm and drop.
        cfg = loadConfig()

        ## Muons
        muon = Collections(ev, "Muon", "vetoTTH").get()
        tightmuon = Collections(ev, "Muon", "tightTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)

        ## Electrons
        electron = Collections(ev, "Electron", "vetoTTH").get()
        tightelectron = Collections(ev, "Electron", "tightTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)

        lepton = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton + trailing_lepton).mass
        dilepton_pt = (leading_lepton + trailing_lepton).pt

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)

        tau = getTaus(ev)
        track = getIsoTracks(ev)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator

        # Two central jets with the highest b-tag score. ak.argsort sorts
        # ascending by default, so descending order must be requested
        # explicitly to get the highest scores first.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB,
                                             ascending=False)][:, :2]

        bl = cross(lepton, high_score_btag)
        bl_dR = delta_r(bl['0'], bl['1'])
        min_bl_dR = ak.min(bl_dR, axis=1)

        jf = cross(j_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        j_fwd2 = jf[ak.singletons(
            ak.argmax(mjf, axis=1)
        )]['1']  # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta = ak.fill_none(
            ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2)
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(electron) + ak.num(muon)) == 2)

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('SS', (SSlepton | SSelectron | SSmuon))
        selection.add('N_jet>3', (ak.num(jet) >= 4))
        selection.add('N_central>2', (ak.num(central) >= 3))
        selection.add('N_btag>0', (ak.num(btag) >= 1))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))

        ss_reqs = [
            'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'
        ]
        # 'N_fwd>0' is registered above but deliberately not required here
        bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0']

        ss_selection = selection.require(**{req: True for req in ss_reqs})
        BL = selection.require(**{req: True for req in bl_reqs})

        weight = Weights(len(ev))

        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight)

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        #cutflow     = Cutflow(output, ev, weight=weight)
        #cutflow_reqs_d = {}
        #for req in bl_reqs:
        #    cutflow_reqs_d.update({req: True})
        #    cutflow.addRow( req, selection.require(**cutflow_reqs_d) )

        # Integer label written for every selected event of a dataset;
        # datasets that are not listed get 5.
        labels = {
            'topW_v3': 0,
            'TTW': 1,
            'TTZ': 2,
            'TTH': 3,
            'ttbar': 4,
            'ttbar1l_MG': 4
        }
        label_mult = labels.get(dataset, 5)
        n_selected = len(ev[BL])

        def store(name, values):
            # Append one column of values to the accumulator called `name`.
            output[name] += processor.column_accumulator(ak.to_numpy(values))

        def flat(values):
            # Remove the per-event nesting (axis=1) of a jagged array.
            return ak.flatten(values, axis=1)

        def flat_or_zero(values):
            # Like flat(), but events without an entry contribute a single 0.
            return flat(ak.fill_none(ak.pad_none(values, 1, clip=True), 0))

        store("n_lep", (ak.num(electron) + ak.num(muon))[BL])
        store("n_lep_tight", (ak.num(tightelectron) + ak.num(tightmuon))[BL])

        for prefix, obj in (
                ('lead_lep', leading_lepton[BL]),
                ('sublead_lep', trailing_lepton[BL]),
        ):
            for field in ('pt', 'eta', 'phi', 'charge'):
                store('%s_%s' % (prefix, field), flat(getattr(obj, field)))

        for prefix, obj in (
                ('lead_jet', jet[:, 0:1][BL]),
                ('sublead_jet', jet[:, 1:2][BL]),
                ('lead_btag', high_score_btag[:, 0:1][BL]),
                ('sublead_btag', high_score_btag[:, 1:2][BL]),
        ):
            for field in ('pt', 'eta', 'phi'):
                store('%s_%s' % (prefix, field), flat(getattr(obj, field)))

        # The baseline does not require a forward jet, hence the zero padding.
        fwd_selected = j_fwd[BL]
        for field in ('p', 'pt', 'eta', 'phi'):
            store('fwd_jet_%s' % field,
                  flat_or_zero(getattr(fwd_selected, field)))

        store("mjj_max", ak.fill_none(ak.max(mjf[BL], axis=1), 0))
        store("delta_eta_jj", flat(delta_eta[BL]))

        store("met", met_pt[BL])
        store("ht", ht[BL])
        store("st", st[BL])

        for name, collection in (
                ('n_jet', jet),
                ('n_btag', btag),
                ('n_fwd', fwd),
                ('n_central', central),
                ('n_tau', tau),
                ('n_track', track),
        ):
            store(name, ak.num(collection[BL]))

        store("dilepton_pt", flat(dilepton_pt[BL]))
        store("dilepton_mass", flat(dilepton_mass[BL]))
        store("min_bl_dR", min_bl_dR[BL])
        store("min_mt_lep_met", min_mt_lep_met[BL])

        output["label"] += processor.column_accumulator(
            np.ones(n_selected) * label_mult)
        output["weight"] += processor.column_accumulator(weight.weight()[BL])

        output["presel"]["all"] += len(ev[ss_selection])
        output["sel"]["all"] += n_selected

        return output
Beispiel #19
0
    def process(self, events):
        """Fill the b-tag multiplicity histograms for the dilepton baseline.

        Events with more than two jets are kept. The ``N_b`` histogram is
        filled with the number of b-tagged jets for events passing
        ``dilep_baseline(SS=False)`` with the 'N_btag>0' requirement omitted.
        For datasets whose name matches MuonEG/DoubleMuon/DoubleEG/EGamma the
        run, luminosity block and event numbers of events passing the full
        baseline are stored as well. For all other datasets one additional
        ``N_b_<variation>`` histogram is filled per entry of
        ``self.variations`` using the matching b-tag scale-factor variation.

        Args:
            events: event chunk; must expose ``metadata['dataset']`` and the
                collections read below (presumably a NanoEvents array -
                TODO confirm against the caller).

        Returns:
            The accumulator filled for this chunk.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # the config provides the luminosity used for the MC weights
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # Datasets matching these names get no weights and no SF variations.
        is_data = re.search('MuonEG|DoubleMuon|DoubleEG|EGamma',
                            dataset) is not None

        ## Muons
        muon = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()

        ## Electrons
        electron = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)

        if not is_data:
            # These inputs are identical for the nominal weight and for
            # every variation, so they are evaluated once.
            lumi_weight = ev.weight * cfg['lumi'][self.year]
            lepton_sf = self.leptonSF.get(electron, muon)

        def build_mc_weight(directions):
            # Build the MC event weight. `directions` is a
            # (b_direction, c_direction) pair for the b-tag SF, or None to
            # leave the b-tag SF out.
            mc_weight = Weights(len(ev))

            # lumi weight
            mc_weight.add("weight", lumi_weight)

            # PU weight - not in the babies...
            mc_weight.add("PU",
                          ev.puWeight,
                          weightUp=ev.puWeightUp,
                          weightDown=ev.puWeightDown,
                          shift=False)

            # b-tag SFs
            if directions is not None:
                b_direction, c_direction = directions
                mc_weight.add(
                    "btag",
                    self.btagSF.Method1a(btag,
                                         light,
                                         b_direction=b_direction,
                                         c_direction=c_direction))

            # lepton SFs
            mc_weight.add("lepton", lepton_sf)
            return mc_weight

        # define the weight
        if is_data:
            weight = Weights(len(ev))
        else:
            weight = build_mc_weight(('central', 'central'))

        sel = Selection(
            dataset=dataset,
            events=ev,
            year=self.year,
            ele=electron,
            ele_veto=vetoelectron,
            mu=muon,
            mu_veto=vetomuon,
            jet_all=jet,
            jet_central=central,
            jet_btag=btag,
            jet_fwd=fwd,
            met=ev.MET,
        )

        # The selection and the b-jet multiplicity do not depend on the
        # scale-factor variation, so they are shared by all fills below.
        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        n_btag = ak.num(btag)[BL_minusNb]

        output['N_b'].fill(dataset=dataset,
                           multiplicity=n_btag,
                           weight=weight.weight()[BL_minusNb])

        if is_data:
            # keep run/lumi/event of the events passing the full baseline
            BL = sel.dilep_baseline(SS=False)
            output['%s_run' % dataset] += processor.column_accumulator(
                ak.to_numpy(ev.run)[BL])
            output['%s_lumi' % dataset] += processor.column_accumulator(
                ak.to_numpy(ev.luminosityBlock)[BL])
            output['%s_event' % dataset] += processor.column_accumulator(
                ak.to_numpy(ev.event)[BL])
            return output

        # Now, take care of systematic uncertainties.
        # variation name -> (b_direction, c_direction) of the b-tag SF
        btag_directions = {
            'centralUp': ('central', 'up'),
            'centralDown': ('central', 'down'),
            'upCentral': ('up', 'central'),
            'downCentral': ('down', 'central'),
        }
        for var in self.variations:
            # A variation name missing from the table gets no b-tag SF at all.
            var_weight = build_mc_weight(btag_directions.get(var))
            output['N_b_' + var].fill(
                dataset=dataset,
                multiplicity=n_btag,
                weight=var_weight.weight()[BL_minusNb])

        return output
    def process(self, events):

        output = self.accumulator.identity()

        dataset = events.metadata['dataset']

        output['sumw'][dataset] += ak.sum(np.sign(events.Generator.weight))
        output['nevents'][dataset] += len(events)

        output = self.accumulator.identity()

        dataset = events.metadata['dataset']

        if dataset not in ['singleelectron','singlemuon','egamma']:
            output['sumw'][dataset] += ak.sum(np.sign(events.Generator.weight))
        output['nevents'][dataset] += len(events)

        if dataset in ['singleelectron','singlemuon','egamma']:
            events = events[lumimask(events.run,events.luminosityBlock)]

        events = events[(events.PuppiMET.pt > 30) | (events.PuppiMET.ptJERUp > 30) | (events.PuppiMET.ptJESUp > 30)]    

        if year == "2016":
            if dataset == 'singlemuon':
                events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24]
            elif dataset == 'singleelectron':
                events = events[vents.HLT.IsoTkMu24 | events.HLT.IsoMu24 | events.HLT.Ele27_WPTight_Gsf]
            else:    
                events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24 | events.HLT.Ele27_WPTight_Gsf]
        elif year == "2017":
            if dataset == 'singlemuon':
                events = events[events.HLT.IsoMu27]
            elif dataset == 'singleelectron':
                events = events[events.HLT.Ele32_WPTight_Gsf_L1DoubleEG]    
            else:
                events = events[events.HLT.IsoMu27 | events.HLT.Ele32_WPTight_Gsf_L1DoubleEG]        
        elif year == "2018":
            if dataset == 'singlemuon':
                events = events[events.HLT.IsoMu24]
            elif dataset == 'egamma':    
                events = events[events.HLT.Ele32_WPTight_Gsf]
            else:
                events = events[events.HLT.IsoMu24 |events.HLT.Ele32_WPTight_Gsf]

        events = events[(ak.num(events.Jet) > 3) | ((ak.num(events.Jet) > 1) & (ak.num(events.FatJet) > 0))]

        events = events[(ak.num(events.Electron) > 0) | (ak.num(events.Muon) > 0)]

        tight_muons = events.Muon[events.Muon.tightId & (events.Muon.pfRelIso04_all < 0.15) & (events.Muon.pt > 26) & (abs(events.Muon.eta) < 2.4)]

        loose_not_tight_muons = events.Muon[events.Muon.tightId & (events.Muon.pfRelIso04_all < 0.4) & (events.Muon.pfRelIso04_all > 0.15) & (events.Muon.pt > 20) & (abs(events.Muon.eta) < 2.4)]

        tight_electrons = events.Electron[(events.Electron.pt > 30) & (events.Electron.cutBased >= 3) & (events.Electron.eta + events.Electron.deltaEtaSC < 2.5) & ((abs(events.Electron.dz) < 0.1) & (abs(events.Electron.dxy) < 0.05) & (events.Electron.eta + events.Electron.deltaEtaSC < 1.479)) | ((abs(events.Electron.dz) < 0.2) & (abs(events.Electron.dxy) < 0.1) & (events.Electron.eta + events.Electron.deltaEtaSC > 1.479))]

        name_map = jec_stack.blank_name_map
        name_map['JetPt'] = 'pt'
        name_map['JetMass'] = 'mass'
        name_map['JetEta'] = 'eta'
        name_map['JetA'] = 'area'

        jets = events.Jet
        
        jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
        jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
        jets['pt_gen'] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32)
        jets['rho'] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0]
        name_map['ptGenJet'] = 'pt_gen'
        name_map['ptRaw'] = 'pt_raw'
        name_map['massRaw'] = 'mass_raw'
        name_map['Rho'] = 'rho'

        events_cache = events.caches[0]

        jet_factory = CorrectedJetsFactory(name_map, jec_stack)
        corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)    

        jet_pt = corrected_jets.pt
        jet_pt_jesup = corrected_jets.JES_jes.up.pt
        jet_pt_jerup = corrected_jets.JER.up.pt

        corrected_jets = ak.zip({
            "pt": corrected_jets.pt,
            "eta": corrected_jets.eta,
            "phi": corrected_jets.phi,
            "mass": corrected_jets.mass,
            "charge": np.ones(len(corrected_jets.pt)),
            "btagDeepB": corrected_jets.btagDeepB
        }, with_name="PtEtaPhiMCandidate")    

        fatjets = events.FatJet[(events.FatJet.pt > 250) & (abs(events.FatJet.eta) < 2.5) & (events.FatJet.msoftdrop > 50) & (events.FatJet.msoftdrop < 150)]    
        b_jets = corrected_jets[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 2.5) & (events.Jet.btagDeepB > 0.8953)]
        vbf_jets = corrected_jets[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 4.7) & (events.Jet.btagDeepB < 0.2217)]
        nextrajets = ak.num(events.Jet[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 4.7)]) - 4
        nextrabjets = ak.num(events.Jet[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 4.7) & (events.Jet.btagDeepB > 0.2217)]) - 2

        basecut_merged = (ak.num(fatjets) > 0) & (ak.num(vbf_jets) > 1) & (ak.num(tight_muons) + ak.num(tight_electrons) == 1) & (ak.num(loose_not_tight_muons) == 0) & (events.PuppiMET.pt > 30)
        events_merged = events[basecut_merged]
        fatjets_merged = fatjets[basecut_merged]
        vbf_jets_merged = vbf_jets[basecut_merged]
        tight_muons_merged = tight_muons[basecut_merged]
        tight_electrons_merged = tight_electrons[basecut_merged]
        nextrajets_merged = nextrajets[basecut_merged]
        nextrabjets_merged = nextrabjets[basecut_merged]

        basecut = (ak.num(b_jets) > 1) & (ak.num(vbf_jets) > 1) & (ak.num(tight_muons) + ak.num(tight_electrons) == 1) & (ak.num(loose_not_tight_muons) == 0) & (events.PuppiMET.pt > 30)
        events = events[basecut]
        b_jets = b_jets[basecut]
        vbf_jets = vbf_jets[basecut]
        tight_muons = tight_muons[basecut]
        tight_electrons = tight_electrons[basecut]
        nextrajets = nextrajets[basecut]
        nextrabjets = nextrabjets[basecut]

        if dataset in ['singleelectron','singlemuon','egamma']:
            dataset = 'data'

        if ak.any(basecut_merged):
            cut7 = (fatjets_merged[:,0].mass > 50) & (fatjets_merged[:,0].mass < 150) & ((vbf_jets_merged[:,0]+vbf_jets_merged[:,1]).mass > 500) & (abs(vbf_jets_merged[:,0].eta - vbf_jets_merged[:,1].eta) > 2.5) & (ak.num(tight_muons_merged) > 0)
            cut8 = (fatjets_merged[:,0].mass > 50) & (fatjets_merged[:,0].mass < 150) & ((vbf_jets_merged[:,0]+vbf_jets_merged[:,1]).mass > 500) & (abs(vbf_jets_merged[:,0].eta - vbf_jets_merged[:,1].eta) > 2.5) & (ak.num(tight_electrons_merged) > 0)
#            cut9 = cut7 | cut8

        cut1 = ((b_jets[:,0] + b_jets[:,1]).mass > 50) & ((b_jets[:,0] + b_jets[:,1]).mass < 150) & ((vbf_jets[:,0] + vbf_jets[:,1]).mass > 500) & (abs(vbf_jets[:,0].eta - vbf_jets[:,1].eta) > 2.5) & (ak.num(tight_muons) > 0)
        cut2 = ((b_jets[:,0] + b_jets[:,1]).mass > 50) & ((b_jets[:,0] + b_jets[:,1]).mass < 150) & ((vbf_jets[:,0] + vbf_jets[:,1]).mass > 500) & (abs(vbf_jets[:,0].eta - vbf_jets[:,1].eta) > 2.5) & (ak.num(tight_electrons) > 0)
#            cut3 = cut1 | cut2

        if ak.any(basecut_merged) and ak.any(cut7):

            sel7_events = events_merged[cut7]
            sel7_fatjets = fatjets_merged[cut7]
            sel7_vbf_jets = vbf_jets_merged[cut7]
            sel7_muons = tight_muons_merged[cut7][:,0]
            sel7_nextrajets = nextrajets_merged[cut7]
            sel7_nextrabjets = nextrabjets_merged[cut7]

            output["weights_merged"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel7_events.Generator.weight).data))

            output['variables_merged'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
                ak.to_numpy(sel7_fatjets[:,0].pt),
                ak.to_numpy(sel7_fatjets[:,0].eta),
                ak.to_numpy(sel7_fatjets[:,0].phi),
                ak.to_numpy(sel7_fatjets[:,0].btagDeepB),
                ak.to_numpy(sel7_fatjets[:,0].btagHbb),
                ak.to_numpy(sel7_fatjets[:,0].msoftdrop),
                ak.to_numpy(sel7_nextrajets),
                ak.to_numpy(sel7_nextrabjets),
                np.zeros(len(sel7_events)),
                np.sign(ak.to_numpy(sel7_muons.charge)+1),
                ak.to_numpy(sel7_muons.pt),
                ak.to_numpy(sel7_muons.eta),
                ak.to_numpy(sel7_muons.phi),
                ak.to_numpy(sel7_events.PuppiMET.pt),
                ak.to_numpy(sel7_events.PuppiMET.phi),
                ak.to_numpy(sel7_vbf_jets[:,0].pt),
                ak.to_numpy(sel7_vbf_jets[:,1].pt),
                ak.to_numpy(sel7_vbf_jets[:,0].eta),
                ak.to_numpy(sel7_vbf_jets[:,1].eta),
                ak.to_numpy(sel7_vbf_jets[:,0].phi),
                ak.to_numpy(sel7_vbf_jets[:,1].phi),
                ak.to_numpy(sel7_vbf_jets[:,0].btagDeepB),
                ak.to_numpy(sel7_vbf_jets[:,1].btagDeepB),
                ak.to_numpy((sel7_vbf_jets[:,0]+sel7_vbf_jets[:,1]).mass),
                ak.to_numpy(sel7_vbf_jets[:,0].eta - sel7_vbf_jets[:,1].eta),
                ak.to_numpy(np.sqrt(2*(sel7_muons+sel7_vbf_jets[:,0]).pt*sel7_events.PuppiMET.pt*(1 - np.cos(sel7_events.PuppiMET.phi - (sel7_muons+sel7_vbf_jets[:,0]).phi)))),
                ak.to_numpy(np.sqrt(2*(sel7_muons+sel7_vbf_jets[:,1]).pt*sel7_events.PuppiMET.pt*(1 - np.cos(sel7_events.PuppiMET.phi - (sel7_muons+sel7_vbf_jets[:,1]).phi))))))))

            sel7_muonidsf = evaluator['muonidsf'](abs(sel7_muons.eta), sel7_muons.pt)
            sel7_muonisosf = evaluator['muonisosf'](abs(sel7_muons.eta), sel7_muons.pt)
            sel7_muonhltsf = evaluator['muonhltsf'](abs(sel7_muons.eta), sel7_muons.pt)
            sel7_weight = np.sign(sel7_events.Generator.weight)*sel7_events.L1PreFiringWeight.Nom*sel7_muonidsf*sel7_muonisosf*sel7_muonhltsf

        if ak.any(basecut_merged) and ak.any(cut8):

            sel8_events = events_merged[cut8]
            sel8_fatjets = fatjets_merged[cut8]
            sel8_vbf_jets = vbf_jets_merged[cut8]
            sel8_electrons = tight_electrons_merged[cut8][:,0]
            sel8_nextrajets = nextrajets_merged[cut8]
            sel8_nextrabjets = nextrabjets_merged[cut8]

            output["weights_merged"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel8_events.Generator.weight).data))

            output['variables_merged'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
                ak.to_numpy(sel8_fatjets[:,0].pt),
                ak.to_numpy(sel8_fatjets[:,0].eta),
                ak.to_numpy(sel8_fatjets[:,0].phi),
                ak.to_numpy(sel8_fatjets[:,0].btagDeepB),
                ak.to_numpy(sel8_fatjets[:,0].btagHbb),
                ak.to_numpy(sel8_fatjets[:,0].msoftdrop),
                ak.to_numpy(sel8_nextrajets),
                ak.to_numpy(sel8_nextrabjets),
                np.ones(len(sel8_events)),
                np.sign(ak.to_numpy(sel8_electrons.charge)+1),
                ak.to_numpy(sel8_electrons.pt),
                ak.to_numpy(sel8_electrons.eta),
                ak.to_numpy(sel8_electrons.phi),
                ak.to_numpy(sel8_events.PuppiMET.pt),
                ak.to_numpy(sel8_events.PuppiMET.phi),
                ak.to_numpy(sel8_vbf_jets[:,0].pt),
                ak.to_numpy(sel8_vbf_jets[:,1].pt),
                ak.to_numpy(sel8_vbf_jets[:,0].eta),
                ak.to_numpy(sel8_vbf_jets[:,1].eta),
                ak.to_numpy(sel8_vbf_jets[:,0].phi),
                ak.to_numpy(sel8_vbf_jets[:,1].phi),
                ak.to_numpy(sel8_vbf_jets[:,0].btagDeepB),
                ak.to_numpy(sel8_vbf_jets[:,1].btagDeepB),
                ak.to_numpy((sel8_vbf_jets[:,0]+sel8_vbf_jets[:,1]).mass),
                ak.to_numpy(sel8_vbf_jets[:,0].eta - sel8_vbf_jets[:,1].eta),
                ak.to_numpy(np.sqrt(2*(sel8_electrons+sel8_vbf_jets[:,0]).pt*sel8_events.PuppiMET.pt*(1 - np.cos(sel8_events.PuppiMET.phi - (sel8_electrons+sel8_vbf_jets[:,0]).phi)))),
                ak.to_numpy(np.sqrt(2*(sel8_electrons+sel8_vbf_jets[:,1]).pt*sel8_events.PuppiMET.pt*(1 - np.cos(sel8_events.PuppiMET.phi - (sel8_electrons+sel8_vbf_jets[:,1]).phi))))))))

            sel8_electronidsf = evaluator['electronidsf'](sel8_electrons.eta, sel8_electrons.pt)
            sel8_electronrecosf = evaluator['electronrecosf'](sel8_electrons.eta, sel8_electrons.pt)
            sel8_weight = np.sign(sel8_events.Generator.weight)*sel8_events.L1PreFiringWeight.Nom*sel8_electronidsf*sel8_electronrecosf

        if ak.any(basecut) and ak.any(cut1):

            sel1_events = events[cut1]
            
            sel1_b_jets = b_jets[cut1]

            sel1_vbf_jets = vbf_jets[cut1]

            sel1_muons = tight_muons[cut1][:,0]

            sel1_nextrajets = nextrajets[cut1]

            sel1_nextrabjets = nextrabjets[cut1]

            output["weights"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel1_events.Generator.weight).data))

            output['variables'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
                ak.to_numpy(sel1_nextrajets),
                ak.to_numpy(sel1_nextrabjets),
                np.zeros(len(sel1_events)),
                np.sign(ak.to_numpy(sel1_muons.charge)+1),
                ak.to_numpy(sel1_muons.pt),
                ak.to_numpy(sel1_muons.eta),
                ak.to_numpy(sel1_muons.phi),
                ak.to_numpy(sel1_events.PuppiMET.pt),
                ak.to_numpy(sel1_events.PuppiMET.phi),
                ak.to_numpy(sel1_b_jets[:,0].pt),
                ak.to_numpy(sel1_b_jets[:,1].pt),
                ak.to_numpy(sel1_vbf_jets[:,0].pt),
                ak.to_numpy(sel1_vbf_jets[:,1].pt),
                ak.to_numpy(sel1_b_jets[:,0].eta),
                ak.to_numpy(sel1_b_jets[:,1].eta),
                ak.to_numpy(sel1_vbf_jets[:,0].eta),
                ak.to_numpy(sel1_vbf_jets[:,1].eta),
                ak.to_numpy(sel1_b_jets[:,0].phi),
                ak.to_numpy(sel1_b_jets[:,1].phi),
                ak.to_numpy(sel1_vbf_jets[:,0].phi),
                ak.to_numpy(sel1_vbf_jets[:,1].phi),
                ak.to_numpy(sel1_b_jets[:,0].btagDeepB),
                ak.to_numpy(sel1_b_jets[:,1].btagDeepB),
                ak.to_numpy(sel1_vbf_jets[:,0].btagDeepB),
                ak.to_numpy(sel1_vbf_jets[:,1].btagDeepB),
                ak.to_numpy((sel1_b_jets[:,0]+sel1_b_jets[:,1]).mass),
                ak.to_numpy((sel1_vbf_jets[:,0]+sel1_vbf_jets[:,1]).mass),
                ak.to_numpy(sel1_vbf_jets[:,0].eta - sel1_vbf_jets[:,1].eta),
                ak.to_numpy(np.sqrt(2*(sel1_muons+sel1_b_jets[:,0]).pt*sel1_events.PuppiMET.pt*(1 - np.cos(sel1_events.PuppiMET.phi - (sel1_muons+sel1_b_jets[:,0]).phi)))),ak.to_numpy(np.sqrt(2*(sel1_muons+sel1_b_jets[:,1]).pt*sel1_events.PuppiMET.pt*(1 - np.cos(sel1_events.PuppiMET.phi - (sel1_muons+sel1_b_jets[:,1]).phi))))))))

            sel1_pu_weight = evaluator['pileup'](sel1_events.Pileup.nTrueInt)
            sel1_muonidsf = evaluator['muonidsf'](abs(sel1_muons.eta), sel1_muons.pt)
            sel1_muonisosf = evaluator['muonisosf'](abs(sel1_muons.eta), sel1_muons.pt)
            sel1_muonhltsf = evaluator['muonhltsf'](abs(sel1_muons.eta), sel1_muons.pt)
            sel1_weight = np.sign(sel1_events.Generator.weight)*sel1_pu_weight*sel1_events.L1PreFiringWeight.Nom*sel1_muonidsf*sel1_muonisosf*sel1_muonhltsf

        if ak.any(basecut) and ak.any(cut2):
       
            sel2_events = events[cut2]
            sel2_b_jets = b_jets[cut2]
            sel2_vbf_jets = vbf_jets[cut2]
            sel2_electrons = tight_electrons[cut2][:,0]
            sel2_nextrajets = nextrajets[cut2]
            sel2_nextrabjets = nextrabjets[cut2]
         
            output["weights"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel2_events.Generator.weight).data))

            output['variables'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
                ak.to_numpy(sel2_nextrajets),
                ak.to_numpy(sel2_nextrabjets),
                np.ones(len(sel2_events)),
                np.sign(ak.to_numpy(sel2_electrons.charge)+1),
                ak.to_numpy(sel2_electrons.pt),
                ak.to_numpy(sel2_electrons.eta),
                ak.to_numpy(sel2_electrons.phi),
                ak.to_numpy(sel2_events.PuppiMET.pt),
                ak.to_numpy(sel2_events.PuppiMET.phi),
                ak.to_numpy(sel2_b_jets[:,0].pt),
                ak.to_numpy(sel2_b_jets[:,1].pt),
                ak.to_numpy(sel2_vbf_jets[:,0].pt),
                ak.to_numpy(sel2_vbf_jets[:,1].pt),
                ak.to_numpy(sel2_b_jets[:,0].eta),
                ak.to_numpy(sel2_b_jets[:,1].eta),
                ak.to_numpy(sel2_vbf_jets[:,0].eta),
                ak.to_numpy(sel2_vbf_jets[:,1].eta),
                ak.to_numpy(sel2_b_jets[:,0].phi),
                ak.to_numpy(sel2_b_jets[:,1].phi),
                ak.to_numpy(sel2_vbf_jets[:,0].phi),
                ak.to_numpy(sel2_vbf_jets[:,1].phi),
                ak.to_numpy(sel2_b_jets[:,0].btagDeepB),
                ak.to_numpy(sel2_b_jets[:,1].btagDeepB),
                ak.to_numpy(sel2_vbf_jets[:,0].btagDeepB),
                ak.to_numpy(sel2_vbf_jets[:,1].btagDeepB),
                ak.to_numpy((sel2_b_jets[:,0]+sel2_b_jets[:,1]).mass),
                ak.to_numpy((sel2_vbf_jets[:,0]+sel2_vbf_jets[:,1]).mass),
                ak.to_numpy(sel2_vbf_jets[:,0].eta - sel2_vbf_jets[:,1].eta),
                ak.to_numpy(np.sqrt(2*(sel2_electrons+sel2_b_jets[:,0]).pt*sel2_events.PuppiMET.pt*(1 - np.cos(sel2_events.PuppiMET.phi - (sel2_electrons+sel2_b_jets[:,0]).phi)))),ak.to_numpy(np.sqrt(2*(sel2_electrons+sel2_b_jets[:,1]).pt*sel2_events.PuppiMET.pt*(1 - np.cos(sel2_events.PuppiMET.phi - (sel2_electrons+sel2_b_jets[:,1]).phi))))))))

            sel2_pu_weight = evaluator['pileup'](sel2_events.Pileup.nTrueInt)

            sel2_electronidsf = evaluator['electronidsf'](sel2_electrons.eta, sel2_electrons.pt)
            sel2_electronrecosf = evaluator['electronrecosf'](sel2_electrons.eta, sel2_electrons.pt)

            sel2_weight = np.sign(sel2_events.Generator.weight)*sel2_pu_weight*sel2_events.L1PreFiringWeight.Nom*sel2_electronidsf*sel2_electronrecosf

        return output
    def __init__(self, sync=False, categories=None,
                 checklist=None,
                 sample_list_dir="../sample_lists"):
        """Load FastMTT, record the run configuration and build the accumulator.

        Args:
            sync: when truthy, ``self.mode`` is set to ``'sync'``; otherwise
                it is ``'all'``.
            categories: either the string ``'all'`` (selects the eight
                built-in final states, keyed 1-8) or an iterable of category
                names, which are keyed by their position starting at 0.
                ``None`` (the default) behaves like an empty list.
            checklist: stored as ``self.checklist`` (failing sync events to
                double-check). ``None`` (the default) becomes an empty
                ``pd.DataFrame``.
            sample_list_dir: directory stored as ``self.sample_list_dir``.
        """
        # Build the defaults per call instead of sharing one mutable object
        # between every instance.
        if categories is None:
            categories = []
        if checklist is None:
            checklist = pd.DataFrame([])

        # load in fastmtt: use the compiled shared object when it is already
        # on disk, otherwise ask ROOT to compile the source (".cc++")
        fastmtt_dir = '../svfit/fastmtt/'
        for basename in ['MeasuredTauLepton', 'svFitAuxFunctions', 'FastMTT']:
            path = fastmtt_dir + basename
            if os.path.isfile("{0:s}_cc.so".format(path)):
                ROOT.gInterpreter.ProcessLine(".L {0:s}_cc.so".format(path))
            else:
                ROOT.gInterpreter.ProcessLine(".L {0:s}.cc++".format(path))

        # customize the 4l final states considered
        if categories == 'all':
            self.categories = {1: 'eeet', 2: 'eemt', 3: 'eett', 4: 'eeem',
                               5: 'mmet', 6: 'mmmt', 7: 'mmtt', 8: 'mmem'}
        else:
            self.categories = {i: cat for i, cat in enumerate(categories)}
        print("\n...running on", self.categories)

        # sync mode runs a subset of the full analysis
        self.sync = sync
        self.mode = 'sync' if self.sync else 'all'
        self.checklist = checklist  # failing sync events to double-check
        self.princeton_exclusive = np.array([248633, 250132, 250374, 256311, 2568862, 259595, 395373, 488027, 490292, 491592])

        # location of the samples, usually differentiates sync vs. all
        self.sample_list_dir = sample_list_dir

        # correct number of leptons in each final state
        self.correct_n_electrons = {'eeem': 3, 'eeet': 3, 'eemt': 2,
                                    'eett': 2, 'mmem': 1, 'mmet': 1,
                                    'mmmt': 0, 'mmtt': 0}
        self.correct_n_muons = {'eeem': 1, 'eeet': 0, 'eemt': 1,
                                'eett': 0, 'mmem': 3, 'mmet': 2,
                                'mmmt': 3, 'mmtt': 2}

        # histogram axes specify histo names, labels, and bin shapes
        category_axis = hist.Cat("category", "")
        dataset_axis = hist.Cat("dataset", "")
        particle_axis = hist.Cat("particle", "")

        # Labels containing LaTeX commands are raw strings: "\e" and "\p" are
        # not valid escape sequences, so the non-raw spelling only worked by
        # accident. The resulting label text is unchanged.
        # NOTE(review): the eta/phi labels carry a "[GeV]" unit that looks
        # like a copy-paste leftover -- confirm before changing the text.
        pt_axis = hist.Bin("pt", "$p_T$ [GeV]", 20, 0, 200)
        eta_axis = hist.Bin("eta", r"$\eta$ [GeV]", 10, -5, 5)
        phi_axis = hist.Bin("phi", r"$\phi$ [GeV]", 10, -np.pi, np.pi)

        mll_axis = hist.Bin("mll", "$m(l_1,l_2)$ [GeV]", 40, 0, 200)
        mtt_axis = hist.Bin("mtt", "$m(t_1,t_2)$ [GeV]", 30, 0, 300)
        mA_axis  = hist.Bin("mA", "$m_A$ [GeV]", 40, 0, 400)
        mass_type_axis = hist.Cat("mass_type", "")

        nbtag_axis = hist.Bin("nbtag", "$n_{btag}$", 5, 0, 5)
        njets_axis = hist.Bin("njets", "$n_{jets}$", 5, 0, 5)
        # The leading-jet / leading-b-jet axes below are defined but not
        # attached to any histogram in the accumulator yet.
        jpt1_axis = hist.Bin("jpt1", "$p_T(j_1)$ [GeV]", 20, 0, 200)
        jeta1_axis = hist.Bin("jeta1", r"$\eta (j_1)$ [GeV]", 10, -5, 5)
        jphi1_axis = hist.Bin("jphi1", r"$\phi (j_1)$ [GeV]", 10, -np.pi, np.pi)
        bpt1_axis = hist.Bin("bpt1", "$p_T(b_1)$ [GeV]", 20, 0, 200)
        beta1_axis = hist.Bin("beta1", r"$\eta (b_1)$ [GeV]", 10, -5, 5)
        bphi1_axis = hist.Bin("bphi1", r"$\phi (b_1)$ [GeV]", 10, -np.pi, np.pi)

        self._accumulator = processor.dict_accumulator({

            # event info, weights
            "sumw": processor.defaultdict_accumulator(float),
            "evt": processor.column_accumulator(np.array([])),
            "lumi": processor.column_accumulator(np.array([])),
            "run": processor.column_accumulator(np.array([])),
            "cat": processor.column_accumulator(np.array([])),
            "mll_array": processor.column_accumulator(np.array([])),
            "msv_cons_array": processor.column_accumulator(np.array([])),
            "m_mumu": processor.column_accumulator(np.array([])),

            # histograms
            "pt": hist.Hist("Events", dataset_axis, category_axis, pt_axis, particle_axis),
            "eta": hist.Hist("Events", dataset_axis, category_axis, eta_axis, particle_axis),
            "phi": hist.Hist("Events", dataset_axis, category_axis, phi_axis, particle_axis),
            "mll": hist.Hist("Events", dataset_axis, category_axis, mll_axis),
            "mtt": hist.Hist("Events", dataset_axis, category_axis, mtt_axis, mass_type_axis),
            "m4l": hist.Hist("Events", dataset_axis, category_axis, mA_axis,  mass_type_axis),
            "nbtag": hist.Hist("Events", dataset_axis, category_axis, nbtag_axis),
            "njets": hist.Hist("Events", dataset_axis, category_axis, njets_axis),

            # cutflow
            'cutflow': processor.defaultdict_accumulator(
                partial(processor.defaultdict_accumulator, int)
            ),
            'cutflow_sync': processor.defaultdict_accumulator(
                partial(processor.defaultdict_accumulator, int)
            )
        })
    def process(self, events):
        """Run the 4l selection on one chunk of events and fill the outputs.

        The dataset name is read from ``events.metadata['dataset']``; the
        text after its last underscore is used as the year. Per-sample
        properties (``xsec``, ``total_weight``) are read from
        ``<sample_list_dir>/samples_<mode>/<dataset>_properties.yaml``.

        Args:
            events: the event collection for this chunk.

        Returns:
            ``self.output``, a fresh copy of the accumulator filled with the
            event ids, mass columns and histograms of every configured
            category.

        Raises:
            yaml.YAMLError: if the sample properties file cannot be parsed.
        """
        # grab dataset metadata
        self.dataset = events.metadata['dataset']
        year = self.dataset.split('_')[-1]
        self.output = self.accumulator.identity()
        print(year, self.dataset)

        # grab event id data
        self.event_ids = pd.DataFrame({'run': np.array(events.run, dtype=int),
                                       'lumi': np.array(events.luminosityBlock, dtype=int),
                                       'evt': np.array(events.event, dtype=int)})
        self.fill_cutflow('all events', len(events),
                          N_sync = self.check_events(self.event_ids))

        # integrated lumis, keyed by year
        lumi = {'2016': 35.9, '2017': 41.5, '2018': 59.7}

        # load properties of each sample
        with open("{0}/samples_{1}/{2}_properties.yaml"
                  .format(self.sample_list_dir,
                          self.mode, self.dataset), 'r') as stream:
            try:
                properties = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                # Without the properties nothing below can run; surface the
                # parse error itself rather than an unrelated failure on the
                # unbound `properties` name a line later.
                print(exc)
                raise
        xsec = float(properties[self.dataset]['xsec'])
        total_weight = float(properties[self.dataset]['total_weight'])
        sample_weight = lumi[year]*xsec/total_weight
        if ('data' in self.dataset): sample_weight = 1.

        #############
        ## FILTERS ##
        #############
        # calculate the MET filter
        flags = events.Flag
        MET_filter = (flags.goodVertices & flags.HBHENoiseFilter &
                      flags.HBHENoiseIsoFilter &
                      flags.EcalDeadCellTriggerPrimitiveFilter &
                      flags.BadPFMuonFilter & flags.ecalBadCalibFilter)

        # calculate PV quality filter
        pv = events.PV
        pv_filter = ((pv.ndof > 4) &
                     (abs(pv.z) < 24) &
                     (np.sqrt(pv.x**2 + pv.y**2) < 2))

        # apply filters (the combined mask is built once and shared)
        passes_filters = MET_filter & pv_filter
        events = events[passes_filters]
        self.event_ids = self.event_ids[passes_filters]
        self.fill_cutflow('MET, pv filters', len(events),
                          N_sync = self.check_events(self.event_ids))

        ######################
        ## LOOSE SELECTIONS ##
        ######################
        # apply loose selections
        loose_taus = self.loose_tau_selections(events.Tau)
        loose_muons = self.loose_muon_selections(events.Muon)
        loose_electrons = self.loose_electron_selections(events.Electron)
        loose_jets = self.loose_jet_selections(events.Jet, year)
        loose_bjets = self.loose_bjet_selections(events.Jet, year)
        MET = events.MET
        trigger_objects = events.TrigObj

        #if category=='eemt':
        #    self.check_princeton_exclusive_loose(loose_electrons, loose_muons, loose_taus)

        # count electrons minus overlapped objects
        electron_counts = self.count_non_overlapped(loose_electrons)
        muon_counts = self.count_non_overlapped(loose_muons)

        ll_pairs = { 'ee': loose_electrons.distincts(),
                     'mm': loose_muons.distincts() }
        tt_pairs = { 'mt': loose_muons.cross(loose_taus),
                     'et': loose_electrons.cross(loose_taus),
                     'em': loose_electrons.cross(loose_muons),
                     'tt': loose_taus.distincts() }

        ###############################
        ## SELECTIONS (per category) ##
        ###############################
        for category in self.categories.values():
            # identify correct trigger path
            HLT = events.HLT
            trigger_path = self.trigger_path(HLT, year, category, sync=self.sync)

            # n_leptons veto
            n_lepton_mask = self.n_lepton_veto(electron_counts, muon_counts, category)
            n_lepton_mask = n_lepton_mask & trigger_path # combine with trigger path
            jets, bjets = loose_jets[n_lepton_mask], loose_bjets[n_lepton_mask]
            self.met = MET[n_lepton_mask]
            trig_obj = trigger_objects[n_lepton_mask]

            # track event ids on a per-category basis
            self.evt_ids = self.event_ids[n_lepton_mask]

            # form 4l final states: the first two characters of the category
            # name pick the ll pair type, the last two the tt pair type
            ll = ll_pairs[category[:2]][n_lepton_mask]
            tt = tt_pairs[category[2:]][n_lepton_mask]
            self.fill_cutflow('n_lepton veto', len(self.evt_ids),
                              N_sync = self.check_events(self.evt_ids))

            # build non-overlapped final state objects
            lltt = self.dR_cut(ll.cross(tt), category, cutflow=True)
            lltt = self.build_Z_cand(lltt, category, cutflow=True)
            lltt = self.trigger_filter(lltt, trig_obj, category, cutflow=True)
            lltt = self.build_ditau_cand(lltt, category, cutflow=True)

            # apply b jet veto
            #self.evt_ids = self.evt_ids[bjets.counts==0]
            #lltt, met = lltt[bjets.counts==0], met[bjets.counts==0]
            #jets = jets[bjets.counts==0], 
            #bjets = bjets[bjets.counts==0]
            #self.fill_cutflow('bjet veto', lltt[lltt.counts>0].shape[0],
            #                  N_sync = self.check_events(self.evt_ids[lltt.counts>0]))

            # take only valid final states
            self.evt_ids = self.evt_ids[lltt.counts>0]
            self.met = self.met[lltt.counts>0]
            lltt = lltt[lltt.counts>0]
            mll, mtt, m4l = self.get_masses(lltt, cutflow=True)
            msv, msv_cons, mA_corr, mA_cons = self.run_fastmtt(lltt, self.met, category)

            #################
            ## FILL HISTOS ##
            #################
            self.output["evt"] += processor.column_accumulator(self.evt_ids['evt'].to_numpy())
            self.output["lumi"] += processor.column_accumulator(self.evt_ids['lumi'].to_numpy())
            self.output["run"] += processor.column_accumulator(self.evt_ids['run'].to_numpy())
            # one copy of the category name per surviving event
            self.output["cat"] += processor.column_accumulator(
                np.array([category] * len(self.evt_ids)))

            if category=='eemt':
                self.check_princeton_exclusive_fs(lltt)

            pts = [lltt.i0.pt.flatten(), lltt.i1.pt.flatten(),
                   lltt.i2.pt.flatten(), lltt.i3.pt.flatten()]
            etas = [lltt.i0.eta.flatten(), lltt.i1.eta.flatten(),
                    lltt.i2.eta.flatten(), lltt.i3.eta.flatten()]
            phis = [lltt.i0.phi.flatten(), lltt.i1.phi.flatten(),
                    lltt.i2.phi.flatten(), lltt.i3.phi.flatten()]
            particle_nums = ["$l_1$", "$l_2$", "$t_1$", "$t_2$"]
            for i, pnum in enumerate(particle_nums):

                self.output["pt"].fill(dataset=self.dataset, category=category, pt=pts[i],
                                       particle=pnum, weight=sample_weight*np.ones(len(pts[i])))
                self.output["eta"].fill(dataset=self.dataset, category=category, eta=etas[i],
                                        particle=pnum, weight=sample_weight*np.ones(len(etas[i])))
                self.output["phi"].fill(dataset=self.dataset, category=category, phi=phis[i],
                                        particle=pnum, weight=sample_weight*np.ones(len(phis[i])))

            if (category[:2]=='mm'): self.output["m_mumu"] += processor.column_accumulator(np.array(mll.flatten()))
            self.output["mll_array"] += processor.column_accumulator(np.array(mll.flatten()))
            self.output["msv_cons_array"] += processor.column_accumulator(np.array(msv_cons.flatten()))

            self.output["mll"].fill(dataset=self.dataset, category=category, mll=mll.flatten(),
                                    weight=sample_weight*np.ones(len(mll.flatten())))
            self.output["mtt"].fill(dataset=self.dataset, category=category, mass_type="$m_{tt}$",
                                    mtt=mtt.flatten(), weight=sample_weight*np.ones(len(mtt.flatten())))
            self.output["mtt"].fill(dataset=self.dataset, category=category, mass_type="$m_{fastmtt}$",
                                    mtt=msv.flatten(), weight=sample_weight*np.ones(len(msv.flatten())))
            # NOTE(review): unlike the fills above, the three m4l fills pass
            # no `weight`, so they are not scaled by sample_weight -- confirm
            # whether that is intended.
            self.output["m4l"].fill(dataset=self.dataset, category=category, mass_type='$m_{4l}$',
                                    mA=m4l.flatten())
            self.output["m4l"].fill(dataset=self.dataset, category=category, mass_type='$m_A^{corr}$',
                                    mA=mA_corr.flatten())
            self.output["m4l"].fill(dataset=self.dataset, category=category, mass_type='$m_A^{cons}$',
                                    mA=mA_cons.flatten())

            #nbtag = good_bjets.counts.flatten()
            #output["nbtag"].fill(dataset=dataset, category=category, nbtag=nbtag, weight=sample_weight*np.ones(len(nbtag)))
            #njets = good_jets.counts.flatten()
            #output["njets"].fill(dataset=dataset, category=category, njets=njets, weight=sample_weight*np.ones(len(njets)))

        return self.output
    'lead_lep_phi',
    'lead_lep_charge',
    'sublead_lep_pt',
    'sublead_lep_eta',
    'sublead_lep_phi',
    'sublead_lep_charge',
    'dilepton_mass',
    'dilepton_pt',
    'min_bl_dR',
    'min_mt_lep_met',
    'label',
    'weight',
]

# Give every variable name its own empty (length-0) column accumulator.
for var in variables:
    out_dict[var] = processor.column_accumulator(np.zeros(shape=(0, )))


class ML_preprocessor(processor.ProcessorABC):
    '''
    processor.ProcessorABC subclass whose accumulator is built from the
    module-level ``out_dict`` of column accumulators.

    The original note lists example quantities: e.g. deltaR of leptons,
    min deltaR of lepton and jet.
    '''
    def __init__(self, year=2018):
        '''
        Store the year and set up the accumulator and scale-factor helpers.

        year: passed on to ``btag_scalefactor`` and ``LeptonSF`` and kept
              as ``self.year``.
        '''

        self.year = year

        # accumulator wraps the shared module-level out_dict
        self._accumulator = processor.dict_accumulator(out_dict)
        # scale-factor helpers, both constructed for the requested year
        self.btagSF = btag_scalefactor(year)

        self.leptonSF = LeptonSF(year=year)
Beispiel #24
0
def empty_column_accumulator_int64():
    """Build a column accumulator that starts with zero int64 entries."""
    no_entries = np.zeros(0, dtype=np.int64)
    return processor.column_accumulator(no_entries)
Beispiel #25
0
    def process(self, events):
        """Fill the single-lepton tight/fakeable histograms for one chunk.

        Events without any jet are dropped first. The FCNC selection
        requires ``MET < 20`` and a minimum lepton-MET ``mt < 20``, plus
        exactly one fakeable lepton and at least one jet that is not
        matched (deltaRCut=1.0) to it. When ``self.debug`` is set, a table
        of the tight-muon events (plus a hard-coded list of extra event
        numbers) is appended to ``output['muons_df']``.

        Args:
            events: the event collection for this chunk; its metadata must
                provide ``'dataset'``.

        Returns:
            The filled copy of the accumulator.
        """
        events = events[
            ak.num(events.Jet) >
            0]  #corrects for rare case where there isn't a single jet in event
        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) >= 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        # cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ### For FCNC, we want electron -> tightTTH
        electron = Collections(ev, "Electron", "tightFCNC").get()
        fakeableelectron = Collections(ev, "Electron", "fakeableFCNC").get()

        muon = Collections(ev, "Muon", "tightFCNC").get()
        fakeablemuon = Collections(ev, "Muon", "fakeableFCNC").get()

        ## MET -> can switch to puppi MET
        # NOTE(review): mt is computed from fakeable muons only, yet the
        # resulting 'mt<20' requirement is also part of the electron
        # selections below -- confirm this is intended.
        lepton = fakeablemuon  #ak.concatenate([fakeablemuon, fakeableelectron], axis=1)
        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        selection = PackedSelection()
        selection.add('MET<20', (ev.MET.pt < 20))
        selection.add('mt<20', min_mt_lep_met < 20)
        #selection.add('MET<19',        (ev.MET.pt<19) )
        selection_reqs = ['MET<20', 'mt<20']  #, 'MET<19']
        fcnc_reqs_d = {sel: True for sel in selection_reqs}
        fcnc_selection = selection.require(**fcnc_reqs_d)

        # define the weight
        # NOTE(review): `weight` is built here but never passed to any of
        # the fills below -- confirm whether the histograms should be
        # weighted.
        weight = Weights(len(ev))

        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)

        jets = getJets(
            ev, maxEta=2.4, minPt=25, pt_var='pt'
        )  #& (ak.num(jets[~match(jets, fakeablemuon, deltaRCut=1.0)])>=1)
        # `&` binds tighter than `|`: (1 tight & 1 fakeable) or
        # (0 tight & 1 fakeable)
        single_muon_sel = (ak.num(muon) == 1) & (ak.num(fakeablemuon) == 1) | (
            ak.num(muon) == 0) & (ak.num(fakeablemuon) == 1)
        single_electron_sel = (ak.num(electron) == 1) & (
            ak.num(fakeableelectron)
            == 1) | (ak.num(electron) == 0) & (ak.num(fakeableelectron) == 1)
        fcnc_muon_sel = (ak.num(
            jets[~match(jets, fakeablemuon, deltaRCut=1.0)]) >=
                         1) & fcnc_selection & single_muon_sel
        fcnc_electron_sel = (ak.num(
            jets[~match(jets, fakeableelectron, deltaRCut=1.0)]) >=
                             1) & fcnc_selection & single_electron_sel
        tight_muon_sel = (ak.num(muon) == 1) & fcnc_muon_sel
        loose_muon_sel = (ak.num(fakeablemuon) == 1) & fcnc_muon_sel
        tight_electron_sel = (ak.num(electron) == 1) & fcnc_electron_sel
        loose_electron_sel = (ak.num(fakeableelectron)
                              == 1) & fcnc_electron_sel

        output['single_mu_fakeable'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(fakeablemuon[loose_muon_sel].conePt)),
            eta=np.abs(
                ak.to_numpy(ak.flatten(fakeablemuon[loose_muon_sel].eta))))
        output['single_mu'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(muon[tight_muon_sel].conePt)),
            eta=np.abs(ak.to_numpy(ak.flatten(muon[tight_muon_sel].eta))))
        output['single_e_fakeable'].fill(
            dataset=dataset,
            pt=ak.to_numpy(
                ak.flatten(fakeableelectron[loose_electron_sel].conePt)),
            eta=np.abs(
                ak.to_numpy(
                    ak.flatten(fakeableelectron[loose_electron_sel].eta))))
        output['single_e'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(electron[tight_electron_sel].conePt)),
            eta=np.abs(
                ak.to_numpy(ak.flatten(electron[tight_electron_sel].eta))))

        if self.debug:
            # muon fields exported for debugging (same list for the selected
            # events and for the explicitly added ones)
            muon_columns = [
                "pt", "conePt", "eta", "dz", "dxy", "ptErrRel",
                "miniPFRelIso_all", "jetRelIsoV2", "jetRelIso",
                "jetPtRelv2"
            ]

            #create pandas dataframe for debugging
            passed_events = ev[tight_muon_sel]
            passed_muons = muon[tight_muon_sel]
            event_p = ak.to_pandas(passed_events[["event"]])
            event_p["MET_PT"] = passed_events["MET"]["pt"]
            event_p["mt"] = min_mt_lep_met[tight_muon_sel]
            event_p["num_tight_mu"] = ak.to_numpy(ak.num(muon)[tight_muon_sel])
            # converted with ak.to_numpy like the tight count above and like
            # both counts in the loop below
            event_p["num_loose_mu"] = ak.to_numpy(
                ak.num(fakeablemuon)[tight_muon_sel])
            muon_p = ak.to_pandas(ak.flatten(passed_muons)[muon_columns])
            #convert to numpy array for the output
            events_array = pd.concat([muon_p, event_p], axis=1)

            # specific event numbers that are always appended to the table
            events_to_add = [6886009]
            for e in events_to_add:
                is_event = ev.event == e  # computed once per event number
                tmp_event = ev[is_event]
                added_event = ak.to_pandas(tmp_event[["event"]])
                added_event["MET_PT"] = tmp_event["MET"]["pt"]
                added_event["mt"] = min_mt_lep_met[is_event]
                added_event["num_tight_mu"] = ak.to_numpy(
                    ak.num(muon)[is_event])
                added_event["num_loose_mu"] = ak.to_numpy(
                    ak.num(fakeablemuon)[is_event])
                add_muon = ak.to_pandas(
                    ak.flatten(muon[is_event])[muon_columns])
                add_concat = pd.concat([add_muon, added_event], axis=1)
                events_array = pd.concat([events_array, add_concat], axis=0)

            output['muons_df'] += processor.column_accumulator(
                events_array.to_numpy())

        return output
Beispiel #26
0
def empty_column_accumulator_float16():
    """Return a ``processor.column_accumulator`` holding an empty float16 array."""
    # A zero-length, one-dimensional array fixes the dtype of the column.
    empty_values = np.array([], dtype=np.float16)
    return processor.column_accumulator(empty_values)
    def process(self, df):
        """Accumulate per-leptonjet columns for one chunk of events.

        Event weights are built from the generator and pileup weights (skipped
        when ``self.data_type`` is ``'data'``) together with the trigger,
        cosmic-veto and primary-vertex masks, which are registered as weights.
        Only events with at least two leptonjets are kept.  When
        ``self.dphi_control`` is true, the selection is further restricted to
        events where |delta phi| between ``lj0`` and ``lj1`` is below pi/2.
        For ``'bkg'`` samples the weights are scaled by ``bkgSCALE[dataset]``.

        Args:
            df: chunk object exposing ``size`` and branch lookup by name.

        Returns:
            The accumulator with one flattened entry per selected leptonjet
            appended to each column, or the identity accumulator unchanged when
            the chunk or the selection is empty.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        # ---- event weights ----
        wgts = processor.Weights(df.size)
        if self.data_type != 'data':
            wgts.add('genw', df['weight'])
            npv = df['trueInteractionNum']
            wgts.add('pileup', *(f(npv) for f in self.pucorrs))

        # An event passes the trigger mask if any listed trigger fired.
        trigger_flags = [df[t] for t in Triggers]
        wgts.add('trigger', np.logical_or.reduce(trigger_flags))
        wgts.add('cosmicveto', df['cosmicveto_result'])
        wgts.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])
        # (any further weights would be registered here)

        weight = wgts.weight()

        # ---- leptonjet collection ----
        counts = df['pfjet_p4']
        lj_columns = {
            'px': df['pfjet_p4.fCoordinates.fX'],
            'py': df['pfjet_p4.fCoordinates.fY'],
            'pz': df['pfjet_p4.fCoordinates.fZ'],
            'energy': df['pfjet_p4.fCoordinates.fT'],
            'pfisoAll05': df['pfjet_pfIsolation05'],
            'pfisoNopu05': df['pfjet_pfIsolationNoPU05'],
            'pfisoDbeta': df['pfjet_pfiso'],
            'ncands': df['pfjet_pfcands_n'],
        }
        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            counts, **lj_columns)

        # Daughter type codes 3 and 8 are the ones counted as npfmu / ndsa
        # (presumably PF muons and DSA muons -- TODO confirm the encoding).
        ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
        is_type3 = ljdautype == 3
        is_type8 = ljdautype == 8
        npfmu = is_type3.sum()
        ndsa = is_type8.sum()
        without_dsa = ndsa == 0
        isegammajet = (npfmu == 0) & without_dsa
        ispfmujet = (npfmu >= 2) & without_dsa
        isdsajet = ndsa > 0
        # label: 1 = egamma-type, 2 = PF-muon-type, 3 = DSA-type, 0 otherwise.
        label = (isegammajet.astype(int) * 1
                 + ispfmujet.astype(int) * 2
                 + isdsajet.astype(int) * 3)
        leptonjets.add_attributes(label=label)
        nmu = (is_type3 | is_type8).sum()
        leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))

        # ---- require at least two leptonjets ----
        twoleptonjets = leptonjets.counts >= 2
        dileptonjets = leptonjets[twoleptonjets]
        wgt = weight[twoleptonjets]
        if dileptonjets.size == 0:
            return output

        lj_pt = dileptonjets.pt
        lj0 = dileptonjets[lj_pt.argmax()]
        # Second entry of the pt argsort (presumably the sub-leading leptonjet,
        # assuming argsort orders by descending pt -- TODO confirm).
        lj1 = dileptonjets[lj_pt.argsort()[:, 1:2]]

        # ---- channel definition: 1 = 2mu2e, 2 = 4mu, 0 = neither ----
        n_mutype = dileptonjets.ismutype.sum()
        muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
        muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
        channel_2mu2e = ((n_mutype == 1) & muljInLeading2Events).astype(int) * 1
        channel_4mu = ((n_mutype == 2) & muljIsLeading2Events).astype(int) * 2
        channel_ = channel_2mu2e + channel_4mu

        isControl = (np.abs(lj0.p4.delta_phi(lj1.p4)) < np.pi / 2).flatten()

        if self.dphi_control:
            # Apply the same event mask to every per-event quantity.
            dileptonjets, wgt, lj0, lj1, channel_ = (
                arr[isControl]
                for arr in (dileptonjets, wgt, lj0, lj1, channel_))
        if dileptonjets.size == 0:
            return output

        if self.data_type == 'bkg':
            wgt *= bkgSCALE[dataset]

        # ---- fill output columns (one entry per leptonjet) ----
        ncands = dileptonjets.ncands
        # Jagged array of ones used to broadcast per-event values to leptonjets.
        ones = dileptonjets.pt.ones_like()
        columns = {
            'all05': dileptonjets.pfisoAll05,
            'nopu05': dileptonjets.pfisoNopu05,
            'dbeta': dileptonjets.pfisoDbeta,
            'all05w': dileptonjets.pfisoAll05 / ncands,
            'nopu05w': dileptonjets.pfisoNopu05 / ncands,
            'dbetaw': dileptonjets.pfisoDbeta / ncands,
            'pt': dileptonjets.pt,
            'eta': dileptonjets.eta,
            'wgt': ones * wgt,
            'ljtype': (dileptonjets.ismutype.astype(int) * 1
                       + dileptonjets.iseltype.astype(int) * 2),
            'channel': ones * channel_,
        }
        for key, jagged in columns.items():
            output[key] += processor.column_accumulator(jagged.flatten())

        return output
Beispiel #28
0
def empty_column_accumulator_bool():
    """Return a ``processor.column_accumulator`` holding an empty boolean array.

    ``np.bool_`` is used rather than the ``np.bool`` alias: the alias was
    deprecated in NumPy 1.20 and removed in 1.24, where accessing it raises
    ``AttributeError``.  ``np.bool_`` yields the same dtype on every release.
    """
    return processor.column_accumulator(np.array([], dtype=np.bool_))
    def __init__(
        self,
        year='2017',
        systematics=True,
        jet_arbitration='pt',
        tagger='v2',
        nnlops_rew=False,
        skipJER=False,
        tightMatch=False,
        newTrigger=False,
        looseTau=False,
        newVjetsKfactor=False,
        ak4tagger='deepcsv',
        skipRunB=False,
        finebins=False,
        ewkHcorr=False,
        evtVizInfo=False,
    ):
        """Record the analysis options and build the output factory.

        Every keyword argument is stored on the instance (``systematics`` as
        is, the others under a leading underscore).  ``ak4tagger`` must be
        ``'deepcsv'`` or ``'deepjet'``; it selects ``self._ak4tagBranch`` and is
        passed, together with ``year``, to ``BTagCorrector``.  ``tagger`` and
        ``finebins`` switch between alternative histogram binnings.

        The per-year tables ``_msdSF``, ``_muontriggers``, ``_triggers``,
        ``_met_filters`` and ``_json_paths`` are keyed by the year string.
        ``self.make_output`` is set to a zero-argument callable that returns a
        new dict of accumulators and histograms on each call.

        Raises:
            NotImplementedError: if ``ak4tagger`` is neither ``'deepcsv'`` nor
                ``'deepjet'``.
        """
        # ---- plain option storage ----
        self._year = year
        self._tagger = tagger
        self.systematics = systematics
        # for 2018, reweight POWHEG to NNLOPS
        self._nnlops_rew = nnlops_rew
        self._jet_arbitration = jet_arbitration
        self._skipJER = skipJER
        self._tightMatch = tightMatch
        self._newVjetsKfactor = newVjetsKfactor
        # Fewer triggers, new maps (2017 only, ~no effect)
        self._newTrigger = newTrigger
        # Looser tau veto
        self._looseTau = looseTau
        self._ewkHcorr = ewkHcorr
        self._ak4tagger = ak4tagger
        self._skipRunB = skipRunB
        self._finebins = finebins
        self._evtVizInfo = evtVizInfo

        # ---- AK4 b-tagger selection ----
        if self._ak4tagger not in ('deepcsv', 'deepjet'):
            raise NotImplementedError()
        self._ak4tagBranch = (
            'btagDeepB' if self._ak4tagger == 'deepcsv' else 'btagDeepFlavB')

        self._btagSF = BTagCorrector(year, self._ak4tagger, 'medium')

        # ---- per-year lookup tables ----
        self._msdSF = {'2016': 1., '2017': 0.987, '2018': 0.970}

        self._muontriggers = {
            '2016': ['Mu50'],  # TODO: check
            '2017': ['Mu50', 'TkMu50'],
            '2018': ['Mu50'],  # TODO: check
        }

        triggers_2016 = [
            'PFHT800',
            'PFHT900',
            'AK8PFJet360_TrimMass30',
            'AK8PFHT700_TrimR0p1PT0p03Mass50',
            'PFHT650_WideJetMJJ950DEtaJJ1p5',
            'PFHT650_WideJetMJJ900DEtaJJ1p5',
            'AK8DiPFJet280_200_TrimMass30_BTagCSV_p20',
            'PFJet450',
        ]
        triggers_2017 = [
            'AK8PFJet330_PFAK8BTagCSV_p17',
            'PFHT1050',
            'AK8PFJet400_TrimMass30',
            'AK8PFJet420_TrimMass30',  # redundant
            'AK8PFHT800_TrimMass50',
            'PFJet500',
            'AK8PFJet500',
        ]
        triggers_2018 = [
            'AK8PFJet400_TrimMass30',
            'AK8PFJet420_TrimMass30',
            'AK8PFHT800_TrimMass50',
            'PFHT1050',
            'PFJet500',
            'AK8PFJet500',
            'AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4',
        ]
        self._triggers = {
            '2016': triggers_2016,
            '2017': triggers_2017,
            '2018': triggers_2018,
        }

        # https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2
        # Filters shared by every year and by both data and MC.  The
        # 'eeBadScFilter' entry is applied to data only; 'ecalBadCalibFilterV2'
        # is added for 2017 and 2018.  Each list below is a separate object.
        shared_filters = [
            'goodVertices',
            'globalSuperTightHalo2016Filter',
            'HBHENoiseFilter',
            'HBHENoiseIsoFilter',
            'EcalDeadCellTriggerPrimitiveFilter',
            'BadPFMuonFilter',
        ]
        self._met_filters = {
            '2016': {
                'data': shared_filters + ['eeBadScFilter'],
                'mc': list(shared_filters),
            },
        }
        for late_year in ('2017', '2018'):
            self._met_filters[late_year] = {
                'data': shared_filters + ['eeBadScFilter',
                                          'ecalBadCalibFilterV2'],
                'mc': shared_filters + ['ecalBadCalibFilterV2'],
            }

        self._json_paths = {
            '2016':
            'jsons/Cert_271036-284044_13TeV_23Sep2016ReReco_Collisions16_JSON.txt',
            '2017':
            'jsons/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt',
            '2018':
            'jsons/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
        }

        # ---- tagger score axes (ddb, ddc, ddcvb, in that order) ----
        if self._tagger == 'v3':
            score_edges = {
                'ddb': [0, 0.7, 0.89, 1],
                'ddc': [0, 0.44, .84, 1],
                'ddcvb': [0, 0.017, 0.11, 1],
            }
        else:
            score_edges = {
                'ddb': [0, 0.7, 1],
                'ddc': [0, 0.4, 0.45, 0.5, 0.7, 1],
                'ddcvb': [0, 0.01, 0.03, 0.1, 1],
            }
        taggerbins = tuple(
            hist2.axis.Variable(edges,
                                name=score,
                                label='Jet %s score' % score,
                                flow=False)
            for score, edges in score_edges.items())

        # ---- mass / pt axes: fine binning leaves `flow` at its default ----
        if self._finebins:
            n_msd_bins, msd_upper = 200, 200
            flow_opts = {}
        else:
            n_msd_bins, msd_upper = 23, 201
            flow_opts = {'flow': False}
        mass_bins = hist2.axis.Regular(n_msd_bins,
                                       40,
                                       msd_upper,
                                       name='msd',
                                       label=r'Jet $m_{sd}$',
                                       **flow_opts)
        pt_bins = hist2.axis.Variable(
            [450, 475, 500, 550, 600, 675, 800, 1200],
            name='pt',
            label=r'Jet $p_{T}$ [GeV]',
            **flow_opts)
        gen_axis = hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor')

        optbins = np.r_[np.linspace(0, 0.15, 30, endpoint=False),
                        np.linspace(0.15, 1, 86)]

        def make_output():
            # Small factories so each histogram receives its own axis objects.
            def region_axis():
                return hist2.axis.StrCategory([], name='region', growth=True)

            def systematic_axis():
                return hist2.axis.StrCategory(
                    [], name='systematic', growth=True)

            def cut_axis():
                return hist2.axis.IntCategory(
                    [0, 1, 2, 3], name='cut', label='Cut index', growth=True)

            def flavor_axis():
                return hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor')

            def opt_msd_axis():
                return hist2.axis.Variable(
                    [40, 70, 80, 90, 100, 110, 120, 130, 140],
                    name='msd',
                    label=r'Jet $m_{sd}$')

            def response_pt_axis():
                return hist2.axis.Variable(
                    [450, 500, 550, 600, 675, 800, 1200],
                    name='pt',
                    label=r'Jet $p_{T}$ [GeV]')

            check_columns = ("mass", "njet", "fname", "run",
                             "luminosityBlock", "event")

            # NOTE: an 'etaphi' histogram used to sit between 'templates' and
            # 'wtag'; it is disabled and therefore not created here.
            return {
                'sumw': 0.,
                'to_check': {
                    column: processor.column_accumulator(np.array([]))
                    for column in check_columns
                },
                'cutflow_msd': hist2.Hist(
                    region_axis(),
                    gen_axis,
                    cut_axis(),
                    mass_bins,
                    hist2.storage.Weight(),
                ),
                'cutflow_eta': hist2.Hist(
                    region_axis(),
                    gen_axis,
                    cut_axis(),
                    hist2.axis.Regular(
                        40, -2.5, 2.5, name='eta', label=r'Jet $\eta$'),
                    hist2.storage.Weight(),
                ),
                'cutflow_pt': hist2.Hist(
                    region_axis(),
                    gen_axis,
                    cut_axis(),
                    hist2.axis.Regular(
                        100, 400, 1200, name='pt', label=r'Jet $p_{T}$ [GeV]'),
                    hist2.storage.Weight(),
                ),
                'nminus1_n2ddt': hist2.Hist(
                    region_axis(),
                    hist2.axis.Regular(
                        40, -0.25, 0.25, name='n2ddt', label='N2ddt value'),
                    hist2.storage.Weight(),
                ),
                'btagWeight': hist2.Hist(
                    hist2.axis.Regular(
                        50, 0, 3, name='val', label='BTag correction'),
                    hist2.storage.Weight(),
                ),
                'templates': hist2.Hist(
                    region_axis(),
                    systematic_axis(),
                    hist2.axis.StrCategory([], name='runid', growth=True),
                    gen_axis,
                    pt_bins,
                    mass_bins,
                    *taggerbins,
                    hist2.storage.Weight(),
                ),
                'wtag': hist2.Hist(
                    region_axis(),
                    systematic_axis(),
                    gen_axis,
                    hist2.axis.Variable([-1, 0, 1],
                                        name='n2ddt',
                                        label=r'N2ddt value',
                                        flow=False),
                    hist2.axis.Variable([200, 250, 300, 350, 400, 450, 1200],
                                        name='pt',
                                        label=r'Jet $p_{T}$ [GeV]'),
                    hist2.axis.Regular(46,
                                       40,
                                       201,
                                       name='msd',
                                       label=r'Jet $m_{sd}$',
                                       flow=False),
                    *taggerbins[1:],
                    hist2.storage.Weight(),
                ),
                'signal_opt': hist2.Hist(
                    region_axis(),
                    flavor_axis(),
                    hist2.axis.Variable(
                        optbins, name='ddc', label=r'Jet CvL score'),
                    hist2.axis.Variable(
                        optbins, name='ddcvb', label=r'Jet CvB score'),
                    opt_msd_axis(),
                    hist2.storage.Weight(),
                ),
                'signal_optb': hist2.Hist(
                    region_axis(),
                    flavor_axis(),
                    hist2.axis.Variable(
                        optbins, name='ddb', label=r'Jet BvL score'),
                    opt_msd_axis(),
                    hist2.storage.Weight(),
                ),
                'genresponse_noweight': hist2.Hist(
                    region_axis(),
                    systematic_axis(),
                    response_pt_axis(),
                    hist2.axis.Variable(
                        np.geomspace(400, 1200, 60),
                        name='genpt',
                        label=r'Generated Higgs $p_{T}$ [GeV]'),
                    hist2.storage.Double(),
                ),
                'genresponse': hist2.Hist(
                    region_axis(),
                    systematic_axis(),
                    response_pt_axis(),
                    hist2.axis.Variable(
                        [200, 300, 450, 650, 7500],
                        name='genpt',
                        label=r'Generated Higgs $p_{T}$ [GeV]'),
                    hist2.storage.Weight(),
                ),
            }

        self.make_output = make_output
Beispiel #30
0
    def process(self, events):
        output = self.accumulator.identity()

        ############### Cuts
        # Dimu cuts: charge = 0, mass cuts and chi2...
        # test if there is any events in the file
        if len(events) == 0:
            return output

        ############### Get the main primary vertex properties ############### 
        Primary_vertex = ak.zip({**get_vars_dict(events, primary_vertex_cols)})

        ############### Get the gen particles properties ############### 
        if (self.analysis_type == 'mc'): 
            Gen_particles = ak.zip({**get_vars_dict(events, gen_part_cols)})
        elif (self.analysis_type == 'data'):
            Gen_particles = ak.zip([[]])

        ## Cut for Gen jpsi
        if (self.analysis_type == 'mc'): 
            Gen_Jpsi = Gen_particles[Gen_particles.pdgId == 443]

        # Cut for Gen Dstar
        if (self.analysis_type == 'mc'): 
            Gen_Dstar = Gen_particles[Gen_particles.pdgId == 413]

        # Cut for Gen D0
        if (self.analysis_type == 'mc'): 
            Gen_D0 = Gen_particles[Gen_particles.pdgId == 421]
        
                
        ############### Get All the interesting candidates from NTuples
        Dimu = ak.zip({**get_vars_dict(events, dimu_cols)}, with_name="PtEtaPhiMCandidate")
        Muon = ak.zip({**get_vars_dict(events, muon_cols)}, with_name="PtEtaPhiMCandidate")
        D0 = ak.zip({'mass': events.D0_mass12, **get_vars_dict(events, d0_cols)}, with_name="PtEtaPhiMCandidate")
        Dstar = ak.zip({'mass': (events.DstarD0_mass + events.Dstar_deltamr),
                        'charge': events.Dstarpis_chg,
                        **get_vars_dict(events, dstar_cols)}, 
                        with_name="PtEtaPhiMCandidate")

        output['cutflow']['Number of events'] += len(events)
        output['cutflow']['Number of Dimu'] += ak.sum(ak.num(Dimu))
        output['cutflow']['all D0']      += ak.sum(ak.num(D0))
        output['cutflow']['all Dstar']   += ak.sum(ak.num(Dstar))

        ############### Dimu cuts charge = 0, mass cuts and chi2...
        Dimu = Dimu[Dimu.charge == 0]
        output['cutflow']['Dimu 0 charge'] += ak.sum(ak.num(Dimu))

        Dimu = Dimu[((Dimu.mass > 8.5) & (Dimu.mass < 11.5)) | ((Dimu.mass > 2.9) & (Dimu.mass < 3.3)) | ((Dimu.mass > 3.35) & (Dimu.mass < 4.05))]
        output['cutflow']['Quarkonia mass'] += ak.sum(ak.num(Dimu))
        
        # Prompt/nomprompt cut for jpsi
        dimuon_prompt_cut = (Dimu.dlSig > 0) & (Dimu.dlSig < 2.5)
        dimuon_nonprompt_cut = (Dimu.dlSig > 4) 
        #Dimu = Dimu[dimuon_nonprompt_cut]
        #output['cutflow']['Dimu prompt'] += ak.sum(ak.num(Dimu))

        # Pointing angle cut for jpsi
        dimuon_pointing_cut = (Dimu.cosphi > 0.99)
        #Dimu = Dimu[dimuon_pointing_cut]

        ############### Get the Muons from Dimu, for cuts in their params
        Muon = ak.zip({'0': Muon[Dimu.t1_muIdx], '1': Muon[Dimu.t2_muIdx]})

        # SoftId and Global Muon cuts
        soft_id = (Muon.slot0.softId > 0) & (Muon.slot1.softId > 0)
        Dimu = Dimu[soft_id]
        Muon = Muon[soft_id]
        output['cutflow']['Dimu muon softId'] += ak.sum(ak.num(Dimu))

        global_muon = (Muon.slot0.isGlobal > 0) & (Muon.slot1.isGlobal > 0)
        Dimu = Dimu[global_muon]
        Muon = Muon[global_muon]
        output['cutflow']['Dimu muon global'] += ak.sum(ak.num(Dimu))

        # pt and eta cuts
        if loose:
            muon_pt_cut = (Muon.slot0.pt > 1) & (Muon.slot1.pt > 1)

        else:
            
            muon_pt_cut = (Muon.slot0.pt > 3) & (Muon.slot1.pt > 3)
        
        Dimu = Dimu[muon_pt_cut]
        Muon = Muon[muon_pt_cut]
        output['cutflow']['Dimu muon pt cut'] += ak.sum(ak.num(Dimu))

        muon_eta_cut = (np.absolute(Muon.slot0.eta) <= 2.4) & (np.absolute(Muon.slot1.eta) <= 2.4)
        Dimu = Dimu[muon_eta_cut]
        Muon = Muon[muon_eta_cut]
        output['cutflow']['Dimu muon eta cut'] += ak.sum(ak.num(Dimu))
        
        #dimu_pt_cut = (Dimu.pt > 22) & (Dimu.pt < 26)
        #Dimu = Dimu[dimu_pt_cut]

        #dimu_rap_cut = (Dimu.rap > 1.2) & (Dimu.rap < 1.8)
        #Dimu = Dimu[dimu_rap_cut]

        Dimu['is_ups'] = (Dimu.mass > 8.5) & (Dimu.mass < 11.5)
        Dimu['is_jpsi'] = (Dimu.mass > 2.9) & (Dimu.mass < 3.3)
        Dimu['is_psi'] = (Dimu.mass > 3.35) & (Dimu.mass < 4.05)

        ############### Cuts for D0
        D0 = D0[~D0.hasMuon]
        output['cutflow']['D0 trk muon cut'] += ak.sum(ak.num(D0))

        if loose:
            D0 = D0[(D0.t1_pt > 0.4) & (D0.t2_pt > 0.4)]
            output['cutflow']['D0 trk pt cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_chindof < 4) & (D0.t2_chindof < 4)]
            output['cutflow']['D0 trk chi2 cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_nValid > 2) & (D0.t2_nValid > 2) & (D0.t1_nPix > 1) & (D0.t2_nPix > 1)]
            output['cutflow']['D0 trk hits cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_dxy < 0.1) & (D0.t2_dxy < 0.1)]
            output['cutflow']['D0 trk dxy cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_dz < 1.) & (D0.t2_dz < 1.)]
            output['cutflow']['D0 trk dz cut'] += ak.sum(ak.num(D0))
        
        else:
            

            D0 = D0[(D0.t1_pt > 0.8) & (D0.t2_pt > 0.8)]
            output['cutflow']['D0 trk pt cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_chindof < 2.5) & (D0.t2_chindof < 2.5)]
            output['cutflow']['D0 trk chi2 cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_nValid > 4) & (D0.t2_nValid > 4) & (D0.t1_nPix > 1) & (D0.t2_nPix > 1)]
            output['cutflow']['D0 trk hits cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_dxy < 0.1) & (D0.t2_dxy < 0.1)]
            output['cutflow']['D0 trk dxy cut'] += ak.sum(ak.num(D0))

            D0 = D0[(D0.t1_dz < 1.) & (D0.t2_dz < 1.)]
            output['cutflow']['D0 trk dz cut'] += ak.sum(ak.num(D0))

        # D0 cosphi
        if loose:
            D0 = D0[D0.cosphi > 0.1]

        else:

            D0 = D0[D0.cosphi > 0.99]
        output['cutflow']['D0 cosphi cut'] += ak.sum(ak.num(D0))

        # D0 dl Significance
        if loose:
            D0 = D0[D0.dlSig > 5.]
        else:

            D0 = D0[D0.dlSig > 5.]
        output['cutflow']['D0 dlSig cut'] += ak.sum(ak.num(D0))

        # D0 pt
        D0 = D0[D0.pt > 3.]
        output['cutflow']['D0 pt cut'] += ak.sum(ak.num(D0))

        ############### Cuts for Dstar

        # trks cuts
        Dstar = Dstar[~Dstar.hasMuon]
        output['cutflow']['Dstar trk muon cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[(Dstar.K_pt > 0.5) & (Dstar.pi_pt > 0.5)]
        output['cutflow']['Dstar trk pt cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[(Dstar.K_chindof < 2.5) & (Dstar.pi_chindof < 2.5)]
        output['cutflow']['Dstar trk pt cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[(Dstar.K_nValid > 4) & (Dstar.pi_nValid > 4) & (Dstar.K_nPix > 1) & (Dstar.pi_nPix > 1)]
        output['cutflow']['Dstar trk hits cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[(Dstar.K_dxy < 0.1) & (Dstar.pi_dxy < 0.1)]
        output['cutflow']['Dstar trk pt cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[(Dstar.K_dz < 1) & (Dstar.pi_dz < 1)]
        output['cutflow']['Dstar trk pt cut'] += ak.sum(ak.num(Dstar))

        # pis cuts
        Dstar = Dstar[Dstar.pis_pt > 0.3]
        output['cutflow']['Dstar pis pt cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[Dstar.pis_chindof < 3]
        output['cutflow']['Dstar pis chi2 cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[Dstar.pis_nValid > 2]
        output['cutflow']['Dstar pis hits cut'] += ak.sum(ak.num(Dstar))

        # D0 of Dstar cuts
        Dstar = Dstar[Dstar.D0_cosphi > 0.99]
        output['cutflow']['Dstar D0 cosphi cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[(Dstar.D0_mass < D0_PDG_MASS + 0.025) & (Dstar.D0_mass > D0_PDG_MASS - 0.025)]
        output['cutflow']['Dstar D0 mass cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[Dstar.D0_pt > 3]
        output['cutflow']['Dstar D0 pt cut'] += ak.sum(ak.num(Dstar))

        Dstar = Dstar[Dstar.D0_dlSig > 3]
        output['cutflow']['Dstar D0 dlSig cut'] += ak.sum(ak.num(Dstar))

        Dstar['wrg_chg'] = (Dstar.K_chg == Dstar.pi_chg)

        ############### Dimu + OpenCharm associations

        DimuDstar = association(Dimu, Dstar)

        ############### Final computation of number of objects
        output['cutflow']['Dimu final']    += ak.sum(ak.num(Dimu))
        output['cutflow']['D0 final']      += ak.sum(ak.num(D0))
        output['cutflow']['Dstar final']   += ak.sum(ak.num(Dstar))
        output['cutflow']['Dimu Dstar Associated'] += ak.sum(ak.num(DimuDstar))

        ############### Leading and Trailing muon separation Gen_particles
        leading_mu = (Muon.slot0.pt > Muon.slot1.pt)
        Muon_lead = ak.where(leading_mu, Muon.slot0, Muon.slot1)
        Muon_trail = ak.where(~leading_mu, Muon.slot0, Muon.slot1)

        ############### Create the accumulators to save output

        # Primary vertex accumulator
        primary_vertex_acc = processor.dict_accumulator({})
        for var in Primary_vertex.fields:
            primary_vertex_acc[var] = processor.column_accumulator(ak.to_numpy(Primary_vertex[var]))
        output["Primary_vertex"] = primary_vertex_acc

        # Gen Particles accumulator
        gen_part_acc = processor.dict_accumulator({})
        if (self.analysis_type == 'mc'):
            for var in Gen_particles.fields:
                gen_part_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Gen_particles[var])))
            gen_part_acc['nGenPart'] = processor.column_accumulator(ak.to_numpy(ak.num(Gen_particles))) 
            output["Gen_particles"] = gen_part_acc

        # Gen Jpsi accumulator
        gen_jpsi_acc = processor.dict_accumulator({})
        if (self.analysis_type == 'mc'):
            for var in Gen_Jpsi.fields:
                gen_jpsi_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Gen_Jpsi[var])))
            gen_jpsi_acc['nGenJpsi'] = processor.column_accumulator(ak.to_numpy(ak.num(Gen_Jpsi))) 
            output["Gen_Jpsi"] = gen_jpsi_acc
        
        # Gen Dstar accumulator
        gen_dstar_acc = processor.dict_accumulator({})
        if (self.analysis_type == 'mc'):
            for var in Gen_Dstar.fields:
                gen_dstar_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Gen_Dstar[var])))
            gen_dstar_acc['nGenDstar'] = processor.column_accumulator(ak.to_numpy(ak.num(Gen_Dstar[var])))
            output["Gen_Dstar"] = gen_dstar_acc
            
        # Gen D0 accumulator
        gen_d0_acc = processor.dict_accumulator({})
        if (self.analysis_type == 'mc'):
            for var in Gen_D0.fields:
                gen_d0_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Gen_D0[var])))
            gen_d0_acc['nGenD0'] = processor.column_accumulator(ak.to_numpy(ak.num(Gen_D0[var])))
            output["Gen_D0"] = gen_d0_acc
            
    
        muon_lead_acc = processor.dict_accumulator({})
        for var in Muon_lead.fields:
            muon_lead_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Muon_lead[var])))
        muon_lead_acc["nMuon"] = processor.column_accumulator(ak.to_numpy(ak.num(Muon_lead)))
        output["Muon_lead"] = muon_lead_acc
        

        muon_trail_acc = processor.dict_accumulator({})
        for var in Muon_trail.fields:
            muon_trail_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Muon_trail[var])))
        muon_trail_acc["nMuon"] = processor.column_accumulator(ak.to_numpy(ak.num(Muon_trail)))
        output["Muon_trail"] = muon_trail_acc

        dimu_acc = processor.dict_accumulator({})
        for var in Dimu.fields:
            if (var.startswith('t')): continue
            dimu_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Dimu[var])))
        dimu_acc["nDimu"] = processor.column_accumulator(ak.to_numpy(ak.num(Dimu)))
        output["Dimu"] = dimu_acc

        D0_acc = processor.dict_accumulator({})
        D0_trk_acc = processor.dict_accumulator({})
        for var in D0.fields:
            if (var.startswith('t')):
                D0_trk_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(D0[var])))
            else:
                D0_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(D0[var])))
        D0_acc["nD0"] = processor.column_accumulator(ak.to_numpy(ak.num(D0)))
        output["D0"] = D0_acc
        output["D0_trk"] = D0_trk_acc

        Dstar_acc = processor.dict_accumulator({})
        Dstar_D0_acc = processor.dict_accumulator({})
        Dstar_trk_acc = processor.dict_accumulator({})
        for var in Dstar.fields:
            if var.startswith('D0'):
                Dstar_D0_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Dstar[var])))
            elif (var.startswith('K') or var.startswith('pi')):
                Dstar_trk_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Dstar[var])))
            else:
                Dstar_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(Dstar[var])))
        Dstar_acc["nDstar"] = processor.column_accumulator(ak.to_numpy(ak.num(Dstar)))
        output["Dstar"] = Dstar_acc
        output["Dstar_D0"] = Dstar_D0_acc
        output["Dstar_trk"] = Dstar_trk_acc

        DimuDstar_acc = processor.dict_accumulator({})
        DimuDstar_acc['Dimu'] = processor.dict_accumulator({})
        DimuDstar_acc['Dstar'] = processor.dict_accumulator({})
        for var in DimuDstar.fields:
            if (var == '0') or (var =='1'):
                continue
            elif var == 'cand':
                for i0 in DimuDstar[var].fields:
                    DimuDstar_acc[i0] = processor.column_accumulator(ak.to_numpy(ak.flatten(DimuDstar[var][i0])))
            else:
                DimuDstar_acc[var] = processor.column_accumulator(ak.to_numpy(ak.flatten(DimuDstar[var])))

        for var in DimuDstar.slot0.fields:
            DimuDstar_acc['Dimu'][var] = processor.column_accumulator(ak.to_numpy(ak.flatten(DimuDstar.slot0[var])))

        for var in DimuDstar.slot1.fields:
            DimuDstar_acc['Dstar'][var] = processor.column_accumulator(ak.to_numpy(ak.flatten(DimuDstar.slot1[var])))
        DimuDstar_acc['nDimuDstar'] = processor.column_accumulator(ak.to_numpy(ak.num(DimuDstar)))
        output['DimuDstar'] = DimuDstar_acc

        file_hash = str(random.getrandbits(128)) + str(len(events))
        save(output, "output/" + self.analyzer_name + "/" + self.analyzer_name + "_" + file_hash + ".coffea")

        # Return a minimal accumulator holding only the cutflow; the full
        # `output` has already been handed to save() above.
        return processor.dict_accumulator({
                'cutflow': output['cutflow']
        })