def setupNPArray(self, cuts, variables):
    """Replace ``self._accumulator`` with one empty column per output quantity.

    A column is booked for every entry of ``variables`` whose element at
    index 4 is greater than zero, followed by one column for every key of
    ``cuts``. Each column starts as a zero-length array.

    Also sets ``self.setupNPArr`` to ``True``.
    """
    def _empty_column():
        # A fresh accumulator per key so that no two columns share storage.
        return processor.column_accumulator(np.zeros(shape=(0)))

    columns = {
        var_name: _empty_column()
        for var_name, spec in variables.items()
        if spec[4] > 0
    }
    # Cut columns are added after the variable columns; a cut that reuses a
    # variable name replaces that entry.
    columns.update((cut_name, _empty_column()) for cut_name in cuts.keys())

    self._accumulator = processor.dict_accumulator(columns)
    self.setupNPArr = True
def __init__(self):
    """Book the accumulators filled by this processor.

    ``sumw`` and ``nevents`` are per-key float sums. ``variables``,
    ``variables_merged``, ``weights`` and ``weights_merged`` are per-key
    column accumulators whose default value is the identity of an empty
    template array.
    """
    # Transposing a list of empty rows gives a 2-D array with zero rows and
    # one column per inner list.
    variables_template = np.transpose(np.array([[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]))
    merged_template = np.transpose(np.array([[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]))

    def _columns_per_key(template):
        # New keys start from the identity of an accumulator built on the template.
        return processor.defaultdict_accumulator(
            processor.column_accumulator(template).identity
        )

    booked = {}
    booked['sumw'] = processor.defaultdict_accumulator(float)
    booked['nevents'] = processor.defaultdict_accumulator(float)
    booked['variables'] = _columns_per_key(variables_template)
    booked['variables_merged'] = _columns_per_key(merged_template)
    booked['weights'] = _columns_per_key(np.array([]))
    booked['weights_merged'] = _columns_per_key(np.array([]))

    self._accumulator = processor.dict_accumulator(booked)
def test_accumulators():
    """Check in-place addition on each of the accumulator classes."""
    # value_accumulator: identity() returns the factory's initial value.
    scalar = processor.value_accumulator(float)
    scalar += 3.
    assert scalar.value == 3.
    assert scalar.identity().value == 0.

    array_valued = processor.value_accumulator(partial(np.array, [2.]))
    array_valued += 3.
    assert np.array_equal(array_valued.value, np.array([5.]))
    assert np.array_equal(array_valued.identity().value, np.array([2.]))

    # list_accumulator: accepts plain lists and other list accumulators.
    items = processor.list_accumulator(range(4))
    items += [3]
    items += processor.list_accumulator([1, 2])
    assert items == [0, 1, 2, 3, 3, 1, 2]

    # set_accumulator: accepts both a set and a bare string.
    fruit = processor.set_accumulator({'apples', 'oranges'})
    fruit += {'pears'}
    fruit += 'grapes'
    assert fruit == {'apples', 'oranges', 'pears', 'grapes'}

    # dict_accumulator: merges key by key and adopts keys it did not have.
    nested = processor.dict_accumulator({'num': array_valued, 'fruit': fruit})
    nested['num'] += 2.
    extra = processor.dict_accumulator({
        'num2': processor.value_accumulator(int),
        'fruit': processor.set_accumulator({'apples', 'cherries'}),
    })
    nested += extra
    assert nested['num2'].value == 0
    assert np.array_equal(nested['num'].value, np.array([7.]))
    assert nested['fruit'] == {'apples', 'oranges', 'pears', 'grapes', 'cherries'}

    # defaultdict_accumulator with a float default.
    tallies = processor.defaultdict_accumulator(float)
    tallies['x'] = 0.
    tallies['x'] += 4.
    tallies['y'] += 5.
    tallies['z'] += tallies['x']
    tallies['x'] += tallies['y']
    assert tallies['x'] == 9.
    assert tallies['y'] == 5.
    assert tallies['z'] == 4.
    assert tallies['w'] == 0.

    # The sum is not inspected; evaluating it checks that the addition runs.
    _ = tallies + nested

    # defaultdict_accumulator with a non-zero default.
    offset_tallies = processor.defaultdict_accumulator(lambda: 2.)
    offset_tallies['x'] += 4.
    assert offset_tallies['x'] == 6.

    offset_tallies += offset_tallies
    assert offset_tallies['x'] == 12.
    assert offset_tallies['y'] == 2.

    # column_accumulator: the total of the combined values is 15 + 66.
    first_block = processor.column_accumulator(np.arange(6).reshape(2,3))
    second_block = processor.column_accumulator(np.arange(12).reshape(4,3))
    first_block += second_block
    assert first_block.value.sum() == 81
def test_accumulators():
    """Check in-place addition on each of the accumulator classes."""
    # --- value_accumulator -------------------------------------------------
    float_acc = processor.value_accumulator(float)
    float_acc += 3.0
    assert float_acc.value == 3.0
    assert float_acc.identity().value == 0.0

    make_start = partial(np.array, [2.0])
    ndarray_acc = processor.value_accumulator(make_start)
    ndarray_acc += 3.0
    assert np.array_equal(ndarray_acc.value, np.array([5.0]))
    assert np.array_equal(ndarray_acc.identity().value, np.array([2.0]))

    # --- list_accumulator --------------------------------------------------
    sequence_acc = processor.list_accumulator(range(4))
    sequence_acc += [3]
    sequence_acc += processor.list_accumulator([1, 2])
    assert sequence_acc == [0, 1, 2, 3, 3, 1, 2]

    # --- set_accumulator ---------------------------------------------------
    fruit_acc = processor.set_accumulator({"apples", "oranges"})
    fruit_acc += {"pears"}
    fruit_acc += "grapes"
    assert fruit_acc == {"apples", "oranges", "pears", "grapes"}

    # --- dict_accumulator --------------------------------------------------
    mapping_acc = processor.dict_accumulator(
        {"num": ndarray_acc, "fruit": fruit_acc}
    )
    mapping_acc["num"] += 2.0
    mapping_acc += processor.dict_accumulator({
        "num2": processor.value_accumulator(int),
        "fruit": processor.set_accumulator({"apples", "cherries"}),
    })
    assert mapping_acc["num2"].value == 0
    assert np.array_equal(mapping_acc["num"].value, np.array([7.0]))
    assert mapping_acc["fruit"] == {
        "apples", "oranges", "pears", "grapes", "cherries",
    }

    # --- defaultdict_accumulator (float default) ---------------------------
    float_map = processor.defaultdict_accumulator(float)
    float_map["x"] = 0.0
    float_map["x"] += 4.0
    float_map["y"] += 5.0
    float_map["z"] += float_map["x"]
    float_map["x"] += float_map["y"]
    assert float_map["x"] == 9.0
    assert float_map["y"] == 5.0
    assert float_map["z"] == 4.0
    assert float_map["w"] == 0.0

    # --- defaultdict_accumulator (non-zero default) ------------------------
    shifted_map = processor.defaultdict_accumulator(lambda: 2.0)
    shifted_map["x"] += 4.0
    assert shifted_map["x"] == 6.0

    shifted_map += shifted_map
    assert shifted_map["x"] == 12.0
    assert shifted_map["y"] == 2.0

    # --- column_accumulator ------------------------------------------------
    head = processor.column_accumulator(np.arange(6).reshape(2, 3))
    tail = processor.column_accumulator(np.arange(12).reshape(4, 3))
    head += tail
    assert head.value.sum() == 81
def __init__(self, data_type='data'):
    """Store the data type and book the run/lumi/event/era columns.

    Parameters
    ----------
    data_type : str
        Stored unchanged on ``self.data_type``.
    """
    self.data_type = data_type

    # One empty 1-D column per identifier; the "_1" and "_2" suffixes are
    # kept exactly as the keys the rest of the processor looks up.
    column_names = (
        'run_1', 'lumi_1', 'event_1',
        'run_2', 'lumi_2', 'event_2',
        'era_1', 'era_2',
    )
    self._accumulator = processor.dict_accumulator({
        column: processor.column_accumulator(np.zeros(shape=(0, )))
        for column in column_names
    })

    self.pucorrs = get_pu_weights_function()  # NOT applied for now
    self.nlo_w = get_nlo_weight_function('w')
    self.nlo_z = get_nlo_weight_function('z')
def __init__(self, category='00'):
    """Store the category label and book the single ``dphi`` column.

    Parameters
    ----------
    category : str
        Stored unchanged on ``self.category``.
    """
    self.category = category
    # The previous version also built a ``hist.Cat('dataset', 'dataset')``
    # axis here that was never used; the dead local has been dropped.
    self._accumulator = processor.dict_accumulator({
        'dphi': processor.column_accumulator(np.zeros(shape=(0, ))),
    })
def fill_tree(variable, values):
    """Append ``values`` to the tree column for ``variable``.

    The column lives at ``output['tree_<region>_<variable>'][dataset]``,
    where ``region``, ``dataset`` and ``output`` come from the enclosing
    scope. The first call for a dataset stores the new accumulator; later
    calls add to the existing one.
    """
    new_column = processor.column_accumulator(values)
    per_dataset = output[f'tree_{region}_{variable}']
    if dataset not in per_dataset.keys():
        per_dataset[dataset] = new_column
    else:
        per_dataset[dataset] += new_column
def __init__(self, cols=None):
    """Create one empty column accumulator per requested column name.

    Parameters
    ----------
    cols : sequence of str, optional
        Names of the columns to book. When omitted, the default set
        ``pt, eta, y, phi, mass, l_xy, l_xy_unc, sv_prob, cos2D`` is used.

    The previous signature used a list literal as the default value. That
    list was stored on ``self._cols``, so every instance created without
    arguments shared (and could mutate) the same list object. A fresh list
    is now built per call.
    """
    if cols is None:
        cols = [
            "pt", "eta", "y", "phi", "mass",
            "l_xy", "l_xy_unc", "sv_prob", "cos2D",
        ]
    self._cols = cols
    for col in cols:
        self[col] = processor.column_accumulator(np.array([]))
'DoubleMuon': fileset_all['DoubleMuon_Run2018'], 'EGamma': fileset_all['EGamma_Run2018'], 'diboson': fileset_all['diboson'], 'TTXnoW': fileset_all['TTXnoW'], 'TTW': fileset_all['TTW'], #'WZ': fileset_all['WZ'], 'DY': fileset_all['DY'], } fileset = make_small(fileset, small, 1) add_processes_to_output(fileset, desired_output) for rle in ['run', 'lumi', 'event']: desired_output.update({ 'MuonEG_%s' % rle: processor.column_accumulator(np.zeros(shape=(0, ))), 'EGamma_%s' % rle: processor.column_accumulator(np.zeros(shape=(0, ))), 'DoubleMuon_%s' % rle: processor.column_accumulator(np.zeros(shape=(0, ))), "M_ll": hist.Hist("Counts", dataset_axis, mass_axis), "M3l": hist.Hist("Counts", dataset_axis, mass_axis), "ST": hist.Hist("Counts", dataset_axis, ht_axis), "HT": hist.Hist("Counts", dataset_axis, ht_axis), "LT": hist.Hist("Counts", dataset_axis, ht_axis), "onZ_pt":
def test_new_accumulators():
    """Check ``processor.accumulate`` on plain Python and numpy containers."""
    # Scalars and arrays add arithmetically.
    scalar_total = processor.accumulate((0.0, 3.0))
    assert scalar_total == 3.0

    array_total = processor.accumulate((np.array([2.0]), 3.0))
    assert np.array_equal(array_total, np.array([5.0]))

    # Lists concatenate in order.
    joined = processor.accumulate((list(range(4)), [3], [1, 2]))
    assert joined == [0, 1, 2, 3, 3, 1, 2]

    # Sets take the union.
    fruit = processor.accumulate((
        {"apples", "oranges"},
        {"pears"},
        {"grapes"},
    ))
    assert fruit == {"apples", "oranges", "pears", "grapes"}

    # Dicts merge key by key, reusing the results computed above.
    first_part = {"num": array_total, "fruit": fruit}
    second_part = {"num": 2.0}
    third_part = {"num2": 0, "fruit": {"apples", "cherries"}}
    nested = processor.accumulate((first_part, second_part, third_part))
    assert nested["num2"] == 0
    assert np.array_equal(nested["num"], np.array([7.0]))
    assert nested["fruit"] == {
        "apples", "oranges", "pears", "grapes", "cherries",
    }

    tallies = processor.accumulate((
        defaultdict(float),
        {"x": 4.0, "y": 5.0},
        {"z": 4.0, "x": 5.0},
    ))
    assert tallies["x"] == 9.0
    assert tallies["y"] == 5.0
    assert tallies["z"] == 4.0
    # this is different than old style!
    with pytest.raises(KeyError):
        tallies["w"]

    shifted = processor.accumulate((
        defaultdict(lambda: 2.0),
        defaultdict(lambda: 2, {"x": 4.0}),
    ))
    assert shifted["x"] == 4.0
    assert shifted["y"] == 2.0

    # this is different than old style!
    shifted = processor.accumulate([shifted], shifted)
    assert shifted["x"] == 8.0
    assert shifted["y"] == 4.0
    assert shifted["z"] == 2.0

    # column_accumulator objects are still accepted; 15 + 66 == 81.
    stacked = processor.accumulate((
        processor.column_accumulator(np.arange(6).reshape(2, 3)),
        processor.column_accumulator(np.arange(12).reshape(4, 3)),
    ))
    assert stacked.value.sum() == 81
def __init__(self):
    """Book one empty column per jet-triplet quantity, then print "done".

    The key names and their order match what ``process`` fills.
    """
    column_names = (
        # per-jet kinematics
        "j1pt", "j1phi", "j1eta", "j1mass",
        "j2pt", "j2phi", "j2eta", "j2mass",
        "j3pt", "j3phi", "j3eta", "j3mass",
        # pairwise angular distances
        "dR12", "dR13", "dR23",
        # per-jet tagging score and area
        "j1btag", "j2btag", "j3btag",
        "j1area", "j2area", "j3area",
        # pairwise eta / phi differences
        "j12deta", "j23deta", "j13deta",
        "j12dphi", "j23dphi", "j13dphi",
        # pairwise mass columns
        "j1j2mass", "j2j3mass", "j1j3mass",
        # bookkeeping
        "event", "truth",
    )
    empty_columns = {
        name: processor.column_accumulator(np.array([]))
        for name in column_names
    }
    self._accumulator = processor.dict_accumulator(empty_columns)
    print("done")
'MuonEG': fileset_2018['MuonEG'], 'DoubleMuon': fileset_2018['DoubleMuon'], 'EGamma': fileset_2018['EGamma'], 'diboson': fileset_2018['diboson'], 'TTXnoW': fileset_2018['TTXnoW'], 'TTW': fileset_2018['TTW'], #'WZ': fileset_2018['WZ'], 'DY': fileset_2018['DY'], } fileset = make_small(fileset, small, 1) add_processes_to_output(fileset, desired_output) for rle in ['run', 'lumi', 'event']: desired_output.update({ 'MuonEG_%s'%rle: processor.column_accumulator(np.zeros(shape=(0,))), 'EGamma_%s'%rle: processor.column_accumulator(np.zeros(shape=(0,))), 'DoubleMuon_%s'%rle: processor.column_accumulator(np.zeros(shape=(0,))), }) histograms = sorted(list(desired_output.keys())) if not overwrite: cache.load() if local: exe_args = { 'workers': 12, 'function_args': {'flatten': False},
def process(self, events):
    """Fill the dilepton-baseline histograms for one chunk of events.

    Events with more than two jets are kept. Leptons, jets, b-tags and
    forward jets are built, then the ``dilep_baseline`` selection
    (``SS=False``) is applied and the multiplicity, MET, lepton and jet
    histograms are filled. For datasets whose name matches
    ``MuonEG|DoubleMuon|DoubleEG|EGamma`` the run / lumi / event numbers of
    selected events are appended instead of applying weights. For all other
    datasets the event weights are applied and the jet-related histograms
    are refilled once per entry of ``self.variations``.

    Parameters
    ----------
    events
        Event array providing ``Jet``, ``MET``, ``PV`` and
        ``metadata['dataset']``.

    Returns
    -------
    The filled copy of ``self.accumulator``.

    Changes relative to the previous version
    ----------------------------------------
    * ``N_mu`` was filled with the electron multiplicity; it now uses muons.
    * ``high_score_btag`` sorted ascending (the ``ak.argsort`` default), so
      it picked the two *lowest* b-tag scores although it is meant to hold
      the two leading ones; it now sorts descending.
    * The data/MC dataset test is evaluated once instead of three times.
    * Locals that were computed but never read (same-sign flags, dijet
      masses, HT/ST, ...) and commented-out code were removed. This assumes
      the project helpers ``choose``/``cross``/``getFwdJet`` have no side
      effects -- TODO confirm.
    """
    output = self.accumulator.identity()

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2
    ev = events[presel]
    dataset = ev.metadata['dataset']

    # Datasets matching these names are treated as data: no weights, no
    # systematic variations, but run/lumi/event bookkeeping.
    is_data = re.search(r'MuonEG|DoubleMuon|DoubleEG|EGamma', dataset) is not None

    # the config provides the luminosity used for the MC weight below
    cfg = loadConfig()

    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)

    ## Muons
    muon = Collections(ev, "Muon", "tightSSTTH").get()
    vetomuon = Collections(ev, "Muon", "vetoTTH").get()

    ## Electrons
    electron = Collections(ev, "Electron", "tightSSTTH").get()
    vetoelectron = Collections(ev, "Electron", "vetoTTH").get()

    ## Merge electrons and muons - this should work better now in ak1
    lepton = ak.concatenate([muon, electron], axis=1)
    leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
    trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    # need to sort wrt smeared and recorrected jet pt
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
    jet = jet[~match(jet, muon, deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~match(jet, electron, deltaRCut=0.4)]  # remove jets that overlap with electrons

    central = jet[(abs(jet.eta) < 2.4)]
    btag = getBTagsDeepFlavB(jet, year=self.year)  # should study working point for DeepJet
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)

    ## forward jets
    high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]  # highest momentum spectator

    ## Get the two leading b-jets in terms of btag score.
    # ak.argsort is ascending by default, so descending order must be requested.
    high_score_btag = central[ak.argsort(central.btagDeepFlavB, ascending=False)][:, :2]

    # define the weight
    weight = Weights(len(ev))

    if not is_data:
        # lumi weight
        weight.add("weight", ev.weight * cfg['lumi'][self.year])

        # PU weight - not in the babies...
        weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

        # b-tag SFs
        weight.add("btag", self.btagSF.Method1a(btag, light))

        # lepton SFs
        weight.add("lepton", self.leptonSF.get(electron, muon))

    def selected_weight(mask):
        # Total per-event weight restricted to the events passing ``mask``.
        return weight.weight()[mask]

    cutflow = Cutflow(output, ev, weight=weight)

    sel = Selection(
        dataset=dataset,
        events=ev,
        year=self.year,
        ele=electron,
        ele_veto=vetoelectron,
        mu=muon,
        mu_veto=vetomuon,
        jet_all=jet,
        jet_central=central,
        jet_btag=btag,
        jet_fwd=fwd,
        met=ev.MET,
    )

    BL = sel.dilep_baseline(cutflow=cutflow, SS=False)

    # first, make a few super inclusive plots
    output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=selected_weight(BL))
    output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=selected_weight(BL))
    output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=selected_weight(BL))

    # "N-1" style plots: drop the requirement on the plotted quantity.
    BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
    output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=selected_weight(BL_minusNb))

    output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=selected_weight(BL))
    output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=selected_weight(BL))
    # Fixed: this histogram used to be filled with the electron multiplicity.
    output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=selected_weight(BL))

    BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
    output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=selected_weight(BL_minusFwd))

    BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
    output['MET'].fill(
        dataset=dataset,
        pt=ev.MET[BL_minusMET].pt,
        phi=ev.MET[BL_minusMET].phi,
        weight=selected_weight(BL_minusMET),
    )

    output['lead_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
        weight=selected_weight(BL),
    )

    output['trail_lep'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
        weight=selected_weight(BL),
    )

    output['fwd_jet'].fill(
        dataset=dataset,
        pt=ak.flatten(high_p_fwd[BL].pt_nom),
        eta=ak.flatten(high_p_fwd[BL].eta),
        phi=ak.flatten(high_p_fwd[BL].phi),
        weight=selected_weight(BL),
    )

    output['b1'].fill(
        dataset=dataset,
        pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
        eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
        phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
        weight=selected_weight(BL),
    )

    output['b2'].fill(
        dataset=dataset,
        pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
        eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
        phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
        weight=selected_weight(BL),
    )

    output['j1'].fill(
        dataset=dataset,
        pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
        eta=ak.flatten(jet.eta[:, 0:1][BL]),
        phi=ak.flatten(jet.phi[:, 0:1][BL]),
        weight=selected_weight(BL),
    )

    output['j2'].fill(
        dataset=dataset,
        pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
        eta=ak.flatten(jet[:, 1:2][BL].eta),
        phi=ak.flatten(jet[:, 1:2][BL].phi),
        weight=selected_weight(BL),
    )

    output['j3'].fill(
        dataset=dataset,
        pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
        eta=ak.flatten(jet[:, 2:3][BL].eta),
        phi=ak.flatten(jet[:, 2:3][BL].phi),
        weight=selected_weight(BL),
    )

    if is_data:
        # Record which events were selected, one column per identifier.
        run_ = ak.to_numpy(ev.run)
        lumi_ = ak.to_numpy(ev.luminosityBlock)
        event_ = ak.to_numpy(ev.event)
        output['%s_run' % dataset] += processor.column_accumulator(run_[BL])
        output['%s_lumi' % dataset] += processor.column_accumulator(lumi_[BL])
        output['%s_event' % dataset] += processor.column_accumulator(event_[BL])

    # Now, take care of systematic uncertainties
    if not is_data:
        alljets = getJets(ev, minPt=0, maxEta=4.7)
        alljets = alljets[(alljets.jetId > 1)]
        for var in self.variations:
            # get the collections that change with the variations
            jet = getPtEtaPhi(alljets, pt_var=var)
            jet = jet[(jet.pt > 25)]
            jet = jet[~match(jet, muon, deltaRCut=0.4)]  # remove jets that overlap with muons
            jet = jet[~match(jet, electron, deltaRCut=0.4)]  # remove jets that overlap with electrons

            central = jet[(abs(jet.eta) < 2.4)]
            btag = getBTagsDeepFlavB(jet, year=self.year)  # should study working point for DeepJet
            light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
            fwd = getFwdJet(light)

            ## forward jets
            high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]  # highest momentum spectator

            ## Get the two leading b-jets in terms of btag score (descending sort, see above)
            high_score_btag = central[ak.argsort(central.btagDeepFlavB, ascending=False)][:, :2]

            # The selection sees the MET with its pt replaced by the varied value.
            met = ev.MET
            met['pt'] = getattr(met, var)

            sel = Selection(
                dataset=dataset,
                events=ev,
                year=self.year,
                ele=electron,
                ele_veto=vetoelectron,
                mu=muon,
                mu_veto=vetomuon,
                jet_all=jet,
                jet_central=central,
                jet_btag=btag,
                jet_fwd=fwd,
                met=met,
            )

            BL = sel.dilep_baseline(SS=False)

            # the OS selection remains unchanged
            output['N_jet_' + var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=selected_weight(BL))

            BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
            output['N_fwd_' + var].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=selected_weight(BL_minusFwd))

            BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
            output['N_b_' + var].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=selected_weight(BL_minusNb))
            output['N_central_' + var].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=selected_weight(BL))

            # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
            output['j1_' + var].fill(
                dataset=dataset,
                pt=ak.flatten(jet.pt[:, 0:1][BL]),
                eta=ak.flatten(jet.eta[:, 0:1][BL]),
                phi=ak.flatten(jet.phi[:, 0:1][BL]),
                weight=selected_weight(BL),
            )

            output['b1_' + var].fill(
                dataset=dataset,
                pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                weight=selected_weight(BL),
            )

            output['fwd_jet_' + var].fill(
                dataset=dataset,
                pt=ak.flatten(high_p_fwd[BL].pt),
                eta=ak.flatten(high_p_fwd[BL].eta),
                phi=ak.flatten(high_p_fwd[BL].phi),
                weight=selected_weight(BL),
            )

            BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
            output['MET_' + var].fill(
                dataset=dataset,
                pt=getattr(ev.MET, var)[BL_minusMET],
                phi=ev.MET[BL_minusMET].phi,
                weight=selected_weight(BL_minusMET),
            )

    return output
def process(self, events):
    """Build per-jet-triplet training columns with a generator-level truth flag.

    Events are kept when they have exactly one tight muon or exactly one
    tight electron, at least three tight jets and at least one tight jet
    with ``btagDeepFlavB > 0.642``. The b quark and the two W-decay quarks
    of the hadronically decaying top are taken from ``GenPart``, matched to
    the nearest ``GenJet`` and from there to the nearest tight reco jet.
    Every combination of three tight jets yields one row; ``truth`` is 1
    when the ``genJetIdx`` of all three jets is among the matched ones.

    Parameters
    ----------
    events
        Event array providing ``Jet``, ``Muon``, ``Electron``, ``GenPart``,
        ``GenJet`` and ``event``.

    Returns
    -------
    The filled copy of ``self._accumulator``.

    Fixed relative to the previous version: ``j2btag`` and ``j3btag`` were
    both filled from jet 1; they now use jets 2 and 3.
    """
    output = self._accumulator.identity()

    # --- object and event selection ---
    jets = events.Jet
    tightJet = jets[(jets.pt > 30) & (abs(jets.eta) < 2.4)]
    bJet = tightJet[tightJet.btagDeepFlavB > 0.642]

    muons = events.Muon
    tightMuon = muons[(muons.pt > 30) & (abs(muons.eta) < 2.4)]

    ele = events.Electron
    tightEle = ele[(ele.pt > 35) & (abs(ele.eta) < 2.4)]

    eventSel = (
        ((ak.num(tightMuon) == 1) | (ak.num(tightEle) == 1))
        & (ak.num(tightJet) >= 3)
        & (ak.num(bJet) >= 1)
    )
    final = events[eventSel]

    ##### GENPART MATCHING ######
    genPart = final.GenPart
    tops = genPart[abs(genPart.pdgId) == 6]
    # The isLastCopy flag filters out copied GenParticles.
    tops = tops[tops.hasFlags('isLastCopy')]
    tDecay = tops.distinctChildren
    tDecay = tDecay[tDecay.hasFlags('isLastCopy')]
    # t_Events holds the b quark from each top decay.
    t_Events = tDecay[abs(tDecay.pdgId) == 5]
    W = tDecay[abs(tDecay.pdgId) == 24]
    W = W[W.hasFlags('isLastCopy')]
    WDecay = W.distinctChildren
    WDecay = WDecay[WDecay.hasFlags('isLastCopy')]

    # Select the hadronically decaying W: all of its children are quarks.
    W_Events = ak.flatten(WDecay[ak.all(abs(WDecay.pdgId) <= 8, axis=-1)], axis=3)
    # Mask of top decays whose W decayed to exactly two quarks.
    hadW = ak.num(W_Events, axis=2) == 2
    # Keep the b quarks and W quarks that belong to those decays.
    hadB = ak.flatten(t_Events[hadW], axis=2)
    W_quarks = ak.flatten(W_Events[hadW], axis=2)
    # Concatenating the two gives, per event, the partons of the hadronic top.
    qqb = ak.concatenate([hadB, W_quarks], axis=1)

    ##### GEN JET MATCHING ######
    # Only events with exactly three such partons are usable. The mask is
    # computed once and applied to both the events and the partons.
    has_three_partons = ak.count(qqb.pdgId, axis=1) == 3
    final = final[has_three_partons]
    qqb = qqb[has_three_partons]

    # Implementing tight jet cuts on training data
    finaljets = final.Jet
    finalJets = finaljets[(abs(finaljets.eta) < 2.4) & (finaljets.pt > 30)]

    # Match gen part to gen jet, then gen jet to reco jet.
    matchedGenJets = qqb.nearest(final.GenJet)
    matchedJets = matchedGenJets.nearest(finalJets)

    ### VALIDATION ###
    test = matchedJets.genJetIdx
    combs = ak.combinations(finalJets, 3, replacement=False)

    def _is_matched(gen_idx):
        # True where the jet's genJetIdx equals any of the three matched indices.
        return (gen_idx == test[:, 0]) | (gen_idx == test[:, 1]) | (gen_idx == test[:, 2])

    all_matched = (
        _is_matched(combs['0'].genJetIdx)
        & _is_matched(combs['1'].genJetIdx)
        & _is_matched(combs['2'].genJetIdx)
    )
    trutharray = ak.flatten(all_matched)

    j1, j2, j3 = ak.unzip(ak.flatten(combs))

    # One entry per output column; each value is converted to numpy below.
    # NOTE(review): the "*mass" pair columns are sums of the two jet masses,
    # not invariant masses, and the dphi columns are raw differences that
    # are not wrapped into [-pi, pi] -- confirm this is intended.
    columns = {
        "dR12": j1.delta_r(j2),
        "dR13": j1.delta_r(j3),
        "dR23": j2.delta_r(j3),
        "j1btag": j1.btagCSVV2,
        "j2btag": j2.btagCSVV2,  # fixed: was j1.btagCSVV2
        "j3btag": j3.btagCSVV2,  # fixed: was j1.btagCSVV2
        "j1area": j1.area,
        "j2area": j2.area,
        "j3area": j3.area,
        "j12deta": j1.eta - j2.eta,
        "j23deta": j2.eta - j3.eta,
        "j13deta": j1.eta - j3.eta,
        "j12dphi": j1.phi - j2.phi,
        "j23dphi": j2.phi - j3.phi,
        "j13dphi": j1.phi - j3.phi,
        "j1j2mass": j1.mass + j2.mass,
        "j2j3mass": j2.mass + j3.mass,
        "j1j3mass": j1.mass + j3.mass,
        "j1pt": j1.pt,
        "j1phi": j1.phi,
        "j1eta": abs(j1.eta),
        "j1mass": j1.mass,
        "j2pt": j2.pt,
        "j2phi": j2.phi,
        "j2eta": abs(j2.eta),
        "j2mass": j2.mass,
        "j3pt": j3.pt,
        "j3phi": j3.phi,
        "j3eta": abs(j3.eta),
        "j3mass": j3.mass,
        # Repeat the event number once per jet combination of that event.
        "event": ak.flatten(ak.broadcast_arrays(final.event, combs['0'].pt)[0]),
    }
    for name, values in columns.items():
        output[name] += processor.column_accumulator(ak.to_numpy(values))

    output["truth"] += processor.column_accumulator(ak.to_numpy(trutharray).astype(int))
    return output
def process(self, df):
    """Select events in one chunk and fill all outputs for every analysis region.

    The chunk is tagged by dataset type, generator-level quantities are set up
    where the dataset has them, physics candidates are built, all selection
    cuts are registered, event weights are assembled and finally every output
    is filled once per region returned by ``vbfhinv_regions``.

    Args:
        df: Chunk of events. It must provide ``size``, ``in`` membership tests
            and item get/set; many derived columns are written back into it.
            (Presumably a coffea dataframe -- confirm against the caller.)

    Returns:
        A fresh ``self.accumulator.identity()`` filled with the content of
        this chunk. For an empty chunk the untouched identity is returned.
    """
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)

    # Dataset-type flags, all derived from the dataset name
    dataset = df['dataset']
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_znunu'] = is_lo_znunu(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = (df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z']
                          | df['is_nlo_w'] | df['is_lo_g']
                          | df['is_lo_w_ewk'] | df['is_lo_z_ewk'])
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt; stays None for datasets without a gen boson
    gen_v_pt = None
    if (df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']
            or df['is_lo_z_ewk'] or df['is_lo_w_ewk']):
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_combined']
    elif df['is_lo_g']:
        gen = setup_gen_candidates(df)
        all_gen_photons = gen[(gen.pdg == 22)]
        prompt_mask = (all_gen_photons.status == 1) & (all_gen_photons.flag & 1 == 1)
        stat1_mask = (all_gen_photons.status == 1)
        # '&' binds tighter than '|': use prompt photons where there are any,
        # otherwise fall back to all status-1 photons of the event.
        gen_photons = all_gen_photons[prompt_mask | (~prompt_mask.any()) & stat1_mask]
        gen_photon = gen_photons[gen_photons.pt.argmax()]
        gen_v_pt = gen_photon.pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()
        # Replace non-positive values (e.g. from events without a pair) by 0
        df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0)

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
        df, cfg)

    # Remove jets in accordance with the noise recipe
    if df['year'] == 2017:
        ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) | (ak4.abseta > 3.139)]
        bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) | (bjets.abseta > 3.139)]

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]

    # Muons
    df['is_tight_muon'] = (muons.tightId
                           & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO)
                           & (muons.pt > cfg.MUON.CUTS.TIGHT.PT)
                           & (muons.abseta < cfg.MUON.CUTS.TIGHT.ETA))

    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

    # The (counts == 1) factor zeroes the transverse mass unless there is exactly one muon
    df['MT_mu'] = ((muons.counts == 1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons
    df['is_tight_electron'] = (electrons.tightId
                               & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT)
                               & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA))

    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

    df['MT_el'] = ((electrons.counts == 1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4
    leadak4_index = ak4.pt.argmax()

    # Minimum delta-R between the first ak4 jet and any electron / muon
    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                               muons, photons)

    df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt
    df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt

    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                              df['recoil_phi'],
                                              njet=4,
                                              ptmin=30,
                                              etamax=5.0)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                           met_phi,
                                           njet=4,
                                           ptmin=30,
                                           etamax=5.0)
    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)

    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('at_least_one_tau', taus.counts > 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr',
                  df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    # NOTE(review): the MET-based cut reuses the MINDPHIJR threshold -- confirm this is intended
    selection.add('mindphijm',
                  df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo_sr',
                  np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('dpfcalo_cr',
                  np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)
    selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL)

    # AK4 dijet
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5

    # The energy-fraction requirements are only applied to jets with |eta| <= 2.5;
    # jets outside pass through the "+ ~has_track" term.
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
        (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
        (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

    leading_jet_in_horn = ((diak4.i0.abseta < 3.2) & (diak4.i0.abseta > 2.8)).any()
    trailing_jet_in_horn = ((diak4.i1.abseta < 3.2) & (diak4.i1.abseta > 2.8)).any()

    selection.add('hornveto', (df['dPFTkSR'] < 0.8) | ~(leading_jet_in_horn | trailing_jet_in_horn))

    if df['year'] == 2018:
        if df['is_data']:
            metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) & (df['run'] > 319077))
        else:
            metphihem_mask = pass_all
        selection.add("metphihemextveto", metphihem_mask)
        selection.add('no_el_in_hem', electrons[electrons_in_hem(electrons)].counts == 0)
    else:
        # Other years: register the same cut names so region definitions stay uniform
        selection.add("metphihemextveto", pass_all)
        selection.add('no_el_in_hem', pass_all)

    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj',
                  df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add(
        'dphijj',
        df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add(
        'detajj',
        df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Cleaning cuts for signal region
    max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef)
    selection.add('max_neEmEF', (max_neEmEF < 0.7).any())

    vec_b = calculate_vecB(ak4, met_pt, met_phi)
    vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])

    no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta > 2.5).any()
    no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta < 3.0).any()

    at_least_one_jet_in_hf = (diak4.i0.abseta > 3.0).any() | (diak4.i1.abseta > 3.0).any()
    at_least_one_jet_in_trk = (diak4.i0.abseta < 2.5).any() | (diak4.i1.abseta < 2.5).any()

    # Categorized cleaning cuts
    eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) & (vec_dphi < 1.0)) | (
        (no_jet_in_trk & at_least_one_jet_in_hf) & (vec_b < 0.2))

    selection.add('eemitigation', eemitigation)

    # HF-HF veto in SR
    both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0)
    selection.add('veto_hfhf', ~both_jets_in_hf.any())

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
            diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
            diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass',
                  ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN)
                   & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()

    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

    selection.add('dielectron_mass',
                  ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)
                   & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()

    df['is_tight_photon'] = photons.mediumId & photons.barrel

    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig',
                  photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano", dataset=dataset)

    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset,
                                 multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights
    evaluator = evaluator_from_config(cfg)

    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])

        # Fall back to unit weights when the chunk carries no prefire weight
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            weights.add('prefire', np.ones(df.size))

        weights = candidate_weights(weights, df, evaluator, muons,
                                    electrons, photons, cfg)
        weights = pileup_weights(weights, df, evaluator, cfg)
        weights = ak4_em_frac_weights(weights, diak4, evaluator)
        if gen_v_pt is not None:
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                         df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask]]
            output['kinematics']['met_phi'] += [met_phi[mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [
                ak4[leadak4_index][mask].eta
            ]
            # The event mask must be applied to the array, not to the
            # enclosing list (indexing a list with a mask raises TypeError).
            output['kinematics']['leadbtag'] += [(ak4.pt.max() < 0)[mask]]

            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [
                muons[df['is_tight_muon']].counts[mask]
            ]
            output['kinematics']['mupt0'] += [
                muons[leadmuon_index][mask].pt
            ]
            output['kinematics']['mueta0'] += [
                muons[leadmuon_index][mask].eta
            ]

            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [
                electrons[df['is_tight_electron']].counts[mask]
            ]
            output['kinematics']['elpt0'] += [
                electrons[leadelectron_index][mask].pt
            ]
            output['kinematics']['eleta0'] += [
                electrons[leadelectron_index][mask].eta
            ]

            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [
                photons[df['is_tight_photon']].counts[mask]
            ]
            output['kinematics']['gpt0'] += [
                photons[leadphoton_index][mask].pt
            ]
            output['kinematics']['geta0'] += [
                photons[leadphoton_index][mask].eta
            ]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    regions = vbfhinv_regions(cfg)

    # Get veto weights (only for MC)
    if not df['is_data']:
        veto_weights = get_veto_weights(df, cfg, evaluator, electrons, muons, taus)

    for region, cuts in regions.items():
        # By default no weight is excluded (presumably None matches no weight name)
        exclude = [None]
        # Region-specific weights are added to a copy so that regions stay independent
        region_weights = copy.deepcopy(weights)

        if not df['is_data']:
            ### Trigger weights
            if re.match(r'cr_(\d+)e.*', region):
                # Probability that at least one electron passes: 1 - prod(1 - eff)
                p_pass_data = 1 - (1 - evaluator["trigger_electron_eff_data"]
                                   (electrons.etasc, electrons.pt)).prod()
                p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"]
                                 (electrons.etasc, electrons.pt)).prod()
                trigger_weight = p_pass_data / p_pass_mc
                # 0/0 (no efficiency in either data or MC) is treated as weight 1
                trigger_weight[np.isnan(trigger_weight)] = 1
                region_weights.add('trigger', trigger_weight)
            elif re.match(r'cr_(\d+)m.*', region) or re.match(
                    'sr_.*', region):
                region_weights.add(
                    'trigger_met',
                    evaluator["trigger_met"](df['recoil_pt']))
            elif re.match(r'cr_g.*', region):
                photon_trigger_sf(region_weights, photons, df)

            # Veto weights
            if re.match('.*no_veto.*', region):
                exclude = [
                    "muon_id_iso_tight", "muon_id_tight", "muon_iso_tight",
                    "muon_id_loose", "muon_iso_loose", "ele_reco",
                    "ele_id_tight", "ele_id_loose", "tau_id"
                ]
                region_weights.add(
                    "veto",
                    veto_weights.partial_weight(include=["nominal"]))

            # HEM-veto weights for signal region MC
            if re.match('^sr_vbf.*', region) and df['year'] == 2018:
                # Events that lie in the HEM-veto region
                events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6)
                # Weight is the "good lumi fraction" for 2018
                weight = 21.1 / 59.7
                hem_weight = np.where(events_to_weight_mask, weight, 1.0)

                region_weights.add("hem_weight", hem_weight)

        # This is the default weight for this region
        rweight = region_weights.partial_weight(exclude=exclude)

        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region][dataset]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][dataset][cutname] += selection.all(
                    *cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)

        if cfg.RUN.SAVE.TREE:
            if region in ['cr_1e_vbf', 'cr_1m_vbf']:
                # NOTE(review): gen_v_pt is None and 'mjj_gen' is unset for
                # datasets without a gen boson (e.g. data); the two gen
                # columns below would then fail -- confirm trees are only
                # saved for suitable datasets.
                output['tree_int64'][region][
                    "event"] += processor.column_accumulator(
                        df["event"][mask])
                output['tree_float16'][region][
                    "gen_v_pt"] += processor.column_accumulator(
                        np.float16(gen_v_pt[mask]))
                output['tree_float16'][region][
                    "gen_mjj"] += processor.column_accumulator(
                        np.float16(df['mjj_gen'][mask]))
                output['tree_float16'][region][
                    "recoil_pt"] += processor.column_accumulator(
                        np.float16(df["recoil_pt"][mask]))
                output['tree_float16'][region][
                    "recoil_phi"] += processor.column_accumulator(
                        np.float16(df["recoil_phi"][mask]))
                output['tree_float16'][region][
                    "mjj"] += processor.column_accumulator(
                        np.float16(df["mjj"][mask]))

                output['tree_float16'][region][
                    "leadak4_pt"] += processor.column_accumulator(
                        np.float16(diak4.i0.pt[mask]))
                output['tree_float16'][region][
                    "leadak4_eta"] += processor.column_accumulator(
                        np.float16(diak4.i0.eta[mask]))
                output['tree_float16'][region][
                    "leadak4_phi"] += processor.column_accumulator(
                        np.float16(diak4.i0.phi[mask]))

                output['tree_float16'][region][
                    "trailak4_pt"] += processor.column_accumulator(
                        np.float16(diak4.i1.pt[mask]))
                output['tree_float16'][region][
                    "trailak4_eta"] += processor.column_accumulator(
                        np.float16(diak4.i1.eta[mask]))
                output['tree_float16'][region][
                    "trailak4_phi"] += processor.column_accumulator(
                        np.float16(diak4.i1.phi[mask]))

                output['tree_float16'][region][
                    "minDPhiJetRecoil"] += processor.column_accumulator(
                        np.float16(df["minDPhiJetRecoil"][mask]))
                if '_1e_' in region:
                    output['tree_float16'][region][
                        "leadlep_pt"] += processor.column_accumulator(
                            np.float16(electrons.pt.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_eta"] += processor.column_accumulator(
                            np.float16(electrons[
                                electrons.pt.argmax()].eta.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_phi"] += processor.column_accumulator(
                            np.float16(electrons[
                                electrons.pt.argmax()].phi.max()[mask]))
                elif '_1m_' in region:
                    output['tree_float16'][region][
                        "leadlep_pt"] += processor.column_accumulator(
                            np.float16(muons.pt.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_eta"] += processor.column_accumulator(
                            np.float16(
                                muons[muons.pt.argmax()].eta.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_phi"] += processor.column_accumulator(
                            np.float16(
                                muons[muons.pt.argmax()].phi.max()[mask]))

                for name, w in region_weights._weights.items():
                    output['tree_float16'][region][
                        f"weight_{name}"] += processor.column_accumulator(
                            np.float16(w[mask]))
                output['tree_float16'][region][
                    "weight_total"] += processor.column_accumulator(
                        np.float16(rweight[mask]))
            if region == 'inclusive':
                output['tree_int64'][region][
                    "event"] += processor.column_accumulator(
                        df["event"][mask])
                # One boolean column per registered cut
                for name in selection.names:
                    output['tree_bool'][region][
                        name] += processor.column_accumulator(
                            np.bool_(selection.all(*[name])[mask]))

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=rweight[mask])

        fill_mult('ak4_mult', ak4[ak4.pt > 30])
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in region_weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide",
                   weight_type=wname,
                   weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, rweight[mask])
        w_alljets_nopref = weight_shape(
            ak4[mask].eta,
            region_weights.partial_weight(exclude=exclude +
                                          ['prefire'])[mask])

        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

        ezfill('ak4_eta_nopref',
               jeteta=ak4[mask].eta.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref',
               jetphi=ak4[mask].phi.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref',
               jetpt=ak4[mask].pt.flatten(),
               weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], rweight[mask])
        ezfill('ak4_eta0',
               jeteta=diak4.i0.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi0',
               jetphi=diak4.i0.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt0',
               jetpt=diak4.i0.pt[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_ptraw0',
               jetpt=diak4.i0.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf0',
               frac=diak4.i0.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf0',
               frac=diak4.i0.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst0',
               nconst=diak4.i0.nconst[mask].flatten(),
               weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1',
               jeteta=diak4.i1.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi1',
               jetphi=diak4.i1.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt1',
               jetpt=diak4.i1.pt[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_ptraw1',
               jetpt=diak4.i1.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf1',
               frac=diak4.i1.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf1',
               frac=diak4.i1.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst1',
               nconst=diak4.i1.nconst[mask].flatten(),
               weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], rweight[mask])
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo_cr',
               dpfcalo=df["dPFCaloCR"][mask],
               weight=rweight[mask])
        ezfill('dpfcalo_sr',
               dpfcalo=df["dPFCaloSR"][mask],
               weight=rweight[mask])
        ezfill('met', met=met_pt[mask], weight=rweight[mask])
        ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask])
        ezfill('recoil',
               recoil=df["recoil_pt"][mask],
               weight=rweight[mask])
        ezfill('recoil_phi',
               phi=df["recoil_phi"][mask],
               weight=rweight[mask])
        ezfill('dphijm',
               dphi=df["minDPhiJetMet"][mask],
               weight=rweight[mask])
        ezfill('dphijr',
               dphi=df["minDPhiJetRecoil"][mask],
               weight=rweight[mask])

        ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask])
        ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask])
        ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask])

        # Gen-level distributions use the bare generator weight only
        if gen_v_pt is not None:
            ezfill('gen_vpt',
                   vpt=gen_v_pt[mask],
                   weight=df['Generator_weight'][mask])
            ezfill('gen_mjj',
                   mjj=df['mjj_gen'][mask],
                   weight=df['Generator_weight'][mask])

        # Photon CR data-driven QCD estimate
        if df['is_data'] and re.match("cr_g.*", region) and re.match(
                "(SinglePhoton|EGamma).*", dataset):
            w_imp = photon_impurity_weights(
                photons[leadphoton_index].pt.max()[mask], df["year"])

            output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset),
                               region=region,
                               mjj=df["mjj"][mask],
                               weight=rweight[mask] * w_imp)
            output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset),
                                  region=region,
                                  recoil=df["recoil_pt"][mask],
                                  weight=rweight[mask] * w_imp)

        # Uncertainty variations
        if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']:
            theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')]
            for unc in theory_uncs:
                reweight = evaluator[unc](gen_v_pt)
                w = (region_weights.weight() * reweight)[mask]
                ezfill('mjj_unc',
                       mjj=df['mjj'][mask],
                       uncertainty=unc,
                       weight=w)

        # Two dimensional
        ezfill('recoil_mjj',
               recoil=df["recoil_pt"][mask],
               mjj=df["mjj"][mask],
               weight=rweight[mask])

        # Muons
        if '_1m_' in region or '_2m_' in region or 'no_veto' in region:
            w_allmu = weight_shape(muons.pt[mask], rweight[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_pt_abseta',
                   pt=muons.pt[mask].flatten(),
                   abseta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask])
            ezfill('muon_eta',
                   eta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_phi',
                   phi=muons.phi[mask].flatten(),
                   weight=w_allmu)

        # Dimuon
        if '_2m_' in region:
            w_dimu = weight_shape(dimuons.pt[mask], rweight[mask])
            ezfill('muon_pt0',
                   pt=dimuons.i0.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_pt1',
                   pt=dimuons.i1.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_eta0',
                   eta=dimuons.i0.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_eta1',
                   eta=dimuons.i1.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_phi0',
                   phi=dimuons.i0.phi[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_phi1',
                   phi=dimuons.i1.phi[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_pt',
                   pt=dimuons.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_eta',
                   eta=dimuons.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_mass',
                   dilepton_mass=dimuons.mass[mask].flatten(),
                   weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region or 'no_veto' in region:
            w_allel = weight_shape(electrons.pt[mask], rweight[mask])
            ezfill('electron_pt',
                   pt=electrons.pt[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_pt_eta',
                   pt=electrons.pt[mask].flatten(),
                   eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt',
                   mt=df['MT_el'][mask],
                   weight=rweight[mask])
            ezfill('electron_eta',
                   eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_phi',
                   phi=electrons.phi[mask].flatten(),
                   weight=w_allel)

        # Dielectron
        if '_2e_' in region:
            w_diel = weight_shape(dielectrons.pt[mask], rweight[mask])
            ezfill('electron_pt0',
                   pt=dielectrons.i0.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_pt1',
                   pt=dielectrons.i1.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_eta0',
                   eta=dielectrons.i0.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_eta1',
                   eta=dielectrons.i1.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_phi0',
                   phi=dielectrons.i0.phi[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_phi1',
                   phi=dielectrons.i1.phi[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_pt',
                   pt=dielectrons.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_eta',
                   eta=dielectrons.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_mass',
                   dilepton_mass=dielectrons.mass[mask].flatten(),
                   weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask], rweight[mask])
            ezfill('photon_pt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_eta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_phi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            # Recoil is restricted to events that actually have a leading
            # photon so that it lines up with the flattened photon pt.
            ezfill('photon_pt0_recoil',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   recoil=df['recoil_pt'][mask
                                          & (leadphoton_index.counts > 0)],
                   weight=w_leading_photon)
            ezfill('photon_eta_phi',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)

        # Tau
        if 'no_veto' in region:
            w_all_taus = weight_shape(taus.pt[mask], rweight[mask])
            ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus)

        # PV
        # Region weight without the pileup component, computed once and reused below
        rweight_nopu = region_weights.partial_weight(exclude=exclude +
                                                     ['pileup'])[mask]

        ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask])
        ezfill('npvgood',
               nvtx=df['PV_npvsGood'][mask],
               weight=rweight[mask])

        ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=rweight_nopu)
        ezfill('npvgood_nopu',
               nvtx=df['PV_npvsGood'][mask],
               weight=rweight_nopu)

        ezfill('rho_all',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=rweight[mask])
        ezfill('rho_central',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=rweight[mask])
        ezfill('rho_all_nopu',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=rweight_nopu)
        ezfill('rho_central_nopu',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=rweight_nopu)
    return output
def __init__(self, dphi_control=False, data_type='sig'):
    """Store the run options and create the empty output columns.

    Args:
        dphi_control: Stored unchanged as ``self.dphi_control``.
        data_type: Stored unchanged as ``self.data_type`` (default ``'sig'``).
    """
    self.dphi_control = dphi_control
    self.data_type = data_type

    # Each output starts as an empty 1-D column; key order matches the
    # original hand-written dictionary.
    column_names = (
        'all05', 'nopu05', 'dbeta',
        'all05w', 'nopu05w', 'dbetaw',
        'pt', 'eta', 'wgt', 'ljtype', 'channel',
    )
    self._accumulator = processor.dict_accumulator({
        name: processor.column_accumulator(np.zeros(shape=(0, )))
        for name in column_names
    })

    self.pucorrs = get_pu_weights_function()
    ## NOT applied for now
    self.nlo_w = get_nlo_weight_function('w')
    self.nlo_z = get_nlo_weight_function('z')
def process_shift(self, events, shift_name):
    """Build selections and weights for one chunk and fill all outputs.

    The chunk is treated as real data when ``events`` has no ``genWeight``
    attribute.  Named boolean cuts are registered in a ``PackedSelection``,
    simulation weights are registered in a ``Weights`` object, and the
    output objects returned by ``self.make_output()`` are filled per region.

    Args:
        events: event record for one chunk; ``events.metadata['dataset']``
            is used as the key of the returned dict.
        shift_name: name of the systematic shift being processed, or
            ``None`` for the nominal pass.  Cutflows, the b-tag weight
            output, the n-1 output, ``sumw`` and ``weightStats`` are only
            produced when this is ``None``.

    Returns:
        ``{dataset: output}``.

    Raises:
        RuntimeError: if ``self._jet_arbitration`` is not one of
            'pt', 'mass', 'n2', 'ddb', 'ddc'.
        ValueError: if ``self._tagger`` is not one of 'v1'..'v4'.
    """
    dataset = events.metadata['dataset']
    isRealData = not hasattr(events, "genWeight")
    selection = PackedSelection()
    weights = Weights(len(events), storeIndividual=True)
    output = self.make_output()
    if shift_name is None and not isRealData:
        output['sumw'] = ak.sum(events.genWeight)

    # Jet trigger: OR of every configured path that exists in this chunk.
    # Simulation only evaluates it when self._newTrigger is set; otherwise
    # the cut is all-true.
    if isRealData or self._newTrigger:
        trigger = np.zeros(len(events), dtype='bool')
        for t in self._triggers[self._year]:
            if t in events.HLT.fields:
                trigger = trigger | events.HLT[t]
        selection.add('trigger', trigger)
        del trigger
    else:
        selection.add('trigger', np.ones(len(events), dtype='bool'))

    # Luminosity mask is applied to data only.
    if isRealData:
        selection.add(
            'lumimask',
            lumiMasks[self._year](events.run, events.luminosityBlock))
    else:
        selection.add('lumimask', np.ones(len(events), dtype='bool'))

    # 'dropB' is registered here but does not appear in any region list below.
    if isRealData and self._skipRunB and self._year == '2017':
        selection.add('dropB', events.run > 299329)
    else:
        selection.add('dropB', np.ones(len(events), dtype='bool'))

    # Muon trigger: evaluated on data only, all-true for simulation.
    if isRealData:
        trigger = np.zeros(len(events), dtype='bool')
        for t in self._muontriggers[self._year]:
            if t in events.HLT.fields:
                trigger |= np.array(events.HLT[t])
        selection.add('muontrigger', trigger)
        del trigger
    else:
        selection.add('muontrigger', np.ones(len(events), dtype='bool'))

    # AND of all MET filter flags configured for this year and sample type.
    metfilter = np.ones(len(events), dtype='bool')
    for flag in self._met_filters[
            self._year]['data' if isRealData else 'mc']:
        metfilter &= np.array(events.Flag[flag])
    selection.add('metfilter', metfilter)
    del metfilter

    # Derived fat-jet quantities attached as extra fields.
    fatjets = events.FatJet
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

    candidatejet = fatjets[
        # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
        (fatjets.pt > 200) & (abs(fatjets.eta) < 2.5) &
        fatjets.isTight  # this is loose in sampleContainer
    ]
    candidatejet = candidatejet[:, : 2]  # Only consider first two to match generators
    # Reduce the (up to two) candidates to one jet per event according to
    # the configured arbitration; ak.firsts yields None where no jet remains.
    if self._jet_arbitration == 'pt':
        candidatejet = ak.firsts(candidatejet)
    elif self._jet_arbitration == 'mass':
        candidatejet = ak.firsts(candidatejet[ak.argmax(
            candidatejet.msdcorr, axis=1, keepdims=True)])
    elif self._jet_arbitration == 'n2':
        candidatejet = ak.firsts(candidatejet[ak.argmin(candidatejet.n2ddt,
                                                        axis=1,
                                                        keepdims=True)])
    elif self._jet_arbitration == 'ddb':
        candidatejet = ak.firsts(candidatejet[ak.argmax(
            candidatejet.btagDDBvLV2, axis=1, keepdims=True)])
    elif self._jet_arbitration == 'ddc':
        candidatejet = ak.firsts(candidatejet[ak.argmax(
            candidatejet.btagDDCvLV2, axis=1, keepdims=True)])
    else:
        raise RuntimeError("Unknown candidate jet arbitration")

    # Tagger scores (b-vs-light, c-vs-light, c-vs-b) for the chosen version.
    if self._tagger == 'v1':
        bvl = candidatejet.btagDDBvL
        cvl = candidatejet.btagDDCvL
        cvb = candidatejet.btagDDCvB
    elif self._tagger == 'v2':
        bvl = candidatejet.btagDDBvLV2
        cvl = candidatejet.btagDDCvLV2
        cvb = candidatejet.btagDDCvBV2
    elif self._tagger == 'v3':
        bvl = candidatejet.particleNetMD_Xbb
        cvl = candidatejet.particleNetMD_Xcc / (
            1 - candidatejet.particleNetMD_Xbb)
        cvb = candidatejet.particleNetMD_Xcc / (
            candidatejet.particleNetMD_Xcc +
            candidatejet.particleNetMD_Xbb)
    elif self._tagger == 'v4':
        bvl = candidatejet.particleNetMD_Xbb
        cvl = candidatejet.btagDDCvLV2
        cvb = candidatejet.particleNetMD_Xcc / (
            candidatejet.particleNetMD_Xcc +
            candidatejet.particleNetMD_Xbb)
    else:
        raise ValueError("Not an option")

    # Candidate-jet kinematic windows; the three variants differ only in
    # the lower pt threshold (450 / 400 / 200).
    selection.add('minjetkin', (candidatejet.pt >= 450) &
                  (candidatejet.pt < 1200) & (candidatejet.msdcorr >= 40.) &
                  (candidatejet.msdcorr < 201.) &
                  (abs(candidatejet.eta) < 2.5))
    selection.add('_strict_mass',
                  (candidatejet.msdcorr > 85) & (candidatejet.msdcorr < 130))
    selection.add('_high_score', cvl > 0.8)
    selection.add('minjetkinmu', (candidatejet.pt >= 400) &
                  (candidatejet.pt < 1200) & (candidatejet.msdcorr >= 40.) &
                  (candidatejet.msdcorr < 201.) &
                  (abs(candidatejet.eta) < 2.5))
    selection.add('minjetkinw', (candidatejet.pt >= 200) &
                  (candidatejet.pt < 1200) & (candidatejet.msdcorr >= 40.) &
                  (candidatejet.msdcorr < 201.) &
                  (abs(candidatejet.eta) < 2.5))
    selection.add('jetid', candidatejet.isTight)
    selection.add('n2ddt', (candidatejet.n2ddt < 0.))
    # Tagger pass thresholds: 'v2' uses its own set, all others share one.
    if not self._tagger == 'v2':
        selection.add('ddbpass', (bvl >= 0.89))
        selection.add('ddcpass', (cvl >= 0.83))
        selection.add('ddcvbpass', (cvb >= 0.2))
    else:
        selection.add('ddbpass', (bvl >= 0.7))
        selection.add('ddcpass', (cvl >= 0.45))
        selection.add('ddcvbpass', (cvb >= 0.03))

    jets = events.Jet
    jets = jets[(jets.pt > 30.) & (abs(jets.eta) < 2.5) & jets.isTight]
    # only consider first 4 jets to be consistent with old framework
    jets = jets[:, :4]
    dphi = abs(jets.delta_phi(candidatejet))
    # Veto: highest tag score among jets with dphi > pi/2 is below 'medium'.
    selection.add(
        'antiak4btagMediumOppHem',
        ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
               axis=1,
               mask_identity=False) <
        BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
    ak4_away = jets[dphi > 0.8]
    selection.add(
        'ak4btagMedium08',
        ak.max(ak4_away[self._ak4tagBranch], axis=1, mask_identity=False) >
        BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])

    met = events.MET
    selection.add('met', met.pt < 140.)

    goodmuon = ((events.Muon.pt > 10) & (abs(events.Muon.eta) < 2.4) &
                (events.Muon.pfRelIso04_all < 0.25) & events.Muon.looseId)
    nmuons = ak.sum(goodmuon, axis=1)
    leadingmuon = ak.firsts(events.Muon[goodmuon])

    # Electron and tau counting.  Taus are required to be separated by more
    # than 0.4 (metric_table) from every good muon and good electron.
    if self._looseTau:
        goodelectron = ((events.Electron.pt > 10) &
                        (abs(events.Electron.eta) < 2.5) &
                        (events.Electron.cutBased >= events.Electron.VETO))
        nelectrons = ak.sum(goodelectron, axis=1)

        ntaus = ak.sum(
            ((events.Tau.pt > 20) & (abs(events.Tau.eta) < 2.3) &
             events.Tau.idDecayMode &
             ((events.Tau.idMVAoldDM2017v2 & 2) != 0) &
             ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                    axis=2) &
             ak.all(events.Tau.metric_table(
                 events.Electron[goodelectron]) > 0.4,
                    axis=2)),
            axis=1,
        )
    else:
        goodelectron = (
            (events.Electron.pt > 10) & (abs(events.Electron.eta) < 2.5) &
            (events.Electron.cutBased >= events.Electron.LOOSE))
        nelectrons = ak.sum(goodelectron, axis=1)

        ntaus = ak.sum(
            (events.Tau.pt > 20) &
            events.Tau.idDecayMode  # bacon iso looser than Nano selection
            & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                     axis=2) &
            ak.all(events.Tau.metric_table(events.Electron[goodelectron]) >
                   0.4,
                   axis=2),
            axis=1,
        )

    selection.add('noleptons',
                  (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('onemuon',
                  (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
    selection.add('muonkin',
                  (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1))
    selection.add('muonDphiAK8',
                  abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3)

    # W-Tag (Tag and Probe)
    # tag side
    selection.add(
        'ak4btagMediumOppHem',
        ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
               axis=1,
               mask_identity=False) >
        BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
    selection.add('met40p', met.pt > 40.)
    selection.add('tightMuon',
                  (leadingmuon.tightId) & (leadingmuon.pt > 53.))
    # selection.add('ptrecoW', (leadingmuon + met).pt > 250.)
    selection.add('ptrecoW200', (leadingmuon + met).pt > 200.)
    selection.add(
        'ak4btagNearMu',
        leadingmuon.delta_r(leadingmuon.nearest(ak4_away, axis=None)) < 2.0)
    _bjets = jets[self._ak4tagBranch] > BTagEfficiency.btagWPs[
        self._ak4tagger][self._year]['medium']
    # _nearAK8 = jets.delta_r(candidatejet) < 0.8
    # _nearMu = jets.delta_r(ak.firsts(events.Muon)) < 0.3
    # selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)
    _nearAK8 = jets.delta_r(candidatejet) < 0.8
    _nearMu = jets.delta_r(leadingmuon) < 0.3
    # At least one tagged jet that is near neither the candidate jet nor
    # the leading muon.
    selection.add('ak4btagOld',
                  ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

    # _nearAK8 = jets.delta_r(candidatejet) < 0.8
    # _nearMu = jets.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) < 0.3
    # selection.add('ak4btagNew', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

    # probe side
    selection.add('minWjetpteta',
                  (candidatejet.pt >= 200) & (abs(candidatejet.eta) < 2.4))
    # selection.add('noNearMuon', candidatejet.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) > 1.0)
    selection.add('noNearMuon', candidatejet.delta_r(leadingmuon) > 1.0)
    #####

    # Weights and generator-level flavour.  Data gets flavour 0 and no
    # weights; everything in the else-branch applies to simulation only.
    # NOTE(review): branch nesting below was reconstructed from
    # whitespace-collapsed source - confirm against the original layout.
    if isRealData:
        genflavor = ak.zeros_like(candidatejet.pt)
    else:
        if 'HToCC' in dataset or 'HToBB' in dataset:
            if self._ewkHcorr:
                add_HiggsEW_kFactors(weights, events.GenPart, dataset)

        weights.add('genweight', events.genWeight)
        # The helpers are still called with None / [] when the branch is
        # missing from the input.
        if "PSWeight" in events.fields:
            add_ps_weight(weights, events.PSWeight)
        else:
            add_ps_weight(weights, None)
        if "LHEPdfWeight" in events.fields:
            add_pdf_weight(weights, events.LHEPdfWeight)
        else:
            add_pdf_weight(weights, None)
        if "LHEScaleWeight" in events.fields:
            add_scalevar_7pt(weights, events.LHEScaleWeight)
            add_scalevar_3pt(weights, events.LHEScaleWeight)
        else:
            add_scalevar_7pt(weights, [])
            add_scalevar_3pt(weights, [])

        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        bosons = getBosons(events.GenPart)
        matchedBoson = candidatejet.nearest(bosons,
                                            axis=None,
                                            threshold=0.8)
        if self._tightMatch:
            # Additionally require relative pt and mass differences below
            # 0.5 and 0.3 (signed, not absolute) before assigning a flavour.
            match_mask = (
                (candidatejet.pt - matchedBoson.pt) / matchedBoson.pt < 0.5
            ) & ((candidatejet.msdcorr - matchedBoson.mass) /
                 matchedBoson.mass < 0.3)
            selmatchedBoson = ak.mask(matchedBoson, match_mask)
            genflavor = bosonFlavor(selmatchedBoson)
        else:
            genflavor = bosonFlavor(matchedBoson)
        genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
        if self._newVjetsKfactor:
            add_VJets_kFactors(weights, events.GenPart, dataset)
        else:
            add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)

        if shift_name is None:
            output['btagWeight'].fill(val=self._btagSF.addBtagWeight(
                weights, ak4_away, self._ak4tagBranch))

        if self._nnlops_rew and dataset in [
                'GluGluHToCC_M125_13TeV_powheg_pythia8'
        ]:
            weights.add('minlo_rew',
                        powheg_to_nnlops(ak.to_numpy(genBosonPt)))

        if self._newTrigger:
            add_jetTriggerSF(
                weights, ak.firsts(fatjets), self._year
                if not self._skipRunB else f'{self._year}CDEF', selection)
        else:
            add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                 candidatejet.pt, self._year)

        add_mutriggerSF(weights, leadingmuon, self._year, selection)
        add_mucorrectionsSF(weights, leadingmuon, self._year, selection)

        if self._year in ("2016", "2017"):
            weights.add("L1Prefiring", events.L1PreFiringWeight.Nom,
                        events.L1PreFiringWeight.Up,
                        events.L1PreFiringWeight.Dn)

        logger.debug("Weight statistics: %r" % weights.weightStatistics)

    # Mass scale factor is applied only where genflavor > 0; jets with
    # genflavor == 0 keep the unscaled corrected mass.
    msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
        genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

    # Region name -> ordered list of cut names (the order defines the
    # cutflow step index).
    regions = {
        'signal': [
            'noleptons', 'minjetkin', 'met', 'metfilter', 'jetid',
            'antiak4btagMediumOppHem', 'n2ddt', 'trigger', 'lumimask'
        ],
        'signal_noddt': [
            'noleptons', 'minjetkin', 'met', 'jetid',
            'antiak4btagMediumOppHem', 'trigger', 'lumimask', 'metfilter'
        ],
        # 'muoncontrol': ['minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask', 'metfilter'],
        'muoncontrol': [
            'onemuon', 'muonkin', 'muonDphiAK8', 'metfilter', 'minjetkinmu',
            'jetid', 'ak4btagMedium08', 'n2ddt', 'muontrigger', 'lumimask'
        ],
        # NOTE(review): 'jetid' is listed twice here, which adds a
        # no-effect step to this region's cutflow - confirm intended.
        'muoncontrol_noddt': [
            'onemuon', 'muonkin', 'muonDphiAK8', 'jetid', 'metfilter',
            'minjetkinmu', 'jetid', 'ak4btagMedium08', 'muontrigger',
            'lumimask'
        ],
        'wtag': [
            'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'met40p',
            'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger',
            'lumimask'
        ],
        'wtag0': [
            'onemuon', 'tightMuon', 'met40p', 'metfilter', 'ptrecoW200',
            'ak4btagOld', 'muontrigger', 'lumimask'
        ],
        'wtag2': [
            'onemuon', 'tightMuon', 'minjetkinw', 'jetid',
            'ak4btagMediumOppHem', 'met40p', 'metfilter', 'ptrecoW200',
            'ak4btagOld', 'muontrigger', 'lumimask'
        ],
        'noselection': [],
    }

    def normalize(val, cut):
        # Convert to a NumPy array with missing entries replaced by NaN,
        # optionally applying the boolean mask `cut` first.
        if cut is None:
            ar = ak.to_numpy(ak.fill_none(val, np.nan))
            return ar
        else:
            ar = ak.to_numpy(ak.fill_none(val[cut], np.nan))
            return ar

    import time
    tic = time.time()
    # Cutflows (nominal pass only): step 0 has no cuts applied, step i+1
    # has the first i+1 cuts of the region applied, followed by the two
    # tagger cuts 'ddcvbpass' and 'ddcpass'.
    if shift_name is None:
        for region, cuts in regions.items():
            allcuts = set([])
            cut = selection.all(*allcuts)
            output['cutflow_msd'].fill(region=region,
                                       genflavor=normalize(
                                           genflavor, None),
                                       cut=0,
                                       weight=weights.weight(),
                                       msd=normalize(msd_matched, None))
            output['cutflow_eta'].fill(region=region,
                                       genflavor=normalize(genflavor, cut),
                                       cut=0,
                                       weight=weights.weight()[cut],
                                       eta=normalize(
                                           candidatejet.eta, cut))
            output['cutflow_pt'].fill(region=region,
                                      genflavor=normalize(genflavor, cut),
                                      cut=0,
                                      weight=weights.weight()[cut],
                                      pt=normalize(candidatejet.pt, cut))
            # The loop variable `cut` (a cut name) is immediately rebound
            # to the cumulative boolean mask.
            for i, cut in enumerate(cuts + ['ddcvbpass', 'ddcpass']):
                allcuts.add(cut)
                cut = selection.all(*allcuts)
                output['cutflow_msd'].fill(region=region,
                                           genflavor=normalize(
                                               genflavor, cut),
                                           cut=i + 1,
                                           weight=weights.weight()[cut],
                                           msd=normalize(msd_matched, cut))
                output['cutflow_eta'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    cut=i + 1,
                    weight=weights.weight()[cut],
                    eta=normalize(candidatejet.eta, cut))
                output['cutflow_pt'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    cut=i + 1,
                    weight=weights.weight()[cut],
                    pt=normalize(candidatejet.pt, cut))

                # Event-identification dump for data in the signal region,
                # taken once 'ddcpass' (the last cut) has been applied.
                if self._evtVizInfo and 'ddcpass' in allcuts and isRealData and region == 'signal':
                    if 'event' not in events.fields:
                        continue
                    _cut = selection.all(*allcuts, '_strict_mass',
                                         '_high_score')
                    # _cut = selection.all('_strict_mass'')
                    output['to_check'][
                        'mass'] += processor.column_accumulator(
                            normalize(msd_matched, _cut))
                    nfatjet = ak.sum(
                        ((fatjets.pt > 200) & (abs(fatjets.eta) < 2.5) &
                         fatjets.isTight),
                        axis=1)
                    output['to_check'][
                        'njet'] += processor.column_accumulator(
                            normalize(nfatjet, _cut))
                    # File name repeated once per selected event.
                    output['to_check'][
                        'fname'] += processor.column_accumulator(
                            np.array([events.metadata['filename']] *
                                     len(normalize(msd_matched, _cut))))
                    output['to_check'][
                        'event'] += processor.column_accumulator(
                            normalize(events.event, _cut))
                    output['to_check'][
                        'luminosityBlock'] += processor.column_accumulator(
                            normalize(events.luminosityBlock, _cut))
                    output['to_check'][
                        'run'] += processor.column_accumulator(
                            normalize(events.run, _cut))

    # Nominal pass: nominal plus every registered weight variation.
    # Shifted pass: only the shift itself.
    if shift_name is None:
        systematics = [None] + list(weights.variations)
    else:
        systematics = [shift_name]

    def fill(region, systematic, wmod=None):
        # Fill the template-style outputs for one region and systematic.
        # `wmod`, when given, is a per-event multiplier applied on top of
        # the nominal weight instead of a named weight variation.
        selections = regions[region]
        cut = selection.all(*selections)
        sname = 'nominal' if systematic is None else systematic
        if wmod is None:
            if systematic in weights.variations:
                weight = weights.weight(modifier=systematic)[cut]
            else:
                weight = weights.weight()[cut]
        else:
            weight = weights.weight()[cut] * wmod[cut]

        output['templates'].fill(
            region=region,
            systematic=sname,
            runid=runmap(events.run)[cut],
            genflavor=normalize(genflavor, cut),
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(msd_matched, cut),
            ddb=normalize(bvl, cut),
            ddc=normalize(cvl, cut),
            ddcvb=normalize(cvb, cut),
            weight=weight,
        )
        # Only 'wtag', 'wtag0', 'wtag2' and 'noselection' from this list
        # exist in `regions` above.
        if region in [
                'wtag', 'wtag0', 'wtag2', 'wtag3', 'wtag4', 'wtag5',
                'wtag6', 'wtag7', 'noselection'
        ]:  # and sname in ['nominal', 'pileup_weightDown', 'pileup_weightUp', 'jet_triggerDown', 'jet_triggerUp']:
            output['wtag'].fill(
                region=region,
                systematic=sname,
                genflavor=normalize(genflavor, cut),
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                n2ddt=normalize(candidatejet.n2ddt, cut),
                ddc=normalize(cvl, cut),
                ddcvb=normalize(cvb, cut),
                weight=weight,
            )
        # if region in ['signal', 'noselection']:
        #     output['etaphi'].fill(
        #         region=region,
        #         systematic=sname,
        #         runid=runmap(events.run)[cut],
        #         genflavor=normalize(genflavor, cut),
        #         pt=normalize(candidatejet.pt, cut),
        #         eta=normalize(candidatejet.eta, cut),
        #         phi=normalize(candidatejet.phi, cut),
        #         ddc=normalize(cvl, cut),
        #         ddcvb=normalize(cvb, cut),
        #     ),
        if not isRealData:
            # genBosonPt is only defined for simulation.
            if wmod is not None:
                _custom_weight = events.genWeight[cut] * wmod[cut]
            else:
                _custom_weight = np.ones_like(weight)
            output['genresponse_noweight'].fill(
                region=region,
                systematic=sname,
                pt=normalize(candidatejet.pt, cut),
                genpt=normalize(genBosonPt, cut),
                weight=_custom_weight,
            )

            output['genresponse'].fill(
                region=region,
                systematic=sname,
                pt=normalize(candidatejet.pt, cut),
                genpt=normalize(genBosonPt, cut),
                weight=weight,
            )
        # NOTE(review): nesting level of this block (function level vs.
        # inside `if not isRealData`) was reconstructed from collapsed
        # source - confirm.
        if systematic is None:
            output['signal_opt'].fill(
                region=region,
                genflavor=normalize(genflavor, cut),
                ddc=normalize(cvl, cut),
                ddcvb=normalize(cvb, cut),
                msd=normalize(msd_matched, cut),
                weight=weight,
            )
            output['signal_optb'].fill(
                region=region,
                genflavor=normalize(genflavor, cut),
                ddb=normalize(bvl, cut),
                msd=normalize(msd_matched, cut),
                weight=weight,
            )

    for region in regions:
        # n-1 distribution: every cut of the region except 'n2ddt'.
        cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
        if shift_name is None:
            output['nminus1_n2ddt'].fill(
                region=region,
                n2ddt=normalize(candidatejet.n2ddt, cut),
                weight=weights.weight()[cut],
            )
        for systematic in systematics:
            # Data is filled for the nominal entry only.
            if isRealData and systematic is not None:
                continue
            fill(region, systematic)
        # NOTE(review): the guard tests for 'LHEWeight' but the first loop
        # indexes events.LHEScaleWeight - presumably both are present for
        # these samples; confirm.
        if shift_name is None and 'GluGluH' in dataset and 'LHEWeight' in events.fields:
            for i in range(9):
                fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:, i])
            for c in events.LHEWeight.fields[1:]:
                fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

    toc = time.time()
    # Wall-clock seconds spent in the cutflow and fill section above.
    output["filltime"] = toc - tic
    if shift_name is None:
        output["weightStats"] = weights.weightStatistics
    return {dataset: output}
def process(self, events):
    """Select dilepton events and append per-event columns to the output.

    Events with more than two jets are kept, lepton/jet collections are
    built, and a baseline mask ``BL`` (same-sign dilepton requirements plus
    jet, central-jet and b-tag multiplicity cuts) is evaluated.  For events
    passing ``BL`` one value per event is appended to each output column,
    together with a numeric sample label and the event weight.

    Changes relative to the previous implementation:
      * ``high_score_btag`` is now sorted in descending b-tag score.
        ``ak.argsort`` sorts ascending by default, so the previous
        ``[:, :2]`` slice picked the two *lowest*-scoring central jets.
      * Locals that were computed but never read were removed.

    Args:
        events: event record for one chunk; ``metadata['dataset']`` selects
            the sample label and whether simulation weights are applied.

    Returns:
        The filled accumulator.
    """
    output = self.accumulator.identity()
    output['total']['all'] += len(events)

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2
    ev = events[presel]
    dataset = ev.metadata['dataset']

    # load the config - probably not needed anymore.  The return value is
    # unused; the call is kept because its side effects (if any) are not
    # visible from this method.
    loadConfig()

    def column(values):
        # Wrap an array as a column accumulator (via NumPy conversion).
        return processor.column_accumulator(ak.to_numpy(values))

    def flat(values):
        # Collapse the per-event list axis.
        return ak.flatten(values, axis=1)

    def flat_or_zero(values):
        # Like flat(), but events with an empty list contribute a single 0.
        return flat(ak.fill_none(ak.pad_none(values, 1, clip=True), 0))

    ## Muons
    muon = Collections(ev, "Muon", "vetoTTH").get()
    tightmuon = Collections(ev, "Muon", "tightTTH").get()
    dimuon = choose(muon, 2)
    SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)

    ## Electrons
    electron = Collections(ev, "Electron", "vetoTTH").get()
    tightelectron = Collections(ev, "Electron", "tightTTH").get()
    dielectron = choose(electron, 2)
    SSelectron = ak.any(
        (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)

    ## Merge electrons and muons - this should work better now in ak1
    dilepton = cross(muon, electron)
    SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                      axis=1)

    lepton = ak.concatenate([muon, electron], axis=1)
    leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
    trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

    dilepton_mass = (leading_lepton + trailing_lepton).mass
    dilepton_pt = (leading_lepton + trailing_lepton).pt

    mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
    min_mt_lep_met = ak.min(mt_lep_met, axis=1)

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    # need to sort wrt smeared and recorrected jet pt
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
    # remove jets that overlap with muons
    jet = jet[~match(jet, muon, deltaRCut=0.4)]
    # remove jets that overlap with electrons
    jet = jet[~match(jet, electron, deltaRCut=0.4)]

    central = jet[(abs(jet.eta) < 2.4)]
    # should study working point for DeepJet
    btag = getBTagsDeepFlavB(jet, year=self.year)
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)

    tau = getTaus(ev)
    track = getIsoTracks(ev)

    ## forward jets
    # highest momentum spectator
    j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]

    # Two central jets with the highest b-tag score; descending order must
    # be requested explicitly because ak.argsort defaults to ascending.
    high_score_btag = central[ak.argsort(central.btagDeepFlavB,
                                         ascending=False)][:, :2]

    bl = cross(lepton, high_score_btag)
    bl_dR = delta_r(bl['0'], bl['1'])
    min_bl_dR = ak.min(bl_dR, axis=1)

    jf = cross(j_fwd, jet)
    mjf = (jf['0'] + jf['1']).mass
    # this is the jet that forms the largest invariant mass with j_fwd
    j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1']
    delta_eta = ak.fill_none(
        ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0)

    ## MET -> can switch to puppi MET
    met_pt = ev.MET.pt

    ## other variables
    ht = ak.sum(jet.pt, axis=1)
    st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

    ## event selectors
    filters = getFilters(ev, year=self.year, dataset=dataset)

    dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2)
    lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
               ak.num(muon[(muon.pt > 25)])) > 0)
    lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
               ak.num(muon[(muon.pt > 20)])) > 1)
    lepveto = ((ak.num(electron) + ak.num(muon)) == 2)

    selection = PackedSelection()
    selection.add('lepveto', lepveto)
    selection.add('dilep', dilep)
    selection.add('filter', (filters))
    selection.add('p_T(lep0)>25', lep0pt)
    selection.add('p_T(lep1)>20', lep1pt)
    selection.add('SS', (SSlepton | SSelectron | SSmuon))
    selection.add('N_jet>3', (ak.num(jet) >= 4))
    selection.add('N_central>2', (ak.num(central) >= 3))
    selection.add('N_btag>0', (ak.num(btag) >= 1))
    selection.add('N_fwd>0', (ak.num(fwd) >= 1))

    ss_reqs = [
        'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'
    ]
    # 'N_fwd>0' is registered above but deliberately not part of the
    # baseline requirements.
    bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0']

    ss_selection = selection.require(**{sel: True for sel in ss_reqs})
    BL = selection.require(**{sel: True for sel in bl_reqs})

    weight = Weights(len(ev))

    if dataset != 'MuonEG':
        # lumi weight
        weight.add("weight", ev.weight)

        # PU weight - not in the babies...
        weight.add("PU",
                   ev.puWeight,
                   weightUp=ev.puWeightUp,
                   weightDown=ev.puWeightDown,
                   shift=False)

        # b-tag SFs
        weight.add("btag", self.btagSF.Method1a(btag, light))

        # lepton SFs
        weight.add("lepton", self.leptonSF.get(electron, muon))

    # Numeric sample label; any dataset not listed gets 5.
    labels = {
        'topW_v3': 0,
        'TTW': 1,
        'TTZ': 2,
        'TTH': 3,
        'ttbar': 4,
        'ttbar1l_MG': 4
    }
    label_mult = labels.get(dataset, 5)
    label = np.ones(len(ev[BL])) * label_mult

    output["n_lep"] += column((ak.num(electron) + ak.num(muon))[BL])
    output["n_lep_tight"] += column(
        (ak.num(tightelectron) + ak.num(tightmuon))[BL])

    # Leading / trailing lepton kinematics (exactly one entry per event).
    for prefix, leptons in (("lead_lep", leading_lepton[BL]),
                            ("sublead_lep", trailing_lepton[BL])):
        for attr in ("pt", "eta", "phi", "charge"):
            output[f"{prefix}_{attr}"] += column(
                flat(getattr(leptons, attr)))

    # Leading / subleading jets, by pt and by b-tag score.
    for prefix, jets_sel in (
        ("lead_jet", jet[:, 0:1][BL]),
        ("sublead_jet", jet[:, 1:2][BL]),
        ("lead_btag", high_score_btag[:, 0:1][BL]),
        ("sublead_btag", high_score_btag[:, 1:2][BL]),
    ):
        for attr in ("pt", "eta", "phi"):
            output[f"{prefix}_{attr}"] += column(
                flat(getattr(jets_sel, attr)))

    # Forward jet: events without one are stored as 0.
    fwd_sel = j_fwd[BL]
    for attr in ("p", "pt", "eta", "phi"):
        output[f"fwd_jet_{attr}"] += column(
            flat_or_zero(getattr(fwd_sel, attr)))

    output["mjj_max"] += column(ak.fill_none(ak.max(mjf[BL], axis=1), 0))
    output["delta_eta_jj"] += column(flat(delta_eta[BL]))

    output["met"] += column(met_pt[BL])
    output["ht"] += column(ht[BL])
    output["st"] += column(st[BL])
    output["n_jet"] += column(ak.num(jet[BL]))
    output["n_btag"] += column(ak.num(btag[BL]))
    output["n_fwd"] += column(ak.num(fwd[BL]))
    output["n_central"] += column(ak.num(central[BL]))
    output["n_tau"] += column(ak.num(tau[BL]))
    output["n_track"] += column(ak.num(track[BL]))

    output["dilepton_pt"] += column(flat(dilepton_pt[BL]))
    output["dilepton_mass"] += column(flat(dilepton_mass[BL]))
    output["min_bl_dR"] += column(min_bl_dR[BL])
    output["min_mt_lep_met"] += column(min_mt_lep_met[BL])

    output["label"] += processor.column_accumulator(label)
    output["weight"] += processor.column_accumulator(weight.weight()[BL])

    output["presel"]["all"] += len(ev[ss_selection])
    output["sel"]["all"] += len(ev[BL])

    return output
def process(self, events):
    """Fill the b-jet multiplicity histograms (nominal and b-tag variations).

    Events with more than two jets are kept.  ``N_b`` is filled with the
    number of b-tagged jets for events passing the dilepton baseline with
    the ``'N_btag>0'`` requirement omitted.  For collision-data samples
    (dataset name matching ``MuonEG|DoubleMuon|DoubleEG|EGamma``) the run,
    luminosity block and event numbers of baseline events are stored and no
    weights are applied.  For all other samples ``N_b_<var>`` is filled for
    every entry of ``self.variations`` with the matching b-tag scale-factor
    direction.

    Changes relative to the previous implementation:
      * the data-sample regular expression is evaluated once instead of
        being recompiled and searched three times;
      * quantities that were computed but never used (same/opposite-sign
        flags, leading-object lookups, jet-pair masses, HT/ST sums, the
        ``second_lepton`` expression built from a bitwise AND of index
        arrays, ...) were removed; this assumes the project helpers
        involved have no side effects.

    Args:
        events: event record for one chunk.

    Returns:
        The filled accumulator.
    """
    output = self.accumulator.identity()

    # use a very loose preselection to filter the events
    presel = ak.num(events.Jet) > 2
    ev = events[presel]
    dataset = ev.metadata['dataset']

    # load the config (its 'lumi' entry scales the simulation weight)
    cfg = loadConfig()

    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)

    # Collision data is recognised by dataset name.
    is_data = re.search('MuonEG|DoubleMuon|DoubleEG|EGamma',
                        dataset) is not None

    ## Leptons
    muon = Collections(ev, "Muon", "tightSSTTH").get()
    vetomuon = Collections(ev, "Muon", "vetoTTH").get()
    electron = Collections(ev, "Electron", "tightSSTTH").get()
    vetoelectron = Collections(ev, "Electron", "vetoTTH").get()

    ## Jets
    jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
    # need to sort wrt smeared and recorrected jet pt
    jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
    # remove jets that overlap with muons
    jet = jet[~match(jet, muon, deltaRCut=0.4)]
    # remove jets that overlap with electrons
    jet = jet[~match(jet, electron, deltaRCut=0.4)]

    central = jet[(abs(jet.eta) < 2.4)]
    # should study working point for DeepJet
    btag = getBTagsDeepFlavB(jet, year=self.year)
    light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
    fwd = getFwdJet(light)

    def base_weight():
        # Weights common to the nominal pass and every variation.
        w = Weights(len(ev))
        # lumi weight
        w.add("weight", ev.weight * cfg['lumi'][self.year])
        # PU weight - not in the babies...
        w.add("PU",
              ev.puWeight,
              weightUp=ev.puWeightUp,
              weightDown=ev.puWeightDown,
              shift=False)
        return w

    def baseline_masks(btag_jets):
        # Baseline mask and the same mask without the 'N_btag>0' cut.
        sel = Selection(
            dataset=dataset,
            events=ev,
            year=self.year,
            ele=electron,
            ele_veto=vetoelectron,
            mu=muon,
            mu_veto=vetomuon,
            jet_all=jet,
            jet_central=central,
            jet_btag=btag_jets,
            jet_fwd=fwd,
            met=ev.MET,
        )
        return (sel.dilep_baseline(SS=False),
                sel.dilep_baseline(SS=False, omit=['N_btag>0']))

    # define the weight (left empty for collision data)
    if is_data:
        weight = Weights(len(ev))
    else:
        weight = base_weight()
        # b-tag SFs
        weight.add(
            "btag",
            self.btagSF.Method1a(btag,
                                 light,
                                 b_direction='central',
                                 c_direction='central'))
        # lepton SFs
        weight.add("lepton", self.leptonSF.get(electron, muon))

    BL, BL_minusNb = baseline_masks(btag)

    output['N_b'].fill(dataset=dataset,
                       multiplicity=ak.num(btag)[BL_minusNb],
                       weight=weight.weight()[BL_minusNb])

    if is_data:
        # Store the identifiers of baseline events.
        output['%s_run' % dataset] += processor.column_accumulator(
            ak.to_numpy(ev.run)[BL])
        output['%s_lumi' % dataset] += processor.column_accumulator(
            ak.to_numpy(ev.luminosityBlock)[BL])
        output['%s_event' % dataset] += processor.column_accumulator(
            ak.to_numpy(ev.event)[BL])
        return output

    # Now, take care of systematic uncertainties (simulation only).
    # Variation name -> (b_direction, c_direction).  A variation that is not
    # listed gets no b-tag weight at all, as before.
    btag_directions = {
        'centralUp': ('central', 'up'),
        'centralDown': ('central', 'down'),
        'upCentral': ('up', 'central'),
        'downCentral': ('down', 'central'),
    }
    for var in self.variations:
        # get the collections that change with the variations
        # should study working point for DeepJet
        var_btag = getBTagsDeepFlavB(jet, year=self.year)

        var_weight = base_weight()
        directions = btag_directions.get(var)
        if directions is not None:
            b_direction, c_direction = directions
            var_weight.add(
                "btag",
                self.btagSF.Method1a(var_btag,
                                     light,
                                     b_direction=b_direction,
                                     c_direction=c_direction))
        var_weight.add("lepton", self.leptonSF.get(electron, muon))

        _, var_minusNb = baseline_masks(var_btag)

        output['N_b_' + var].fill(
            dataset=dataset,
            multiplicity=ak.num(var_btag)[var_minusNb],
            weight=var_weight.weight()[var_minusNb])

    return output
def process(self, events):
    """Select VBF + single-lepton events and accumulate per-event columns.

    Two selections are evaluated on the same chunk of events:

    * "merged"   -- at least one selected FatJet and two VBF-candidate jets;
    * "resolved" -- at least two b-tagged jets and two VBF-candidate jets.

    For each selection and each lepton flavour (muons first, then electrons)
    one row per passing event is appended to ``variables_merged`` /
    ``variables`` and one entry to ``weights_merged`` / ``weights``.

    Relies on the module-level names ``year``, ``lumimask``, ``jec_stack``
    and ``CorrectedJetsFactory``.

    Parameters
    ----------
    events : event array exposing ``metadata['dataset']``, ``caches`` and the
        collections read below (Jet, FatJet, Muon, Electron, PuppiMET, HLT,
        ...).

    Returns
    -------
    The filled accumulator for this chunk.
    """
    data_streams = ('singleelectron', 'singlemuon', 'egamma')

    output = self.accumulator.identity()
    dataset = events.metadata['dataset']
    is_data = dataset in data_streams

    # The generator weight is only read for simulation; the data streams are
    # counted in 'nevents' but do not contribute to 'sumw'.
    if not is_data:
        output['sumw'][dataset] += ak.sum(np.sign(events.Generator.weight))
    output['nevents'][dataset] += len(events)

    if is_data:
        events = events[lumimask(events.run, events.luminosityBlock)]

    # Keep an event if the nominal or either of the JER/JES-up MET values
    # passes the threshold.
    events = events[(events.PuppiMET.pt > 30)
                    | (events.PuppiMET.ptJERUp > 30)
                    | (events.PuppiMET.ptJESUp > 30)]

    # ---- trigger selection -------------------------------------------------
    if year == "2016":
        if dataset == 'singlemuon':
            events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24]
        elif dataset == 'singleelectron':
            # NOTE(review): this is the same OR as the simulation branch,
            # whereas 2017/2018 use the electron trigger alone for the
            # electron stream -- confirm this is intended for 2016.
            events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24
                            | events.HLT.Ele27_WPTight_Gsf]
        else:
            events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24
                            | events.HLT.Ele27_WPTight_Gsf]
    elif year == "2017":
        if dataset == 'singlemuon':
            events = events[events.HLT.IsoMu27]
        elif dataset == 'singleelectron':
            events = events[events.HLT.Ele32_WPTight_Gsf_L1DoubleEG]
        else:
            events = events[events.HLT.IsoMu27
                            | events.HLT.Ele32_WPTight_Gsf_L1DoubleEG]
    elif year == "2018":
        if dataset == 'singlemuon':
            events = events[events.HLT.IsoMu24]
        elif dataset == 'egamma':
            events = events[events.HLT.Ele32_WPTight_Gsf]
        else:
            events = events[events.HLT.IsoMu24 | events.HLT.Ele32_WPTight_Gsf]

    # ---- object multiplicity preselection ----------------------------------
    events = events[(ak.num(events.Jet) > 3)
                    | ((ak.num(events.Jet) > 1) & (ak.num(events.FatJet) > 0))]
    events = events[(ak.num(events.Electron) > 0) | (ak.num(events.Muon) > 0)]

    # ---- leptons -----------------------------------------------------------
    muons = events.Muon
    tight_muons = muons[muons.tightId
                        & (muons.pfRelIso04_all < 0.15)
                        & (muons.pt > 26)
                        & (abs(muons.eta) < 2.4)]
    loose_not_tight_muons = muons[muons.tightId
                                  & (muons.pfRelIso04_all < 0.4)
                                  & (muons.pfRelIso04_all > 0.15)
                                  & (muons.pt > 20)
                                  & (abs(muons.eta) < 2.4)]

    electrons = events.Electron
    sc_eta = electrons.eta + electrons.deltaEtaSC
    # NOTE(review): sc_eta is compared without abs(), so negative-eta
    # electrons always fall in the "barrel" arm -- confirm whether |eta|
    # was intended.
    barrel_ip = ((abs(electrons.dz) < 0.1)
                 & (abs(electrons.dxy) < 0.05)
                 & (sc_eta < 1.479))
    endcap_ip = ((abs(electrons.dz) < 0.2)
                 & (abs(electrons.dxy) < 0.1)
                 & (sc_eta > 1.479))
    # The barrel/endcap alternative is parenthesised explicitly: '&' binds
    # tighter than '|', so without the parentheses the endcap arm would skip
    # the pt, ID and eta requirements.
    tight_electrons = electrons[(electrons.pt > 30)
                                & (electrons.cutBased >= 3)
                                & (sc_eta < 2.5)
                                & (barrel_ip | endcap_ip)]

    # ---- jet energy corrections --------------------------------------------
    name_map = jec_stack.blank_name_map
    name_map['JetPt'] = 'pt'
    name_map['JetMass'] = 'mass'
    name_map['JetEta'] = 'eta'
    name_map['JetA'] = 'area'
    name_map['ptGenJet'] = 'pt_gen'
    name_map['ptRaw'] = 'pt_raw'
    name_map['massRaw'] = 'mass_raw'
    name_map['Rho'] = 'rho'

    jets = events.Jet
    jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
    jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
    jets['pt_gen'] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32)
    jets['rho'] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0]

    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    corrected_jets = jet_factory.build(jets, lazy_cache=events.caches[0])
    jet_pt = corrected_jets.pt

    # Repack the corrected jets as candidates so they can be added to leptons
    # and to each other below.
    corrected_jets = ak.zip({
        "pt": corrected_jets.pt,
        "eta": corrected_jets.eta,
        "phi": corrected_jets.phi,
        "mass": corrected_jets.mass,
        "charge": np.ones(len(corrected_jets.pt)),
        "btagDeepB": corrected_jets.btagDeepB,
    }, with_name="PtEtaPhiMCandidate")

    # ---- jet collections ---------------------------------------------------
    fatjets = events.FatJet[(events.FatJet.pt > 250)
                            & (abs(events.FatJet.eta) < 2.5)
                            & (events.FatJet.msoftdrop > 50)
                            & (events.FatJet.msoftdrop < 150)]

    clean_jet = (events.Jet.cleanmask == 1) & (jet_pt > 30)
    in_acceptance = clean_jet & (abs(events.Jet.eta) < 4.7)

    b_jets = corrected_jets[clean_jet
                            & (abs(events.Jet.eta) < 2.5)
                            & (events.Jet.btagDeepB > 0.8953)]
    vbf_jets = corrected_jets[in_acceptance & (events.Jet.btagDeepB < 0.2217)]

    nextrajets = ak.num(events.Jet[in_acceptance]) - 4
    nextrabjets = ak.num(events.Jet[in_acceptance & (events.Jet.btagDeepB > 0.2217)]) - 2

    # ---- event-level base selections ---------------------------------------
    one_tight_lepton = ((ak.num(tight_muons) + ak.num(tight_electrons) == 1)
                        & (ak.num(loose_not_tight_muons) == 0)
                        & (events.PuppiMET.pt > 30))
    two_vbf_jets = ak.num(vbf_jets) > 1

    basecut_merged = (ak.num(fatjets) > 0) & two_vbf_jets & one_tight_lepton
    basecut = (ak.num(b_jets) > 1) & two_vbf_jets & one_tight_lepton

    # All data streams are accumulated under one key.
    if is_data:
        dataset = 'data'

    # ---- merged (fat-jet) selection ----------------------------------------
    if ak.any(basecut_merged):
        events_merged = events[basecut_merged]
        fatjets_merged = fatjets[basecut_merged]
        vbf_jets_merged = vbf_jets[basecut_merged]
        nextrajets_merged = nextrajets[basecut_merged]
        nextrabjets_merged = nextrabjets[basecut_merged]

        fat0 = fatjets_merged[:, 0]
        j0, j1 = vbf_jets_merged[:, 0], vbf_jets_merged[:, 1]
        signal_merged = ((fat0.mass > 50) & (fat0.mass < 150)
                         & ((j0 + j1).mass > 500)
                         & (abs(j0.eta - j1.eta) > 2.5))

        # Lepton-flavour column: 0 for muons, 1 for electrons.
        for leptons, lepton_flag in ((tight_muons[basecut_merged], 0.0),
                                     (tight_electrons[basecut_merged], 1.0)):
            cut = signal_merged & (ak.num(leptons) > 0)
            if ak.any(cut):
                self._accumulate_merged(
                    output, dataset, is_data,
                    events_merged[cut], fatjets_merged[cut], vbf_jets_merged[cut],
                    leptons[cut][:, 0],
                    nextrajets_merged[cut], nextrabjets_merged[cut],
                    lepton_flag)

    # ---- resolved (two b-jet) selection ------------------------------------
    # Guarded by its own base cut so it never depends on names that are only
    # bound when the merged selection has events.
    if ak.any(basecut):
        events_resolved = events[basecut]
        b_jets_resolved = b_jets[basecut]
        vbf_jets_resolved = vbf_jets[basecut]
        nextrajets_resolved = nextrajets[basecut]
        nextrabjets_resolved = nextrabjets[basecut]

        b0, b1 = b_jets_resolved[:, 0], b_jets_resolved[:, 1]
        j0, j1 = vbf_jets_resolved[:, 0], vbf_jets_resolved[:, 1]
        signal_resolved = (((b0 + b1).mass > 50) & ((b0 + b1).mass < 150)
                           & ((j0 + j1).mass > 500)
                           & (abs(j0.eta - j1.eta) > 2.5))

        for leptons, lepton_flag in ((tight_muons[basecut], 0.0),
                                     (tight_electrons[basecut], 1.0)):
            cut = signal_resolved & (ak.num(leptons) > 0)
            if ak.any(cut):
                self._accumulate_resolved(
                    output, dataset, is_data,
                    events_resolved[cut], b_jets_resolved[cut], vbf_jets_resolved[cut],
                    leptons[cut][:, 0],
                    nextrajets_resolved[cut], nextrabjets_resolved[cut],
                    lepton_flag)

    return output

def _generator_weight_signs(self, sel_events, is_data):
    """Return one weight per selected event: sign of the generator weight, or 1 for data."""
    if is_data:
        # Same rule as for 'sumw': the generator weight is not read for data.
        return np.ones(len(sel_events))
    return np.sign(ak.to_numpy(sel_events.Generator.weight).data)

def _lepton_jet_met_mt(self, lepton, jet, met):
    """Return sqrt(2 * pt(l+j) * MET * (1 - cos(dphi))) for the lepton+jet system and the MET."""
    system = lepton + jet
    return ak.to_numpy(np.sqrt(2 * system.pt * met.pt * (1 - np.cos(met.phi - system.phi))))

def _accumulate_merged(self, output, dataset, is_data, sel_events, sel_fatjets,
                       sel_vbf_jets, sel_lepton, sel_nextrajets, sel_nextrabjets,
                       lepton_flag):
    """Append the 27 merged-selection columns and the weights for one lepton flavour."""
    fat = sel_fatjets[:, 0]
    j0, j1 = sel_vbf_jets[:, 0], sel_vbf_jets[:, 1]
    met = sel_events.PuppiMET

    columns = (
        ak.to_numpy(fat.pt),
        ak.to_numpy(fat.eta),
        ak.to_numpy(fat.phi),
        ak.to_numpy(fat.btagDeepB),
        ak.to_numpy(fat.btagHbb),
        ak.to_numpy(fat.msoftdrop),
        ak.to_numpy(sel_nextrajets),
        ak.to_numpy(sel_nextrabjets),
        np.full(len(sel_events), lepton_flag),
        # sign(charge + 1): 1 for charge +1, 0 for charge -1
        np.sign(ak.to_numpy(sel_lepton.charge) + 1),
        ak.to_numpy(sel_lepton.pt),
        ak.to_numpy(sel_lepton.eta),
        ak.to_numpy(sel_lepton.phi),
        ak.to_numpy(met.pt),
        ak.to_numpy(met.phi),
        ak.to_numpy(j0.pt),
        ak.to_numpy(j1.pt),
        ak.to_numpy(j0.eta),
        ak.to_numpy(j1.eta),
        ak.to_numpy(j0.phi),
        ak.to_numpy(j1.phi),
        ak.to_numpy(j0.btagDeepB),
        ak.to_numpy(j1.btagDeepB),
        ak.to_numpy((j0 + j1).mass),
        ak.to_numpy(j0.eta - j1.eta),
        self._lepton_jet_met_mt(sel_lepton, j0, met),
        self._lepton_jet_met_mt(sel_lepton, j1, met),
    )

    output["weights_merged"][dataset] += processor.column_accumulator(
        self._generator_weight_signs(sel_events, is_data))
    output['variables_merged'][dataset] += processor.column_accumulator(
        np.transpose(np.vstack(columns)))

def _accumulate_resolved(self, output, dataset, is_data, sel_events, sel_b_jets,
                         sel_vbf_jets, sel_lepton, sel_nextrajets, sel_nextrabjets,
                         lepton_flag):
    """Append the 30 resolved-selection columns and the weights for one lepton flavour."""
    b0, b1 = sel_b_jets[:, 0], sel_b_jets[:, 1]
    j0, j1 = sel_vbf_jets[:, 0], sel_vbf_jets[:, 1]
    met = sel_events.PuppiMET

    columns = (
        ak.to_numpy(sel_nextrajets),
        ak.to_numpy(sel_nextrabjets),
        np.full(len(sel_events), lepton_flag),
        # sign(charge + 1): 1 for charge +1, 0 for charge -1
        np.sign(ak.to_numpy(sel_lepton.charge) + 1),
        ak.to_numpy(sel_lepton.pt),
        ak.to_numpy(sel_lepton.eta),
        ak.to_numpy(sel_lepton.phi),
        ak.to_numpy(met.pt),
        ak.to_numpy(met.phi),
        ak.to_numpy(b0.pt),
        ak.to_numpy(b1.pt),
        ak.to_numpy(j0.pt),
        ak.to_numpy(j1.pt),
        ak.to_numpy(b0.eta),
        ak.to_numpy(b1.eta),
        ak.to_numpy(j0.eta),
        ak.to_numpy(j1.eta),
        ak.to_numpy(b0.phi),
        ak.to_numpy(b1.phi),
        ak.to_numpy(j0.phi),
        ak.to_numpy(j1.phi),
        ak.to_numpy(b0.btagDeepB),
        ak.to_numpy(b1.btagDeepB),
        ak.to_numpy(j0.btagDeepB),
        ak.to_numpy(j1.btagDeepB),
        ak.to_numpy((b0 + b1).mass),
        ak.to_numpy((j0 + j1).mass),
        ak.to_numpy(j0.eta - j1.eta),
        self._lepton_jet_met_mt(sel_lepton, b0, met),
        self._lepton_jet_met_mt(sel_lepton, b1, met),
    )

    output["weights"][dataset] += processor.column_accumulator(
        self._generator_weight_signs(sel_events, is_data))
    output['variables'][dataset] += processor.column_accumulator(
        np.transpose(np.vstack(columns)))
def __init__(self, sync=False, categories=None, checklist=None,
             sample_list_dir="../sample_lists"):
    """Load the fastmtt libraries and declare the output accumulator.

    Parameters
    ----------
    sync : bool
        When true the processor runs in 'sync' mode (``self.mode == 'sync'``),
        otherwise in 'all' mode.
    categories : 'all' or iterable of str, optional
        ``'all'`` selects the eight built-in 4l final states; any other
        iterable is enumerated from 0. Defaults to no categories.
    checklist : pandas.DataFrame, optional
        Stored as ``self.checklist``. Defaults to an empty frame.
    sample_list_dir : str
        Stored as ``self.sample_list_dir``.
    """
    # Build the defaults per call instead of sharing one list / DataFrame
    # object between every instance.
    if categories is None:
        categories = []
    if checklist is None:
        checklist = pd.DataFrame([])

    # load in fastmtt: use the compiled library when present, otherwise
    # ask ROOT to compile the source
    fastmtt_dir = '../svfit/fastmtt/'
    for basename in ['MeasuredTauLepton', 'svFitAuxFunctions', 'FastMTT']:
        path = fastmtt_dir + basename
        compiled = "{0:s}_cc.so".format(path)
        if os.path.isfile(compiled):
            ROOT.gInterpreter.ProcessLine(".L {0:s}".format(compiled))
        else:
            ROOT.gInterpreter.ProcessLine(".L {0:s}.cc++".format(path))

    # customize the 4l final states considered
    if categories == 'all':
        self.categories = {1: 'eeet', 2: 'eemt', 3: 'eett', 4: 'eeem',
                           5: 'mmet', 6: 'mmmt', 7: 'mmtt', 8: 'mmem'}
    else:
        self.categories = dict(enumerate(categories))
    print("\n...running on", self.categories)

    # sync mode runs a subset of the full analysis
    self.sync = sync
    self.mode = 'sync' if self.sync else 'all'
    self.checklist = checklist

    # failing sync events to double-check
    self.princeton_exclusive = np.array([248633, 250132, 250374, 256311, 2568862,
                                         259595, 395373, 488027, 490292, 491592])

    # location of the samples, usually differentiates sync vs. all
    self.sample_list_dir = sample_list_dir

    # correct number of leptons in each final state
    self.correct_n_electrons = {'eeem': 3, 'eeet': 3, 'eemt': 2, 'eett': 2,
                                'mmem': 1, 'mmet': 1, 'mmmt': 0, 'mmtt': 0}
    self.correct_n_muons = {'eeem': 1, 'eeet': 0, 'eemt': 1, 'eett': 0,
                            'mmem': 3, 'mmet': 2, 'mmmt': 3, 'mmtt': 2}

    # histogram axes specify histo names, labels, and bin shapes
    # (labels containing TeX backslashes are raw strings so that "\e" / "\p"
    # are not parsed as escape sequences; the label text itself is unchanged)
    # NOTE(review): the eta and phi labels carry a "[GeV]" unit -- confirm
    # whether that is intended.
    category_axis = hist.Cat("category", "")
    dataset_axis = hist.Cat("dataset", "")
    particle_axis = hist.Cat("particle", "")
    pt_axis = hist.Bin("pt", "$p_T$ [GeV]", 20, 0, 200)
    eta_axis = hist.Bin("eta", r"$\eta$ [GeV]", 10, -5, 5)
    phi_axis = hist.Bin("phi", r"$\phi$ [GeV]", 10, -np.pi, np.pi)
    mll_axis = hist.Bin("mll", "$m(l_1,l_2)$ [GeV]", 40, 0, 200)
    mtt_axis = hist.Bin("mtt", "$m(t_1,t_2)$ [GeV]", 30, 0, 300)
    mA_axis = hist.Bin("mA", "$m_A$ [GeV]", 40, 0, 400)
    mass_type_axis = hist.Cat("mass_type", "")
    nbtag_axis = hist.Bin("nbtag", "$n_{btag}$", 5, 0, 5)
    njets_axis = hist.Bin("njets", "$n_{jets}$", 5, 0, 5)

    self._accumulator = processor.dict_accumulator({
        # event info, weights
        "sumw": processor.defaultdict_accumulator(float),
        "evt": processor.column_accumulator(np.array([])),
        "lumi": processor.column_accumulator(np.array([])),
        "run": processor.column_accumulator(np.array([])),
        "cat": processor.column_accumulator(np.array([])),
        "mll_array": processor.column_accumulator(np.array([])),
        "msv_cons_array": processor.column_accumulator(np.array([])),
        "m_mumu": processor.column_accumulator(np.array([])),

        # histograms
        "pt": hist.Hist("Events", dataset_axis, category_axis, pt_axis, particle_axis),
        "eta": hist.Hist("Events", dataset_axis, category_axis, eta_axis, particle_axis),
        "phi": hist.Hist("Events", dataset_axis, category_axis, phi_axis, particle_axis),
        "mll": hist.Hist("Events", dataset_axis, category_axis, mll_axis),
        "mtt": hist.Hist("Events", dataset_axis, category_axis, mtt_axis, mass_type_axis),
        "m4l": hist.Hist("Events", dataset_axis, category_axis, mA_axis, mass_type_axis),
        "nbtag": hist.Hist("Events", dataset_axis, category_axis, nbtag_axis),
        "njets": hist.Hist("Events", dataset_axis, category_axis, njets_axis),

        # cutflow
        'cutflow': processor.defaultdict_accumulator(
            partial(processor.defaultdict_accumulator, int)
        ),
        'cutflow_sync': processor.defaultdict_accumulator(
            partial(processor.defaultdict_accumulator, int)
        ),
    })
def process(self, events):
    """Run the 4l (ll + tau-tau) selection on one chunk and fill the outputs.

    The chunk is filtered (MET / primary-vertex filters), loose objects are
    built, and for every configured category the lepton-count veto, trigger
    path, dR, Z-candidate, trigger-matching and di-tau steps are applied
    through the corresponding ``self.*`` helpers. Event ids, masses and
    kinematic histograms of the surviving final states are accumulated in
    ``self.output``.

    Parameters
    ----------
    events : event array exposing ``metadata['dataset']`` and the collections
        read below (Flag, PV, Tau, Muon, Electron, Jet, MET, TrigObj, HLT).

    Returns
    -------
    The filled accumulator (``self.output``).

    Raises
    ------
    yaml.YAMLError
        If the sample-properties file cannot be parsed.
    """
    # grab dataset metadata; the year is the last '_'-separated token
    self.dataset = events.metadata['dataset']
    year = self.dataset.split('_')[-1]
    self.output = self.accumulator.identity()
    print(year, self.dataset)

    # grab event id data
    self.event_ids = pd.DataFrame({'run': np.array(events.run, dtype=int),
                                   'lumi': np.array(events.luminosityBlock, dtype=int),
                                   'evt': np.array(events.event, dtype=int)})
    self.fill_cutflow('all events', len(events),
                      N_sync=self.check_events(self.event_ids))

    # integrated luminosity per year
    lumi = {'2016': 35.9, '2017': 41.5, '2018': 59.7}

    # load properties of each sample
    properties_path = "{0}/samples_{1}/{2}_properties.yaml".format(
        self.sample_list_dir, self.mode, self.dataset)
    with open(properties_path, 'r') as stream:
        try:
            properties = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Without the properties nothing below can run; re-raise instead
            # of failing later on an unbound name.
            print(exc)
            raise

    xsec = float(properties[self.dataset]['xsec'])
    total_weight = float(properties[self.dataset]['total_weight'])
    sample_weight = lumi[year] * xsec / total_weight
    if 'data' in self.dataset:
        sample_weight = 1.

    #############
    ## FILTERS ##
    #############

    # calculate the MET filter
    flags = events.Flag
    MET_filter = (flags.goodVertices & flags.HBHENoiseFilter &
                  flags.HBHENoiseIsoFilter &
                  flags.EcalDeadCellTriggerPrimitiveFilter &
                  flags.BadPFMuonFilter & flags.ecalBadCalibFilter)

    # calculate PV quality filter
    pv = events.PV
    pv_filter = ((pv.ndof > 4) & (abs(pv.z) < 24) &
                 (np.sqrt(pv.x**2 + pv.y**2) < 2))

    # apply filters (the id frame is sliced with the same mask to stay aligned)
    event_filter = MET_filter & pv_filter
    events = events[event_filter]
    self.event_ids = self.event_ids[event_filter]
    self.fill_cutflow('MET, pv filters', len(events),
                      N_sync=self.check_events(self.event_ids))

    ######################
    ## LOOSE SELECTIONS ##
    ######################

    loose_taus = self.loose_tau_selections(events.Tau)
    loose_muons = self.loose_muon_selections(events.Muon)
    loose_electrons = self.loose_electron_selections(events.Electron)
    loose_jets = self.loose_jet_selections(events.Jet, year)
    loose_bjets = self.loose_bjet_selections(events.Jet, year)
    MET = events.MET
    trigger_objects = events.TrigObj
    HLT = events.HLT
    #if category=='eemt':
    #    self.check_princeton_exclusive_loose(loose_electrons, loose_muons, loose_taus)

    # count electrons / muons minus overlapped objects
    electron_counts = self.count_non_overlapped(loose_electrons)
    muon_counts = self.count_non_overlapped(loose_muons)

    # candidate pairs, keyed by the two-letter halves of a category name
    ll_pairs = {'ee': loose_electrons.distincts(),
                'mm': loose_muons.distincts()}
    tt_pairs = {'mt': loose_muons.cross(loose_taus),
                'et': loose_electrons.cross(loose_taus),
                'em': loose_electrons.cross(loose_muons),
                'tt': loose_taus.distincts()}

    ###############################
    ## SELECTIONS (per category) ##
    ###############################
    for category in self.categories.values():

        # identify correct trigger path
        trigger_path = self.trigger_path(HLT, year, category, sync=self.sync)

        # n_leptons veto, combined with the trigger path
        n_lepton_mask = self.n_lepton_veto(electron_counts, muon_counts, category)
        n_lepton_mask = n_lepton_mask & trigger_path

        # only referenced by the disabled b-jet veto below
        jets, bjets = loose_jets[n_lepton_mask], loose_bjets[n_lepton_mask]
        self.met = MET[n_lepton_mask]
        trig_obj = trigger_objects[n_lepton_mask]

        # track event ids on a per-category basis
        self.evt_ids = self.event_ids[n_lepton_mask]

        # form 4l final states
        ll = ll_pairs[category[:2]][n_lepton_mask]
        tt = tt_pairs[category[2:]][n_lepton_mask]
        lltt = ll.cross(tt)
        self.fill_cutflow('n_lepton veto', len(self.evt_ids),
                          N_sync=self.check_events(self.evt_ids))

        # build non-overlapped final state objects
        lltt = self.dR_cut(lltt, category, cutflow=True)
        lltt = self.build_Z_cand(lltt, category, cutflow=True)
        lltt = self.trigger_filter(lltt, trig_obj, category, cutflow=True)
        lltt = self.build_ditau_cand(lltt, category, cutflow=True)

        # apply b jet veto
        #self.evt_ids = self.evt_ids[bjets.counts==0]
        #lltt, met = lltt[bjets.counts==0], met[bjets.counts==0]
        #jets = jets[bjets.counts==0],
        #bjets = bjets[bjets.counts==0]
        #self.fill_cutflow('bjet veto', lltt[lltt.counts>0].shape[0],
        #                  N_sync = self.check_events(self.evt_ids[lltt.counts>0]))

        # take only valid final states
        has_candidate = lltt.counts > 0
        self.evt_ids = self.evt_ids[has_candidate]
        self.met = self.met[has_candidate]
        lltt = lltt[has_candidate]
        mll, mtt, m4l = self.get_masses(lltt, cutflow=True)
        msv, msv_cons, mA_corr, mA_cons = self.run_fastmtt(lltt, self.met, category)

        #################
        ## FILL HISTOS ##
        #################
        self.output["evt"] += processor.column_accumulator(self.evt_ids['evt'].to_numpy())
        self.output["lumi"] += processor.column_accumulator(self.evt_ids['lumi'].to_numpy())
        self.output["run"] += processor.column_accumulator(self.evt_ids['run'].to_numpy())
        self.output["cat"] += processor.column_accumulator(
            np.array([category] * len(self.evt_ids)))

        if category == 'eemt':
            self.check_princeton_exclusive_fs(lltt)

        # per-leg kinematics: i0/i1 are the ll legs, i2/i3 the tt legs
        legs = (lltt.i0, lltt.i1, lltt.i2, lltt.i3)
        particle_nums = ["$l_1$", "$l_2$", "$t_1$", "$t_2$"]
        for leg, pnum in zip(legs, particle_nums):
            leg_pt = leg.pt.flatten()
            leg_eta = leg.eta.flatten()
            leg_phi = leg.phi.flatten()
            self.output["pt"].fill(dataset=self.dataset, category=category,
                                   pt=leg_pt, particle=pnum,
                                   weight=sample_weight*np.ones(len(leg_pt)))
            self.output["eta"].fill(dataset=self.dataset, category=category,
                                    eta=leg_eta, particle=pnum,
                                    weight=sample_weight*np.ones(len(leg_eta)))
            self.output["phi"].fill(dataset=self.dataset, category=category,
                                    phi=leg_phi, particle=pnum,
                                    weight=sample_weight*np.ones(len(leg_phi)))

        mll_flat = mll.flatten()
        mtt_flat = mtt.flatten()
        msv_flat = msv.flatten()

        if category[:2] == 'mm':
            self.output["m_mumu"] += processor.column_accumulator(np.array(mll_flat))

        self.output["mll_array"] += processor.column_accumulator(np.array(mll_flat))
        self.output["msv_cons_array"] += processor.column_accumulator(
            np.array(msv_cons.flatten()))

        self.output["mll"].fill(dataset=self.dataset, category=category,
                                mll=mll_flat,
                                weight=sample_weight*np.ones(len(mll_flat)))
        self.output["mtt"].fill(dataset=self.dataset, category=category,
                                mass_type="$m_{tt}$", mtt=mtt_flat,
                                weight=sample_weight*np.ones(len(mtt_flat)))
        self.output["mtt"].fill(dataset=self.dataset, category=category,
                                mass_type="$m_{fastmtt}$", mtt=msv_flat,
                                weight=sample_weight*np.ones(len(msv_flat)))

        # NOTE(review): unlike the histograms above, the m4l fills carry no
        # sample weight -- confirm whether that is intended.
        self.output["m4l"].fill(dataset=self.dataset, category=category,
                                mass_type='$m_{4l}$', mA=m4l.flatten())
        self.output["m4l"].fill(dataset=self.dataset, category=category,
                                mass_type='$m_A^{corr}$', mA=mA_corr.flatten())
        self.output["m4l"].fill(dataset=self.dataset, category=category,
                                mass_type='$m_A^{cons}$', mA=mA_cons.flatten())

        #nbtag = good_bjets.counts.flatten()
        #output["nbtag"].fill(dataset=dataset, category=category, nbtag=nbtag, weight=sample_weight*np.ones(len(nbtag)))
        #njets = good_jets.counts.flatten()
        #output["njets"].fill(dataset=dataset, category=category, njets=njets, weight=sample_weight*np.ones(len(njets)))

    return self.output
'lead_lep_phi', 'lead_lep_charge', 'sublead_lep_pt', 'sublead_lep_eta', 'sublead_lep_phi', 'sublead_lep_charge', 'dilepton_mass', 'dilepton_pt', 'min_bl_dR', 'min_mt_lep_met', 'label', 'weight', ] for var in variables: out_dict.update({var: processor.column_accumulator(np.zeros(shape=(0, )))}) class ML_preprocessor(processor.ProcessorABC): ''' e.g. deltaR of leptons, min deltaR of lepton and jet ''' def __init__(self, year=2018): self.year = year self._accumulator = processor.dict_accumulator(out_dict) self.btagSF = btag_scalefactor(year) self.leptonSF = LeptonSF(year=year)
def empty_column_accumulator_int64():
    """Return a column accumulator wrapping an empty 1-D int64 array."""
    empty_column = np.empty(0, dtype=np.int64)
    return processor.column_accumulator(empty_column)
def process(self, events):
    """Fill the single-lepton tight / fakeable histograms for one chunk.

    Events need at least one jet. Muon and electron collections are built
    with the "tightFCNC" and "fakeableFCNC" working points. An event enters
    the muon (electron) histograms when it has exactly one fakeable muon
    (electron), at most one tight one, at least one selected jet not matched
    to a fakeable lepton within deltaR 1.0, MET < 20 and min mT < 20.

    When ``self.debug`` is set, a table of the selected tight-muon events
    (plus a hard-coded list of extra event numbers) is appended to
    ``output['muons_df']``.

    Parameters
    ----------
    events : event array exposing ``metadata['dataset']`` and the Jet, MET,
        Electron and Muon collections.

    Returns
    -------
    The filled accumulator for this chunk.
    """
    # corrects for rare case where there isn't a single jet in event
    events = events[ak.num(events.Jet) > 0]

    output = self.accumulator.identity()

    # we can use a very loose preselection to filter the events.
    # nothing is done with this presel, though
    presel = ak.num(events.Jet) >= 0

    ev = events[presel]
    dataset = ev.metadata['dataset']

    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)

    ### For FCNC, we want electron -> tightTTH
    electron = Collections(ev, "Electron", "tightFCNC").get()
    fakeableelectron = Collections(ev, "Electron", "fakeableFCNC").get()

    muon = Collections(ev, "Muon", "tightFCNC").get()
    fakeablemuon = Collections(ev, "Muon", "fakeableFCNC").get()

    ## MET -> can switch to puppi MET
    # NOTE(review): mT is built from the fakeable muons only, yet the same
    # 'mt<20' requirement also enters the electron selections -- confirm.
    lepton = fakeablemuon  #ak.concatenate([fakeablemuon, fakeableelectron], axis=1)
    mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
    min_mt_lep_met = ak.min(mt_lep_met, axis=1)

    selection = PackedSelection()
    selection.add('MET<20', (ev.MET.pt < 20))
    selection.add('mt<20', min_mt_lep_met < 20)
    #selection.add('MET<19', (ev.MET.pt<19) )
    selection_reqs = ['MET<20', 'mt<20']  #, 'MET<19']
    fcnc_reqs_d = {sel: True for sel in selection_reqs}
    fcnc_selection = selection.require(**fcnc_reqs_d)

    # define the weight
    # NOTE(review): this weight object is built but never passed to the
    # histogram fills below -- confirm whether the fills should be weighted.
    weight = Weights(len(ev))

    if dataset != 'MuonEG':
        # generator weight
        weight.add("weight", ev.genWeight)

    jets = getJets(ev, maxEta=2.4, minPt=25, pt_var='pt')

    n_tight_mu = ak.num(muon)
    n_loose_mu = ak.num(fakeablemuon)
    n_tight_e = ak.num(electron)
    n_loose_e = ak.num(fakeableelectron)

    # exactly one fakeable lepton and zero or one tight lepton of that flavour
    single_muon_sel = (n_loose_mu == 1) & ((n_tight_mu == 1) | (n_tight_mu == 0))
    single_electron_sel = (n_loose_e == 1) & ((n_tight_e == 1) | (n_tight_e == 0))

    # at least one jet that is not matched to a fakeable lepton
    fcnc_muon_sel = ((ak.num(jets[~match(jets, fakeablemuon, deltaRCut=1.0)]) >= 1)
                     & fcnc_selection & single_muon_sel)
    fcnc_electron_sel = ((ak.num(jets[~match(jets, fakeableelectron, deltaRCut=1.0)]) >= 1)
                         & fcnc_selection & single_electron_sel)

    tight_muon_sel = (n_tight_mu == 1) & fcnc_muon_sel
    loose_muon_sel = (n_loose_mu == 1) & fcnc_muon_sel
    tight_electron_sel = (n_tight_e == 1) & fcnc_electron_sel
    loose_electron_sel = (n_loose_e == 1) & fcnc_electron_sel

    output['single_mu_fakeable'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(fakeablemuon[loose_muon_sel].conePt)),
        eta=np.abs(ak.to_numpy(ak.flatten(fakeablemuon[loose_muon_sel].eta))))
    output['single_mu'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(muon[tight_muon_sel].conePt)),
        eta=np.abs(ak.to_numpy(ak.flatten(muon[tight_muon_sel].eta))))
    output['single_e_fakeable'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(fakeableelectron[loose_electron_sel].conePt)),
        eta=np.abs(ak.to_numpy(ak.flatten(fakeableelectron[loose_electron_sel].eta))))
    output['single_e'].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(electron[tight_electron_sel].conePt)),
        eta=np.abs(ak.to_numpy(ak.flatten(electron[tight_electron_sel].eta))))

    if self.debug:
        muon_columns = [
            "pt", "conePt", "eta", "dz", "dxy", "ptErrRel",
            "miniPFRelIso_all", "jetRelIsoV2", "jetRelIso", "jetPtRelv2"
        ]

        # create pandas dataframe for debugging
        passed_events = ev[tight_muon_sel]
        passed_muons = muon[tight_muon_sel]
        event_p = ak.to_pandas(passed_events[["event"]])
        event_p["MET_PT"] = passed_events["MET"]["pt"]
        event_p["mt"] = min_mt_lep_met[tight_muon_sel]
        event_p["num_tight_mu"] = ak.to_numpy(n_tight_mu[tight_muon_sel])
        # converted with ak.to_numpy like the other count columns
        event_p["num_loose_mu"] = ak.to_numpy(n_loose_mu[tight_muon_sel])
        muon_p = ak.to_pandas(ak.flatten(passed_muons)[muon_columns])

        # convert to numpy array for the output
        events_array = pd.concat([muon_p, event_p], axis=1)

        # extra events dumped regardless of whether they pass the selection
        events_to_add = [6886009]
        for e in events_to_add:
            is_event = ev.event == e
            tmp_event = ev[is_event]
            added_event = ak.to_pandas(tmp_event[["event"]])
            added_event["MET_PT"] = tmp_event["MET"]["pt"]
            added_event["mt"] = min_mt_lep_met[is_event]
            added_event["num_tight_mu"] = ak.to_numpy(n_tight_mu[is_event])
            added_event["num_loose_mu"] = ak.to_numpy(n_loose_mu[is_event])
            add_muon = ak.to_pandas(ak.flatten(muon[is_event])[muon_columns])
            add_concat = pd.concat([add_muon, added_event], axis=1)
            events_array = pd.concat([events_array, add_concat], axis=0)

        output['muons_df'] += processor.column_accumulator(events_array.to_numpy())

    return output
def empty_column_accumulator_float16():
    """Return a column accumulator wrapping an empty 1-D float16 array."""
    empty_column = np.empty(0, dtype=np.float16)
    return processor.column_accumulator(empty_column)
def process(self, df):
    """Accumulate flat per-lepton-jet columns for events with >= 2 lepton-jets.

    Builds the event weight from the generator weight and pileup corrections
    (skipped when ``self.data_type == 'data'``) plus the trigger, cosmic-veto
    and primary-vertex masks. It then selects events holding at least two
    lepton-jets, optionally restricts them to the |delta phi| < pi/2 region
    (``self.dphi_control``), and appends the flattened isolation, kinematic,
    weight, type and channel columns to the accumulator.

    Returns the accumulator identity untouched when the chunk is empty or no
    event survives a selection step.
    """
    output = self.accumulator.identity()
    if df.size == 0:
        return output

    dataset = df['dataset']

    # ---- event weights -------------------------------------------------
    event_weights = processor.Weights(df.size)
    if self.data_type != 'data':
        event_weights.add('genw', df['weight'])
        true_npv = df['trueInteractionNum']
        event_weights.add('pileup', *(corr(true_npv) for corr in self.pucorrs))
    # An event passes if any of the configured trigger branches fired.
    any_trigger = np.logical_or.reduce([df[path] for path in Triggers])
    event_weights.add('trigger', any_trigger)
    event_weights.add('cosmicveto', df['cosmicveto_result'])
    event_weights.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])
    weight = event_weights.weight()

    # ---- lepton-jet candidates ----------------------------------------
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'],
        py=df['pfjet_p4.fCoordinates.fY'],
        pz=df['pfjet_p4.fCoordinates.fZ'],
        energy=df['pfjet_p4.fCoordinates.fT'],
        pfisoAll05=df['pfjet_pfIsolation05'],
        pfisoNopu05=df['pfjet_pfIsolationNoPU05'],
        pfisoDbeta=df['pfjet_pfiso'],
        ncands=df['pfjet_pfcands_n'],
    )

    # Daughter type codes: 3 and 8 are counted separately below
    # (named "pfmu" and "dsa" in the original code).
    daughter_type = awkward.fromiter(df['pfjet_pfcand_type'])
    is_type3 = daughter_type == 3
    is_type8 = daughter_type == 8
    n_pfmu = is_type3.sum()
    n_dsa = is_type8.sum()
    without_dsa = n_dsa == 0
    label = (
        ((n_pfmu == 0) & without_dsa).astype(int) * 1
        + ((n_pfmu >= 2) & without_dsa).astype(int) * 2
        + (n_dsa > 0).astype(int) * 3
    )
    leptonjets.add_attributes(label=label)

    n_mu = (is_type3 | is_type8).sum()
    leptonjets.add_attributes(ismutype=(n_mu >= 2), iseltype=(n_mu == 0))

    # ---- require at least two lepton-jets -----------------------------
    has_two = leptonjets.counts >= 2
    dileptonjets = leptonjets[has_two]
    wgt = weight[has_two]
    if dileptonjets.size == 0:
        return output

    # lj0: highest-pt lepton-jet; lj1: second entry of the pt argsort.
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    # ---- channel definition: 1 -> 2mu2e, 2 -> 4mu, 0 -> neither -------
    n_mu_type_lj = dileptonjets.ismutype.sum()
    mu_in_either = (lj0.ismutype | lj1.ismutype).flatten()
    mu_in_both = (lj0.ismutype & lj1.ismutype).flatten()
    channel_ = (
        ((n_mu_type_lj == 1) & mu_in_either).astype(int) * 1
        + ((n_mu_type_lj == 2) & mu_in_both).astype(int) * 2
    )

    # ---- optional delta-phi control region ----------------------------
    in_control = (np.abs(lj0.p4.delta_phi(lj1.p4)) < np.pi / 2).flatten()
    if self.dphi_control:
        dileptonjets = dileptonjets[in_control]
        wgt = wgt[in_control]
        lj0 = lj0[in_control]
        lj1 = lj1[in_control]
        channel_ = channel_[in_control]

    if dileptonjets.size == 0:
        return output

    if self.data_type == 'bkg':
        wgt *= bkgSCALE[dataset]

    # ---- fill the output columns --------------------------------------
    # Jagged array of ones used to broadcast per-event values to every
    # lepton-jet of that event.
    per_jet_ones = dileptonjets.pt.ones_like()
    columns = {
        'all05': dileptonjets.pfisoAll05,
        'nopu05': dileptonjets.pfisoNopu05,
        'dbeta': dileptonjets.pfisoDbeta,
        'all05w': dileptonjets.pfisoAll05 / dileptonjets.ncands,
        'nopu05w': dileptonjets.pfisoNopu05 / dileptonjets.ncands,
        'dbetaw': dileptonjets.pfisoDbeta / dileptonjets.ncands,
        'pt': dileptonjets.pt,
        'eta': dileptonjets.eta,
        'wgt': per_jet_ones * wgt,
        'ljtype': (dileptonjets.ismutype.astype(int) * 1
                   + dileptonjets.iseltype.astype(int) * 2),
        'channel': per_jet_ones * channel_,
    }
    for key, jagged in columns.items():
        output[key] += processor.column_accumulator(jagged.flatten())

    return output
def empty_column_accumulator_bool():
    """Return a column accumulator wrapping a zero-length boolean array.

    The builtin ``bool`` is used as the dtype instead of the ``np.bool``
    alias: that alias was deprecated in NumPy 1.20 and removed in 1.24, where
    accessing it raises ``AttributeError``. ``bool`` yields the same NumPy
    boolean dtype on every NumPy version.
    """
    return processor.column_accumulator(np.array([], dtype=bool))
def __init__(
        self,
        year='2017',
        systematics=True,
        jet_arbitration='pt',
        tagger='v2',
        nnlops_rew=False,
        skipJER=False,
        tightMatch=False,
        newTrigger=False,
        looseTau=False,
        newVjetsKfactor=False,
        ak4tagger='deepcsv',
        skipRunB=False,
        finebins=False,
        ewkHcorr=False,
        evtVizInfo=False,
):
    """Store the configuration flags and prepare the static lookup tables.

    Besides copying the options onto the instance this sets up the b-tag
    corrector, the per-year soft-drop mass scale factors, trigger lists, MET
    filter lists and golden-JSON paths, and installs ``self.make_output``, a
    zero-argument callable that builds a fresh output dictionary of
    histograms and column accumulators.

    Options with a documented meaning in the original code:
        nnlops_rew: for 2018, reweight POWHEG to NNLOPS.
        newTrigger: fewer triggers, new maps (2017 only, ~no effect).
        looseTau: looser tau veto.
        ak4tagger: ``'deepcsv'`` or ``'deepjet'``; selects the AK4 b-tag
            branch name.
        tagger: ``'v3'`` selects one set of tagger-score bin edges, any
            other value selects the alternative set.
        finebins: use finer mass binning and keep default axis flow.

    Raises:
        NotImplementedError: if ``ak4tagger`` is neither ``'deepcsv'`` nor
            ``'deepjet'``.
    """
    # ---- plain option bookkeeping --------------------------------------
    self._year = year
    self._tagger = tagger
    self.systematics = systematics
    self._nnlops_rew = nnlops_rew  # for 2018, reweight POWHEG to NNLOPS
    self._jet_arbitration = jet_arbitration
    self._skipJER = skipJER
    self._tightMatch = tightMatch
    self._newVjetsKfactor = newVjetsKfactor
    self._newTrigger = newTrigger  # Fewer triggers, new maps (2017 only, ~no effect)
    self._looseTau = looseTau  # Looser tau veto
    self._ewkHcorr = ewkHcorr
    self._ak4tagger = ak4tagger
    self._skipRunB = skipRunB
    self._finebins = finebins
    self._evtVizInfo = evtVizInfo

    # ---- AK4 b-tagger branch -------------------------------------------
    if self._ak4tagger not in ('deepcsv', 'deepjet'):
        raise NotImplementedError()
    self._ak4tagBranch = (
        'btagDeepB' if self._ak4tagger == 'deepcsv' else 'btagDeepFlavB'
    )
    self._btagSF = BTagCorrector(year, self._ak4tagger, 'medium')

    # ---- per-year constants --------------------------------------------
    self._msdSF = {'2016': 1., '2017': 0.987, '2018': 0.970}

    self._muontriggers = {
        '2016': ['Mu50'],  # TODO: check
        '2017': ['Mu50', 'TkMu50'],
        '2018': ['Mu50'],  # TODO: check
    }

    self._triggers = {
        '2016': [
            'PFHT800',
            'PFHT900',
            'AK8PFJet360_TrimMass30',
            'AK8PFHT700_TrimR0p1PT0p03Mass50',
            'PFHT650_WideJetMJJ950DEtaJJ1p5',
            'PFHT650_WideJetMJJ900DEtaJJ1p5',
            'AK8DiPFJet280_200_TrimMass30_BTagCSV_p20',
            'PFJet450',
        ],
        '2017': [
            'AK8PFJet330_PFAK8BTagCSV_p17',
            'PFHT1050',
            'AK8PFJet400_TrimMass30',
            'AK8PFJet420_TrimMass30',  # redundant
            'AK8PFHT800_TrimMass50',
            'PFJet500',
            'AK8PFJet500',
        ],
        '2018': [
            'AK8PFJet400_TrimMass30',
            'AK8PFJet420_TrimMass30',
            'AK8PFHT800_TrimMass50',
            'PFHT1050',
            'PFJet500',
            'AK8PFJet500',
            'AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4',
        ],
    }

    # https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2
    # Every list starts with the same six filters; data additionally gets
    # 'eeBadScFilter', and 2017/2018 append 'ecalBadCalibFilterV2'.
    common_filters = [
        'goodVertices',
        'globalSuperTightHalo2016Filter',
        'HBHENoiseFilter',
        'HBHENoiseIsoFilter',
        'EcalDeadCellTriggerPrimitiveFilter',
        'BadPFMuonFilter',
    ]
    self._met_filters = {
        '2016': {
            'data': common_filters + ['eeBadScFilter'],
            'mc': list(common_filters),
        },
    }
    for later_year in ('2017', '2018'):
        self._met_filters[later_year] = {
            'data': common_filters + ['eeBadScFilter', 'ecalBadCalibFilterV2'],
            'mc': common_filters + ['ecalBadCalibFilterV2'],
        }

    self._json_paths = {
        '2016': 'jsons/Cert_271036-284044_13TeV_23Sep2016ReReco_Collisions16_JSON.txt',
        '2017': 'jsons/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt',
        '2018': 'jsons/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
    }

    # ---- shared histogram axes -----------------------------------------
    msd_label = r'Jet $m_{sd}$'
    pt_label = r'Jet $p_{T}$ [GeV]'

    if self._tagger == 'v3':
        ddb_edges = [0, 0.7, 0.89, 1]
        ddc_edges = [0, 0.44, .84, 1]
        ddcvb_edges = [0, 0.017, 0.11, 1]
    else:
        ddb_edges = [0, 0.7, 1]
        ddc_edges = [0, 0.4, 0.45, 0.5, 0.7, 1]
        ddcvb_edges = [0, 0.01, 0.03, 0.1, 1]
    taggerbins = (
        hist2.axis.Variable(ddb_edges, name='ddb', label=r'Jet ddb score', flow=False),
        hist2.axis.Variable(ddc_edges, name='ddc', label=r'Jet ddc score', flow=False),
        hist2.axis.Variable(ddcvb_edges, name='ddcvb', label=r'Jet ddcvb score', flow=False),
    )

    pt_edges = [450, 475, 500, 550, 600, 675, 800, 1200]
    if self._finebins:
        mass_bins = hist2.axis.Regular(200, 40, 200, name='msd', label=msd_label)
        pt_bins = hist2.axis.Variable(pt_edges, name='pt', label=pt_label)
    else:
        mass_bins = hist2.axis.Regular(23, 40, 201, name='msd', label=msd_label, flow=False)
        pt_bins = hist2.axis.Variable(pt_edges, name='pt', label=pt_label, flow=False)

    gen_axis = hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor')
    optbins = np.r_[np.linspace(0, 0.15, 30, endpoint=False),
                    np.linspace(0.15, 1, 86)]

    # ---- output factory -------------------------------------------------
    # The small factories below create a fresh axis object on every call,
    # exactly as the repeated inline constructors did.
    def growing_str_axis(axis_name):
        return hist2.axis.StrCategory([], name=axis_name, growth=True)

    def cut_axis():
        return hist2.axis.IntCategory(
            [0, 1, 2, 3], name='cut', label='Cut index', growth=True)

    def own_genflavor_axis():
        return hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor')

    def opt_msd_axis():
        return hist2.axis.Variable(
            [40, 70, 80, 90, 100, 110, 120, 130, 140], name='msd', label=msd_label)

    def reco_pt_response_axis():
        return hist2.axis.Variable(
            [450, 500, 550, 600, 675, 800, 1200], name='pt', label=pt_label)

    def empty_column():
        return processor.column_accumulator(np.array([]))

    def build_output():
        return {
            'sumw': 0.,
            'to_check': {
                "mass": empty_column(),
                "njet": empty_column(),
                "fname": empty_column(),
                "run": empty_column(),
                "luminosityBlock": empty_column(),
                "event": empty_column(),
            },
            'cutflow_msd': hist2.Hist(
                growing_str_axis('region'),
                gen_axis,
                cut_axis(),
                mass_bins,
                hist2.storage.Weight(),
            ),
            'cutflow_eta': hist2.Hist(
                growing_str_axis('region'),
                gen_axis,
                cut_axis(),
                hist2.axis.Regular(40, -2.5, 2.5, name='eta', label=r'Jet $\eta$'),
                hist2.storage.Weight(),
            ),
            'cutflow_pt': hist2.Hist(
                growing_str_axis('region'),
                gen_axis,
                cut_axis(),
                hist2.axis.Regular(100, 400, 1200, name='pt', label=pt_label),
                hist2.storage.Weight(),
            ),
            'nminus1_n2ddt': hist2.Hist(
                growing_str_axis('region'),
                hist2.axis.Regular(40, -0.25, 0.25, name='n2ddt', label='N2ddt value'),
                hist2.storage.Weight(),
            ),
            'btagWeight': hist2.Hist(
                hist2.axis.Regular(50, 0, 3, name='val', label='BTag correction'),
                hist2.storage.Weight(),
            ),
            'templates': hist2.Hist(
                growing_str_axis('region'),
                growing_str_axis('systematic'),
                growing_str_axis('runid'),
                gen_axis,
                pt_bins,
                mass_bins,
                *taggerbins,
                hist2.storage.Weight(),
            ),
            'wtag': hist2.Hist(
                growing_str_axis('region'),
                growing_str_axis('systematic'),
                gen_axis,
                hist2.axis.Variable(
                    [-1, 0, 1], name='n2ddt', label=r'N2ddt value', flow=False),
                hist2.axis.Variable(
                    [200, 250, 300, 350, 400, 450, 1200], name='pt', label=pt_label),
                hist2.axis.Regular(
                    46, 40, 201, name='msd', label=msd_label, flow=False),
                *taggerbins[1:],
                hist2.storage.Weight(),
            ),
            'signal_opt': hist2.Hist(
                growing_str_axis('region'),
                own_genflavor_axis(),
                hist2.axis.Variable(optbins, name='ddc', label=r'Jet CvL score'),
                hist2.axis.Variable(optbins, name='ddcvb', label=r'Jet CvB score'),
                opt_msd_axis(),
                hist2.storage.Weight(),
            ),
            'signal_optb': hist2.Hist(
                growing_str_axis('region'),
                own_genflavor_axis(),
                hist2.axis.Variable(optbins, name='ddb', label=r'Jet BvL score'),
                opt_msd_axis(),
                hist2.storage.Weight(),
            ),
            'genresponse_noweight': hist2.Hist(
                growing_str_axis('region'),
                growing_str_axis('systematic'),
                reco_pt_response_axis(),
                hist2.axis.Variable(
                    np.geomspace(400, 1200, 60), name='genpt',
                    label=r'Generated Higgs $p_{T}$ [GeV]'),
                hist2.storage.Double(),
            ),
            'genresponse': hist2.Hist(
                growing_str_axis('region'),
                growing_str_axis('systematic'),
                reco_pt_response_axis(),
                hist2.axis.Variable(
                    [200, 300, 450, 650, 7500], name='genpt',
                    label=r'Generated Higgs $p_{T}$ [GeV]'),
                hist2.storage.Weight(),
            ),
        }

    self.make_output = build_output
def process(self, events):
    """Select dimuon, D0 and D* candidates and save them as column accumulators.

    The method builds the candidate collections from ``events``, applies the
    selection cuts while recording the surviving candidate count of each step
    in ``output['cutflow']``, associates dimuons with D* candidates, and
    writes the full accumulator to
    ``output/<analyzer_name>/<analyzer_name>_<hash>.coffea``.

    Generator-level collections are only built and filled when
    ``self.analysis_type == 'mc'``; otherwise the corresponding accumulators
    are saved empty. The module-level ``loose`` flag switches between the
    loose and nominal muon-pt and D0 thresholds.

    Returns:
        The untouched accumulator identity when ``events`` is empty;
        otherwise a ``dict_accumulator`` holding only the ``'cutflow'``
        entry (the bulk of the data has already been saved to disk).
    """

    def flat_column(array):
        # Flatten one level of jaggedness and wrap as a column accumulator.
        return processor.column_accumulator(ak.to_numpy(ak.flatten(array)))

    def count_column(array):
        # Per-event multiplicity of a jagged collection as a column.
        return processor.column_accumulator(ak.to_numpy(ak.num(array)))

    output = self.accumulator.identity()

    # Nothing to do for an empty chunk.
    if len(events) == 0:
        return output

    cutflow = output['cutflow']
    is_mc = (self.analysis_type == 'mc')

    ############### Get the main primary vertex properties ###############
    Primary_vertex = ak.zip({**get_vars_dict(events, primary_vertex_cols)})

    ############### Get the gen particles properties ###############
    if is_mc:
        Gen_particles = ak.zip({**get_vars_dict(events, gen_part_cols)})
        # Select generator particles by pdgId: 443 -> "Jpsi", 413 -> "Dstar",
        # 421 -> "D0" (names as used for the output collections below).
        Gen_Jpsi = Gen_particles[Gen_particles.pdgId == 443]
        Gen_Dstar = Gen_particles[Gen_particles.pdgId == 413]
        Gen_D0 = Gen_particles[Gen_particles.pdgId == 421]
    elif (self.analysis_type == 'data'):
        Gen_particles = ak.zip([[]])

    ############### Get all the interesting candidates from NTuples
    Dimu = ak.zip({**get_vars_dict(events, dimu_cols)}, with_name="PtEtaPhiMCandidate")
    Muon = ak.zip({**get_vars_dict(events, muon_cols)}, with_name="PtEtaPhiMCandidate")
    D0 = ak.zip({'mass': events.D0_mass12,
                 **get_vars_dict(events, d0_cols)}, with_name="PtEtaPhiMCandidate")
    Dstar = ak.zip({'mass': (events.DstarD0_mass + events.Dstar_deltamr),
                    'charge': events.Dstarpis_chg,
                    **get_vars_dict(events, dstar_cols)}, with_name="PtEtaPhiMCandidate")

    cutflow['Number of events'] += len(events)
    cutflow['Number of Dimu'] += ak.sum(ak.num(Dimu))
    cutflow['all D0'] += ak.sum(ak.num(D0))
    cutflow['all Dstar'] += ak.sum(ak.num(Dstar))

    ############### Dimu cuts: charge = 0, mass windows
    Dimu = Dimu[Dimu.charge == 0]
    cutflow['Dimu 0 charge'] += ak.sum(ak.num(Dimu))

    Dimu = Dimu[((Dimu.mass > 8.5) & (Dimu.mass < 11.5))
                | ((Dimu.mass > 2.9) & (Dimu.mass < 3.3))
                | ((Dimu.mass > 3.35) & (Dimu.mass < 4.05))]
    cutflow['Quarkonia mass'] += ak.sum(ak.num(Dimu))

    # Prompt / non-prompt and pointing-angle masks. They are computed but
    # deliberately NOT applied (the original code had the corresponding
    # selections commented out).
    dimuon_prompt_cut = (Dimu.dlSig > 0) & (Dimu.dlSig < 2.5)
    dimuon_nonprompt_cut = (Dimu.dlSig > 4)
    dimuon_pointing_cut = (Dimu.cosphi > 0.99)

    ############### Get the muons of each dimuon, for cuts on their params
    Muon = ak.zip({'0': Muon[Dimu.t1_muIdx], '1': Muon[Dimu.t2_muIdx]})

    # SoftId and global-muon cuts (both legs must pass)
    soft_id = (Muon.slot0.softId > 0) & (Muon.slot1.softId > 0)
    Dimu = Dimu[soft_id]
    Muon = Muon[soft_id]
    cutflow['Dimu muon softId'] += ak.sum(ak.num(Dimu))

    global_muon = (Muon.slot0.isGlobal > 0) & (Muon.slot1.isGlobal > 0)
    Dimu = Dimu[global_muon]
    Muon = Muon[global_muon]
    cutflow['Dimu muon global'] += ak.sum(ak.num(Dimu))

    # pt and eta cuts
    if loose:
        muon_pt_cut = (Muon.slot0.pt > 1) & (Muon.slot1.pt > 1)
    else:
        muon_pt_cut = (Muon.slot0.pt > 3) & (Muon.slot1.pt > 3)
    Dimu = Dimu[muon_pt_cut]
    Muon = Muon[muon_pt_cut]
    cutflow['Dimu muon pt cut'] += ak.sum(ak.num(Dimu))

    muon_eta_cut = (np.absolute(Muon.slot0.eta) <= 2.4) & (np.absolute(Muon.slot1.eta) <= 2.4)
    Dimu = Dimu[muon_eta_cut]
    Muon = Muon[muon_eta_cut]
    cutflow['Dimu muon eta cut'] += ak.sum(ak.num(Dimu))

    # Tag which mass window each surviving dimuon falls into.
    Dimu['is_ups'] = (Dimu.mass > 8.5) & (Dimu.mass < 11.5)
    Dimu['is_jpsi'] = (Dimu.mass > 2.9) & (Dimu.mass < 3.3)
    Dimu['is_psi'] = (Dimu.mass > 3.35) & (Dimu.mass < 4.05)

    ############### Cuts for D0
    # The loose and nominal selections run the same chain of cuts; only the
    # thresholds below differ. The dlSig threshold is 5 in both modes.
    if loose:
        d0_trk_pt_min, d0_trk_chi2_max, d0_trk_nvalid_min = 0.4, 4, 2
        d0_cosphi_min = 0.1
    else:
        d0_trk_pt_min, d0_trk_chi2_max, d0_trk_nvalid_min = 0.8, 2.5, 4
        d0_cosphi_min = 0.99
    d0_dlsig_min = 5.

    D0 = D0[~D0.hasMuon]
    cutflow['D0 trk muon cut'] += ak.sum(ak.num(D0))

    D0 = D0[(D0.t1_pt > d0_trk_pt_min) & (D0.t2_pt > d0_trk_pt_min)]
    cutflow['D0 trk pt cut'] += ak.sum(ak.num(D0))

    D0 = D0[(D0.t1_chindof < d0_trk_chi2_max) & (D0.t2_chindof < d0_trk_chi2_max)]
    cutflow['D0 trk chi2 cut'] += ak.sum(ak.num(D0))

    D0 = D0[(D0.t1_nValid > d0_trk_nvalid_min) & (D0.t2_nValid > d0_trk_nvalid_min)
            & (D0.t1_nPix > 1) & (D0.t2_nPix > 1)]
    cutflow['D0 trk hits cut'] += ak.sum(ak.num(D0))

    D0 = D0[(D0.t1_dxy < 0.1) & (D0.t2_dxy < 0.1)]
    cutflow['D0 trk dxy cut'] += ak.sum(ak.num(D0))

    D0 = D0[(D0.t1_dz < 1.) & (D0.t2_dz < 1.)]
    cutflow['D0 trk dz cut'] += ak.sum(ak.num(D0))

    # D0 cosphi
    D0 = D0[D0.cosphi > d0_cosphi_min]
    cutflow['D0 cosphi cut'] += ak.sum(ak.num(D0))

    # D0 decay-length significance
    D0 = D0[D0.dlSig > d0_dlsig_min]
    cutflow['D0 dlSig cut'] += ak.sum(ak.num(D0))

    # D0 pt
    D0 = D0[D0.pt > 3.]
    cutflow['D0 pt cut'] += ak.sum(ak.num(D0))

    ############### Cuts for Dstar
    # Track cuts. Each step now has its own cutflow key; previously the
    # chi2, dxy and dz counts were all added to 'Dstar trk pt cut'.
    Dstar = Dstar[~Dstar.hasMuon]
    cutflow['Dstar trk muon cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[(Dstar.K_pt > 0.5) & (Dstar.pi_pt > 0.5)]
    cutflow['Dstar trk pt cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[(Dstar.K_chindof < 2.5) & (Dstar.pi_chindof < 2.5)]
    cutflow['Dstar trk chi2 cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[(Dstar.K_nValid > 4) & (Dstar.pi_nValid > 4)
                  & (Dstar.K_nPix > 1) & (Dstar.pi_nPix > 1)]
    cutflow['Dstar trk hits cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[(Dstar.K_dxy < 0.1) & (Dstar.pi_dxy < 0.1)]
    cutflow['Dstar trk dxy cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[(Dstar.K_dz < 1) & (Dstar.pi_dz < 1)]
    cutflow['Dstar trk dz cut'] += ak.sum(ak.num(Dstar))

    # pis cuts
    Dstar = Dstar[Dstar.pis_pt > 0.3]
    cutflow['Dstar pis pt cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[Dstar.pis_chindof < 3]
    cutflow['Dstar pis chi2 cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[Dstar.pis_nValid > 2]
    cutflow['Dstar pis hits cut'] += ak.sum(ak.num(Dstar))

    # D0 of Dstar cuts
    Dstar = Dstar[Dstar.D0_cosphi > 0.99]
    cutflow['Dstar D0 cosphi cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[(Dstar.D0_mass < D0_PDG_MASS + 0.025) & (Dstar.D0_mass > D0_PDG_MASS - 0.025)]
    cutflow['Dstar D0 mass cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[Dstar.D0_pt > 3]
    cutflow['Dstar D0 pt cut'] += ak.sum(ak.num(Dstar))

    Dstar = Dstar[Dstar.D0_dlSig > 3]
    cutflow['Dstar D0 dlSig cut'] += ak.sum(ak.num(Dstar))

    # Flag candidates whose K and pi tracks carry the same charge.
    Dstar['wrg_chg'] = (Dstar.K_chg == Dstar.pi_chg)

    ############### Dimu + OpenCharm associations
    DimuDstar = association(Dimu, Dstar)

    ############### Final computation of number of objects
    cutflow['Dimu final'] += ak.sum(ak.num(Dimu))
    cutflow['D0 final'] += ak.sum(ak.num(D0))
    cutflow['Dstar final'] += ak.sum(ak.num(Dstar))
    cutflow['Dimu Dstar Associated'] += ak.sum(ak.num(DimuDstar))

    ############### Leading and trailing muon separation
    leading_mu = (Muon.slot0.pt > Muon.slot1.pt)
    Muon_lead = ak.where(leading_mu, Muon.slot0, Muon.slot1)
    Muon_trail = ak.where(~leading_mu, Muon.slot0, Muon.slot1)

    ############### Create the accumulators to save output
    # Primary vertex accumulator (columns are stored without flattening)
    primary_vertex_acc = processor.dict_accumulator({})
    for var in Primary_vertex.fields:
        primary_vertex_acc[var] = processor.column_accumulator(ak.to_numpy(Primary_vertex[var]))
    output["Primary_vertex"] = primary_vertex_acc

    # Generator-level accumulators: filled for MC, saved empty otherwise.
    gen_part_acc = processor.dict_accumulator({})
    gen_jpsi_acc = processor.dict_accumulator({})
    gen_dstar_acc = processor.dict_accumulator({})
    gen_d0_acc = processor.dict_accumulator({})
    if is_mc:
        gen_collections = (
            (gen_part_acc, Gen_particles, 'nGenPart'),
            (gen_jpsi_acc, Gen_Jpsi, 'nGenJpsi'),
            (gen_dstar_acc, Gen_Dstar, 'nGenDstar'),
            (gen_d0_acc, Gen_D0, 'nGenD0'),
        )
        for acc, collection, count_key in gen_collections:
            for var in collection.fields:
                acc[var] = flat_column(collection[var])
            # Count the collection itself rather than an arbitrary field
            # selected by a loop variable left over from the loop above.
            acc[count_key] = count_column(collection)
    output["Gen_particles"] = gen_part_acc
    output["Gen_Jpsi"] = gen_jpsi_acc
    output["Gen_Dstar"] = gen_dstar_acc
    output["Gen_D0"] = gen_d0_acc

    muon_lead_acc = processor.dict_accumulator({})
    for var in Muon_lead.fields:
        muon_lead_acc[var] = flat_column(Muon_lead[var])
    muon_lead_acc["nMuon"] = count_column(Muon_lead)
    output["Muon_lead"] = muon_lead_acc

    muon_trail_acc = processor.dict_accumulator({})
    for var in Muon_trail.fields:
        muon_trail_acc[var] = flat_column(Muon_trail[var])
    muon_trail_acc["nMuon"] = count_column(Muon_trail)
    output["Muon_trail"] = muon_trail_acc

    dimu_acc = processor.dict_accumulator({})
    for var in Dimu.fields:
        # Fields whose name starts with 't' are not saved with the dimuon.
        if var.startswith('t'):
            continue
        dimu_acc[var] = flat_column(Dimu[var])
    dimu_acc["nDimu"] = count_column(Dimu)
    output["Dimu"] = dimu_acc

    D0_acc = processor.dict_accumulator({})
    D0_trk_acc = processor.dict_accumulator({})
    for var in D0.fields:
        # Fields starting with 't' go to the separate D0 track accumulator.
        if var.startswith('t'):
            D0_trk_acc[var] = flat_column(D0[var])
        else:
            D0_acc[var] = flat_column(D0[var])
    D0_acc["nD0"] = count_column(D0)
    output["D0"] = D0_acc
    output["D0_trk"] = D0_trk_acc

    Dstar_acc = processor.dict_accumulator({})
    Dstar_D0_acc = processor.dict_accumulator({})
    Dstar_trk_acc = processor.dict_accumulator({})
    for var in Dstar.fields:
        if var.startswith('D0'):
            Dstar_D0_acc[var] = flat_column(Dstar[var])
        elif var.startswith('K') or var.startswith('pi'):
            Dstar_trk_acc[var] = flat_column(Dstar[var])
        else:
            Dstar_acc[var] = flat_column(Dstar[var])
    Dstar_acc["nDstar"] = count_column(Dstar)
    output["Dstar"] = Dstar_acc
    output["Dstar_D0"] = Dstar_D0_acc
    output["Dstar_trk"] = Dstar_trk_acc

    DimuDstar_acc = processor.dict_accumulator({})
    DimuDstar_acc['Dimu'] = processor.dict_accumulator({})
    DimuDstar_acc['Dstar'] = processor.dict_accumulator({})
    for var in DimuDstar.fields:
        if (var == '0') or (var == '1'):
            # The two constituents are saved field by field below.
            continue
        elif var == 'cand':
            for i0 in DimuDstar[var].fields:
                DimuDstar_acc[i0] = flat_column(DimuDstar[var][i0])
        else:
            DimuDstar_acc[var] = flat_column(DimuDstar[var])

    for var in DimuDstar.slot0.fields:
        DimuDstar_acc['Dimu'][var] = flat_column(DimuDstar.slot0[var])

    for var in DimuDstar.slot1.fields:
        DimuDstar_acc['Dstar'][var] = flat_column(DimuDstar.slot1[var])

    DimuDstar_acc['nDimuDstar'] = count_column(DimuDstar)
    output['DimuDstar'] = DimuDstar_acc

    # Random 128-bit tag plus the chunk length keeps the per-chunk output
    # file names distinct.
    file_hash = str(random.getrandbits(128)) + str(len(events))
    save(output, "output/" + self.analyzer_name + "/" + self.analyzer_name + "_" + file_hash + ".coffea")

    # return dummy accumulator
    return processor.dict_accumulator({
        'cutflow': output['cutflow']
    })