def test_lumimask():
    """Round-trip a LumiMask through cloudpickle and confirm both copies agree."""
    golden_json = "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    original = LumiMask(golden_json)
    # pickle & unpickle
    restored = cloudpickle.loads(cloudpickle.dumps(original))
    # both objects must carry identical internal mask tables
    mask_keys = original._masks.keys()
    assert mask_keys == restored._masks.keys()
    for key in mask_keys:
        assert np.all(original._masks[key] == restored._masks[key])
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    for candidate in (original, restored):
        result = candidate(runs, lumis)
        print("mask:", result)
        assert result[0]
        assert not result[1]
        # the uncompiled kernel (py_func) must reproduce the compiled call
        plain_result = np.zeros(dtype="bool", shape=runs.shape)
        LumiMask._apply_run_lumi_mask_kernel.py_func(candidate._masks, runs, lumis, plain_result)
        assert np.all(result == plain_result)
    assert np.all(original(runs, lumis) == restored(runs, lumis))
def test_lumimask():
    """Check LumiMask against one certified and one uncertified (run, lumi) pair."""
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )
    # run 303825 / lumi 115 is expected to be certified by this JSON; run 123 is not
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    # truth-test the entries directly instead of comparing '== True' / '== False'
    # (PEP 8 / flake8 E712)
    assert mask[0]
    assert not mask[1]
def test_lumimask():
    """Check LumiMask results and verify the pure-Python kernel matches the compiled call."""
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    # truth-test the entries directly instead of comparing '== True' / '== False'
    # (PEP 8 / flake8 E712); also drop the redundant parentheses around asserts
    assert mask[0]
    assert not mask[1]
    # test underlying py_func: the uncompiled kernel must agree with the normal call
    py_mask = np.zeros(dtype='bool', shape=runs.shape)
    LumiMask.apply_run_lumi_mask_kernel.py_func(lumimask._masks, runs, lumis, py_mask)
    assert np.all(mask == py_mask)
def get_data(self, f):
    """Read one data file: count its events and collect the certified lumi sections.

    Returns a dict with 'data_entries' (event count of the Events tree) and
    'lumi_list' (LumiList of (run, lumi) pairs passing the golden-JSON mask).
    """
    import uproot
    from coffea.lumi_tools import LumiData, LumiList
    import numpy as np

    result = {}
    rootfile = uproot.open(f)
    events_tree = rootfile['Events']
    result['data_entries'] = events_tree.numentries
    lumi_blocks = rootfile.get("LuminosityBlocks")
    run_arr = lumi_blocks.array("run")
    lumi_arr = lumi_blocks.array("luminosityBlock")
    # keep only lumi sections certified by the golden JSON
    good = LumiMask(self.parameters['lumimask'])(run_arr, lumi_arr)
    if len(run_arr[good]) > 0:
        result['lumi_list'] = LumiList(run_arr[good], lumi_arr[good])
    else:
        result['lumi_list'] = LumiList()
    return result
def process(self, events):
    """Dielectron event selection: build SS/OS categories and fill the N_jet histogram.

    NOTE(review): formatting below is reconstructed from a whitespace-collapsed
    source; statement order is preserved, indentation is a best-effort rebuild.
    """
    output = self.accumulator.identity()
    # we can use a very loose preselection to filter the events. nothing is done with this presel, though
    presel = ak.num(events.Jet) > 0
    ev = events[presel]
    dataset = ev.metadata['dataset']
    # load the config - probably not needed anymore
    cfg = loadConfig()
    #output['totalEvents']['all'] += len(events)
    #output['skimmedEvents']['all'] += len(ev)
    # dielectron trigger per data-taking year (2016 adds the _DZ filter)
    if self.year == 2018:
        triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
    elif self.year == 2017:
        triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
    elif self.year == 2016:
        triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ
    # NOTE(review): 'lumimask' is only bound for 2018, but it is called
    # unconditionally below ('mask = lumimask(...)') — running with
    # self.year != 2018 raises NameError. Needs the 2016/2017 JSONs.
    if self.year == 2018:
        lumimask = LumiMask(
            'processors/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
        )
    ## Electrons
    electron = Collections(ev, "Electron", "tight").get()
    electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]
    loose_electron = Collections(ev, "Electron", "veto").get()
    loose_electron = loose_electron[(loose_electron.pt > 25)
                                    & (np.abs(loose_electron.eta) < 2.4)]
    # same-sign / opposite-sign dielectron event flags (exactly two tight electrons)
    SSelectron = (ak.sum(electron.charge, axis=1) != 0) & (ak.num(electron) == 2)
    OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron) == 2)
    dielectron = choose(electron, 2)
    dielectron_mass = (dielectron['0'] + dielectron['1']).mass
    dielectron_pt = (dielectron['0'] + dielectron['1']).pt
    leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
    leading_electron = electron[(leading_electron_idx)]
    leading_electron = leading_electron[(leading_electron.pt > 30)]
    trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
    trailing_electron = electron[trailing_electron_idx]
    ##Muons
    loose_muon = Collections(ev, "Muon", "veto").get()
    loose_muon = loose_muon[(loose_muon.pt > 20) & (np.abs(loose_muon.eta) < 2.4)]
    #jets
    jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt', UL=False)
    jet = jet[ak.argsort(
        jet.pt, ascending=False
    )]  # need to sort wrt smeared and recorrected jet pt
    jet = jet[~match(jet, loose_muon,
                     deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~match(
        jet, electron,
        deltaRCut=0.4)]  # remove jets that overlap with electrons
    ## MET -> can switch to puppi MET
    met_pt = ev.MET.pt
    met_phi = ev.MET.phi
    #selections
    filters = getFilters(ev, year=self.year, dataset=dataset)
    mask = lumimask(ev.run, ev.luminosityBlock)
    ss = (SSelectron)
    os = (OSelectron)  # NOTE(review): shadows the 'os' module name if imported
    # Z-window requirement: at least one pair within 15 GeV of m(Z) ~ 91.2
    mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
    lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
    jet1 = (ak.num(jet) >= 1)
    jet2 = (ak.num(jet) >= 2)
    num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))
    selection = PackedSelection()
    selection.add('filter', (filters))
    selection.add('mask', (mask))
    selection.add('ss', ss)
    selection.add('os', os)
    selection.add('mass', mass)
    selection.add('leading', lead_electron)
    selection.add('triggers', triggers)
    selection.add('one jet', jet1)
    selection.add('two jets', jet2)
    selection.add('num_loose', num_loose)
    bl_reqs = ['filter'] + ['mass'] + ['mask'] + ['triggers'] + [
        'leading'
    ] + ['num_loose']
    #bl_reqs = ['filter'] + ['mass'] + ['triggers'] + ['leading'] + ['num_loose']
    bl_reqs_d = {sel: True for sel in bl_reqs}
    baseline = selection.require(**bl_reqs_d)
    s_reqs = bl_reqs + ['ss']
    s_reqs_d = {sel: True for sel in s_reqs}
    ss_sel = selection.require(**s_reqs_d)
    o_reqs = bl_reqs + ['os']
    o_reqs_d = {sel: True for sel in o_reqs}
    os_sel = selection.require(**o_reqs_d)
    j1s_reqs = s_reqs + ['one jet']
    j1s_reqs_d = {sel: True for sel in j1s_reqs}
    j1ss_sel = selection.require(**j1s_reqs_d)
    j1o_reqs = o_reqs + ['one jet']
    j1o_reqs_d = {sel: True for sel in j1o_reqs}
    j1os_sel = selection.require(**j1o_reqs_d)
    j2s_reqs = s_reqs + ['two jets']
    j2s_reqs_d = {sel: True for sel in j2s_reqs}
    j2ss_sel = selection.require(**j2s_reqs_d)
    j2o_reqs = o_reqs + ['two jets']
    j2o_reqs_d = {sel: True for sel in j2o_reqs}
    j2os_sel = selection.require(**j2o_reqs_d)
    # only the jet multiplicity in the OS baseline region is histogrammed here
    output["N_jet"].fill(
        dataset=dataset,
        multiplicity=ak.num(jet)[os_sel],
    )
    return output
from coffea.lookup_tools import extractor
from coffea.btag_tools import BTagScaleFactor
# load definitions
from definitions_analysis import parameters, eraDependentParameters, samples_info

# overlay the era-specific settings for the requested year on the base parameters
parameters.update(eraDependentParameters[args.year])
print(parameters)

outdir = args.outdir
if not os.path.exists(outdir):
    print(os.getcwd())
    os.makedirs(outdir)

# "Run" in the sample name marks real data: apply a golden-JSON lumi mask;
# MC gets no mask
if "Run" in args.sample:
    is_mc = False
    lumimask = LumiMask(parameters["lumimask"])
else:
    is_mc = True
    lumimask = None

#define arrays to load: these are objects that will be kept together
arrays_objects = [
    "Jet_eta", "Jet_phi", "Jet_btagDeepB", "Jet_btagCSVV2",
    "Jet_btagDeepFlavB", "Jet_jetId", "Jet_puId",
    #"Jet_btagDeepFlavB" add for DeepFlavour
    "Muon_pt", "Muon_eta",
    # NOTE(review): list is truncated in this chunk — it continues beyond view
def load_sample(self, sample, parallelize=1):
    """Scan all ROOT files of one sample; return its file list and bookkeeping metadata.

    For data samples: total event count and the golden-JSON-filtered LumiList.
    For MC samples: sum of generator weights and generated-event count.
    With parallelize > 1 the per-file work is farmed out to a multiprocessing pool.
    """
    import glob, tqdm
    import uproot
    import multiprocessing as mp
    print("Loading", sample)
    if sample not in self.paths:
        print(f"Couldn't load {sample}! Skipping.")
        return {
            'sample': sample,
            'metadata': {},
            'files': {},
            'data_entries': 0,
            'is_missing': True
        }
    all_files = []
    metadata = {}
    data_entries = 0
    data_runs = []
    data_lumis = []
    lumi_list = LumiList()
    # file discovery: XRootD listing at Purdue, local glob otherwise
    if self.at_purdue:
        all_files = read_via_xrootd(self.server, self.paths[sample])
    else:
        all_files = [
            server + f
            for f in glob.glob(self.paths[sample] + '/**/**/*.root')
        ]
    # if 'ttjets_sl' in sample:
    #     all_files = all_files[0:10]
    if self.debug:
        # debug mode: process a single file
        all_files = [all_files[0]]
        # all_files = [all_files[31]]
    sumGenWgts = 0
    nGenEvts = 0
    if parallelize > 1:
        pool = mp.Pool(parallelize)
        # dispatch data files to get_data, MC files to get_mc
        if 'data' in sample:
            a = [
                pool.apply_async(self.get_data, args=(f, ))
                for f in all_files
            ]
        else:
            a = [
                pool.apply_async(self.get_mc, args=(f, ))
                for f in all_files
            ]
        results = []
        for process in a:
            process.wait()
            results.append(process.get())
        pool.close()
        # merge the per-file results into the sample totals
        for ret in results:
            if 'data' in sample:
                data_entries += ret['data_entries']
                lumi_list += ret['lumi_list']
            else:
                sumGenWgts += ret['sumGenWgts']
                nGenEvts += ret['nGenEvts']
    else:
        # serial path: same accounting, done inline per file
        for f in all_files:
            if 'data' in sample:
                tree = uproot.open(f)['Events']
                data_entries += tree.numentries
                lumi_mask = LumiMask(self.parameters['lumimask'])
                lumi_filter = lumi_mask(tree.array('run'),
                                        tree.array('luminosityBlock'))
                lumi_list += LumiList(
                    tree.array('run')[lumi_filter],
                    tree.array('luminosityBlock')[lumi_filter])
            else:
                tree = uproot.open(f)['Runs']
                # NanoAODv6 uses trailing-underscore branch names
                if 'NanoAODv6' in self.paths[sample]:
                    sumGenWgts += tree.array('genEventSumw_')[0]
                    nGenEvts += tree.array('genEventCount_')[0]
                else:
                    sumGenWgts += tree.array('genEventSumw')[0]
                    nGenEvts += tree.array('genEventCount')[0]
    metadata['sumGenWgts'] = sumGenWgts
    metadata['nGenEvts'] = nGenEvts
    files = {'files': all_files, 'treename': 'Events'}
    return {'sample': sample, 'metadata': metadata, 'files': files,\
            'data_entries':data_entries, 'lumi_list':lumi_list, 'is_missing':False}
def process(self, df):
    """Dimuon analysis processor: select mu+mu- events, apply corrections and
    weights, loop over JEC variations, and return a flat pandas DataFrame.

    NOTE(review): reconstructed from whitespace-collapsed source; statement
    order is preserved, but the exact extent of the 'if True:' block below is
    a best-effort reconstruction — confirm against the original repository.
    """
    # Initialize timer
    if self.timer:
        self.timer.update()
    # Dataset name (see definitions in config/datasets.py)
    dataset = df.metadata["dataset"]
    is_mc = "data" not in dataset
    numevents = len(df)
    # ------------------------------------------------------------#
    # Apply HLT, lumimask, genweights, PU weights
    # and L1 prefiring weights
    # ------------------------------------------------------------#
    # All variables that we want to save
    # will be collected into the 'output' dataframe
    output = pd.DataFrame({"run": df.run, "event": df.event})
    output.index.name = "entry"
    output["npv"] = df.PV.npvs
    output["met"] = df.MET.pt
    # Separate dataframe to keep track on weights
    # and their systematic variations
    weights = Weights(output)
    if is_mc:
        # For MC: Apply gen.weights, pileup weights, lumi weights,
        # L1 prefiring weights
        mask = np.ones(numevents, dtype=bool)
        genweight = df.genWeight
        weights.add_weight("genwgt", genweight)
        weights.add_weight("lumi", self.lumi_weights[dataset])
        pu_wgts = pu_evaluator(
            self.pu_lookups,
            self.parameters,
            numevents,
            np.array(df.Pileup.nTrueInt),
            self.auto_pu,
        )
        weights.add_weight("pu_wgt", pu_wgts, how="all")
        if self.parameters["do_l1prefiring_wgts"]:
            if "L1PreFiringWeight" in df.fields:
                l1pfw = l1pf_weights(df)
                weights.add_weight("l1prefiring_wgt", l1pfw, how="all")
            else:
                weights.add_weight("l1prefiring_wgt", how="dummy_vars")
    else:
        # For Data: apply Lumi mask
        lumi_info = LumiMask(self.parameters["lumimask"])
        mask = lumi_info(df.run, df.luminosityBlock)
    # Apply HLT to both Data and MC
    hlt_columns = [c for c in self.parameters["hlt"] if c in df.HLT.fields]
    hlt = ak.to_pandas(df.HLT[hlt_columns])
    if len(hlt_columns) == 0:
        hlt = False
    else:
        hlt = hlt[hlt_columns].sum(axis=1)
    if self.timer:
        self.timer.add_checkpoint("HLT, lumimask, PU weights")
    # ------------------------------------------------------------#
    # Update muon kinematics with Rochester correction,
    # FSR recovery and GeoFit correction
    # Raw pT and eta are stored to be used in event selection
    # ------------------------------------------------------------#
    # Save raw variables before computing any corrections
    df["Muon", "pt_raw"] = df.Muon.pt
    df["Muon", "eta_raw"] = df.Muon.eta
    df["Muon", "phi_raw"] = df.Muon.phi
    df["Muon", "pfRelIso04_all_raw"] = df.Muon.pfRelIso04_all
    # Rochester correction
    if self.do_roccor:
        apply_roccor(df, self.roccor_lookup, is_mc)
        df["Muon", "pt"] = df.Muon.pt_roch
    # variations will be in branches pt_roch_up and pt_roch_down
    # muons_pts = {
    #     'nominal': df.Muon.pt,
    #     'roch_up':df.Muon.pt_roch_up,
    #     'roch_down':df.Muon.pt_roch_down
    # }
    # for ...
    if True:  # indent reserved for loop over muon pT variations
        # According to HIG-19-006, these variations have negligible
        # effect on significance, but it's better to have them
        # implemented in the future
        # FSR recovery
        if self.do_fsr:
            has_fsr = fsr_recovery(df)
            df["Muon", "pt"] = df.Muon.pt_fsr
            df["Muon", "eta"] = df.Muon.eta_fsr
            df["Muon", "phi"] = df.Muon.phi_fsr
            df["Muon", "pfRelIso04_all"] = df.Muon.iso_fsr
        # if FSR was applied, 'pt_fsr' will be corrected pt
        # if FSR wasn't applied, just copy 'pt' to 'pt_fsr'
        df["Muon", "pt_fsr"] = df.Muon.pt
        # GeoFit correction
        if self.do_geofit and ("dxybs" in df.Muon.fields):
            apply_geofit(df, self.year, ~has_fsr)
            df["Muon", "pt"] = df.Muon.pt_fsr
        if self.timer:
            self.timer.add_checkpoint("Muon corrections")
        # --- conversion from awkward to pandas --- #
        muon_columns = [
            "pt",
            "pt_fsr",
            "eta",
            "phi",
            "charge",
            "ptErr",
            "mass",
            "pt_raw",
            "eta_raw",
            "pfRelIso04_all",
        ] + [self.parameters["muon_id"]]
        muons = ak.to_pandas(df.Muon[muon_columns])
        # --------------------------------------------------------#
        # Select muons that pass pT, eta, isolation cuts,
        # muon ID and quality flags
        # Select events with 2 OS muons, no electrons,
        # passing quality cuts and at least one good PV
        # --------------------------------------------------------#
        # Apply event quality flags
        flags = ak.to_pandas(df.Flag[self.parameters["event_flags"]])
        flags = flags[self.parameters["event_flags"]].product(axis=1)
        muons["pass_flags"] = True
        if self.parameters["muon_flags"]:
            muons["pass_flags"] = muons[
                self.parameters["muon_flags"]].product(axis=1)
        # Define baseline muon selection (applied to pandas DF!)
        muons["selection"] = (
            (muons.pt_raw > self.parameters["muon_pt_cut"])
            & (abs(muons.eta_raw) < self.parameters["muon_eta_cut"])
            & (muons.pfRelIso04_all < self.parameters["muon_iso_cut"])
            & muons[self.parameters["muon_id"]]
            & muons.pass_flags)
        # Count muons
        nmuons = (muons[muons.selection].reset_index().groupby("entry")
                  ["subentry"].nunique())
        # Find opposite-sign muons
        mm_charge = muons.loc[muons.selection,
                              "charge"].groupby("entry").prod()
        # Veto events with good quality electrons
        electrons = df.Electron[
            (df.Electron.pt > self.parameters["electron_pt_cut"])
            & (abs(df.Electron.eta) < self.parameters["electron_eta_cut"])
            & (df.Electron[self.parameters["electron_id"]] == 1)]
        electron_veto = ak.to_numpy(ak.count(electrons.pt, axis=1) == 0)
        # Find events with at least one good primary vertex
        good_pv = ak.to_pandas(df.PV).npvsGood > 0
        # Define baseline event selection
        output["two_muons"] = nmuons == 2
        output["event_selection"] = (mask & (hlt > 0) & (flags > 0)
                                     & (nmuons == 2) & (mm_charge == -1)
                                     & electron_veto & good_pv)
        # --------------------------------------------------------#
        # Select two leading-pT muons
        # --------------------------------------------------------#
        # Find pT-leading and subleading muons
        # This is slow for large chunk size.
        # Consider reimplementing using sort_values().groupby().nth()
        # or sort_values().drop_duplicates()
        # or using Numba
        # https://stackoverflow.com/questions/50381064/select-the-max-row-per-group-pandas-performance-issue
        muons = muons[muons.selection & (nmuons == 2)]
        mu1 = muons.loc[muons.pt.groupby("entry").idxmax()]
        mu2 = muons.loc[muons.pt.groupby("entry").idxmin()]
        mu1.index = mu1.index.droplevel("subentry")
        mu2.index = mu2.index.droplevel("subentry")
        # --------------------------------------------------------#
        # Select events with muons passing leading pT cut
        # and trigger matching (trig match not done in final vrsn)
        # --------------------------------------------------------#
        # Events where there is at least one muon passing
        # leading muon pT cut
        pass_leading_pt = mu1.pt_raw > self.parameters["muon_leading_pt"]
        # update event selection with leading muon pT cut
        output["pass_leading_pt"] = pass_leading_pt
        output[
            "event_selection"] = output.event_selection & output.pass_leading_pt
        # --------------------------------------------------------#
        # Fill dimuon and muon variables
        # --------------------------------------------------------#
        fill_muons(self, output, mu1, mu2, is_mc)
        if self.timer:
            self.timer.add_checkpoint("Event & muon selection")
    # ------------------------------------------------------------#
    # Prepare jets
    # ------------------------------------------------------------#
    prepare_jets(df, is_mc)
    # ------------------------------------------------------------#
    # Apply JEC, get JEC and JER variations
    # ------------------------------------------------------------#
    jets = df.Jet
    self.do_jec = False
    # We only need to reapply JEC for 2018 data
    # (unless new versions of JEC are released)
    if ("data" in dataset) and ("2018" in self.year):
        self.do_jec = True
    jets = apply_jec(
        df,
        jets,
        dataset,
        is_mc,
        self.year,
        self.do_jec,
        self.do_jecunc,
        self.do_jerunc,
        self.jec_factories,
        self.jec_factories_data,
    )
    # ------------------------------------------------------------#
    # Calculate other event weights
    # ------------------------------------------------------------#
    if is_mc:
        do_nnlops = self.do_nnlops and ("ggh" in dataset)
        if do_nnlops:
            nnlopsw = nnlops_weights(df, numevents, self.parameters, dataset)
            weights.add_weight("nnlops", nnlopsw)
        else:
            weights.add_weight("nnlops", how="dummy")
        # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
        # do_zpt = ('dy' in dataset)
        #
        # if do_zpt:
        #     zpt_weight = np.ones(numevents, dtype=float)
        #     zpt_weight[two_muons] =\
        #         self.evaluator[self.zpt_path](
        #             output['dimuon_pt'][two_muons]
        #         ).flatten()
        #     weights.add_weight('zpt_wgt', zpt_weight)
        # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
        do_musf = True
        if do_musf:
            muID, muIso, muTrig = musf_evaluator(self.musf_lookup,
                                                 self.year, numevents, mu1,
                                                 mu2)
            weights.add_weight("muID", muID, how="all")
            weights.add_weight("muIso", muIso, how="all")
            weights.add_weight("muTrig", muTrig, how="all")
        else:
            weights.add_weight("muID", how="dummy_all")
            weights.add_weight("muIso", how="dummy_all")
            weights.add_weight("muTrig", how="dummy_all")
        # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
        do_lhe = (("LHEScaleWeight" in df.fields)
                  and ("LHEPdfWeight" in df.fields)
                  and ("nominal" in self.pt_variations))
        if do_lhe:
            lhe_ren, lhe_fac = lhe_weights(df, output, dataset, self.year)
            weights.add_weight("LHERen", lhe_ren, how="only_vars")
            weights.add_weight("LHEFac", lhe_fac, how="only_vars")
        else:
            weights.add_weight("LHERen", how="dummy_vars")
            weights.add_weight("LHEFac", how="dummy_vars")
        # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
        do_thu = (("vbf" in dataset) and ("dy" not in dataset)
                  and ("nominal" in self.pt_variations)
                  and ("stage1_1_fine_cat_pTjet30GeV" in df.HTXS.fields))
        if do_thu:
            for i, name in enumerate(self.sths_names):
                wgt_up = stxs_uncert(
                    i,
                    ak.to_numpy(df.HTXS.stage1_1_fine_cat_pTjet30GeV),
                    1.0,
                    self.stxs_acc_lookups,
                    self.powheg_xsec_lookup,
                )
                wgt_down = stxs_uncert(
                    i,
                    ak.to_numpy(df.HTXS.stage1_1_fine_cat_pTjet30GeV),
                    -1.0,
                    self.stxs_acc_lookups,
                    self.powheg_xsec_lookup,
                )
                thu_wgts = {"up": wgt_up, "down": wgt_down}
                weights.add_weight("THU_VBF_" + name, thu_wgts,
                                   how="only_vars")
        else:
            for i, name in enumerate(self.sths_names):
                weights.add_weight("THU_VBF_" + name, how="dummy_vars")
        # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
        do_pdf = (self.do_pdf and ("nominal" in self.pt_variations)
                  and ("dy" in dataset or "ewk" in dataset
                       or "ggh" in dataset or "vbf" in dataset)
                  and ("mg" not in dataset))
        if "2016" in self.year:
            max_replicas = 0
            if "dy" in dataset:
                max_replicas = 100
            elif "ewk" in dataset:
                max_replicas = 33
            else:
                max_replicas = 100
            if do_pdf:
                pdf_wgts = df.LHEPdfWeight[:, 0:self.
                                           parameters["n_pdf_variations"]]
            for i in range(100):
                if (i < max_replicas) and do_pdf:
                    output[f"pdf_mcreplica{i}"] = pdf_wgts[:, i]
                else:
                    output[f"pdf_mcreplica{i}"] = np.nan
        else:
            if do_pdf:
                pdf_wgts = df.LHEPdfWeight[:, 0:self.
                                           parameters["n_pdf_variations"]][0]
                pdf_wgts = np.array(pdf_wgts)
                pdf_vars = {
                    "up": (1 + 2 * pdf_wgts.std()),
                    "down": (1 - 2 * pdf_wgts.std()),
                }
                weights.add_weight("pdf_2rms", pdf_vars, how="only_vars")
            else:
                weights.add_weight("pdf_2rms", how="dummy_vars")
        # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
    if is_mc:
        output = fill_gen_jets(df, output)
    # ------------------------------------------------------------#
    # Loop over JEC variations and fill jet variables
    # ------------------------------------------------------------#
    output.columns = pd.MultiIndex.from_product(
        [output.columns, [""]], names=["Variable", "Variation"])
    if self.timer:
        self.timer.add_checkpoint("Jet preparation & event weights")
    for v_name in self.pt_variations:
        output_updated = self.jet_loop(
            v_name,
            is_mc,
            df,
            dataset,
            mask,
            muons,
            mu1,
            mu2,
            jets,
            weights,
            numevents,
            output,
        )
        if output_updated is not None:
            output = output_updated
    if self.timer:
        self.timer.add_checkpoint("Jet loop")
    # ------------------------------------------------------------#
    # Fill outputs
    # ------------------------------------------------------------#
    mass = output.dimuon_mass
    output["region"] = None
    output.loc[((mass > 76) & (mass < 106)), "region"] = "z-peak"
    output.loc[((mass > 110) & (mass < 115.03)) |
               ((mass > 135.03) & (mass < 150)),
               "region", ] = "h-sidebands"
    output.loc[((mass > 115.03) & (mass < 135.03)), "region"] = "h-peak"
    output["dataset"] = dataset
    output["year"] = int(self.year)
    # save only nominal weights and up/down systematic variations
    for wgt in weights.df.columns:
        skip_saving = (("nominal" not in wgt) and ("up" not in wgt)
                       and ("down" not in wgt))
        if skip_saving:
            continue
        output[f"wgt_{wgt}"] = weights.get_weight(wgt)
    columns_to_save = [
        c for c in output.columns
        if (c[0] in self.vars_to_save) or ("wgt_" in c[0]) or (
            "mcreplica" in c[0]) or (c[0] in ["region", "dataset", "year"])
        or ("gjet" in c[0]) or ("gjj" in c[0])
    ]
    output = output.loc[output.event_selection, columns_to_save]
    output = output.reindex(sorted(output.columns), axis=1)
    output.columns = [
        " ".join(col).strip() for col in output.columns.values
    ]
    output = output[output.region.isin(self.regions)]
    """
    input_evts = numevents
    output_evts = output.shape[0]
    out_yield = output.wgt_nominal.sum()
    out_vbf = output[
        (output["jj_mass nominal"]>400) &
        (output["jj_dEta nominal"]>2.5) &
        (output["jet1_pt nominal"]>35)
    ].wgt_nominal.sum()
    out_ggh = out_yield - out_vbf
    print(f"\n{dataset}: {input_evts} -> {output_evts}; yield = {out_ggh} (ggH) + {out_vbf} (VBF) = {out_yield}")
    """
    to_return = None
    if self.apply_to_output is None:
        to_return = output
    else:
        self.apply_to_output(output)
        to_return = self.accumulator.identity()
    if self.timer:
        self.timer.add_checkpoint("Saving outputs")
        self.timer.summary()
    return to_return
def build_lumimask(filename):
    """Build a LumiMask from a golden-JSON file bundled in boostedhiggs.data."""
    from coffea.lumi_tools import LumiMask
    # resolve the packaged resource to a real filesystem path for LumiMask
    with importlib.resources.path("boostedhiggs.data", filename) as json_path:
        mask = LumiMask(json_path)
    return mask
def process(self, df):
    """Photon processor: fill sieie histograms for medium / no-sieie /
    inverted-isolation photon categories, with golden-JSON and MET filters.

    NOTE(review): formatting reconstructed from whitespace-collapsed source.
    """
    self._configure(df)
    output = self.accumulator.identity()
    dataset = df['dataset']
    # Lumi mask
    year = extract_year(dataset)
    if is_data(dataset):
        # NOTE(review): local name 'json' shadows the stdlib module name
        if year == 2016:
            json = bucoffea_path(
                'data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'
            )
        elif year == 2017:
            json = bucoffea_path(
                'data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt'
            )
        elif year == 2018:
            json = bucoffea_path(
                'data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'
            )
        lumi_mask = LumiMask(json)(df['run'], df['luminosityBlock'])
    else:
        # MC: all events pass the lumi mask
        lumi_mask = np.ones(df.size) == 1
    # MET filters
    if is_data(dataset):
        filt_met = mask_and(df, cfg.FILTERS.DATA)
    else:
        filt_met = mask_and(df, cfg.FILTERS.MC)
    # single-photon trigger threshold differs for 2016
    if year == 2016:
        trigger = 'HLT_Photon175'
    else:
        trigger = 'HLT_Photon200'
    photons = setup_photons(df)
    ak4 = setup_jets(df)
    # tight, central, high-pT jets not overlapping a photon
    ak4 = ak4[
        object_overlap(ak4, photons) \
        & ak4.tightId \
        & (ak4.pt > 100) \
        & (ak4.abseta < 2.4)
    ]
    event_mask = filt_met \
        & lumi_mask \
        & (ak4.counts > 0) \
        & df[trigger] \
        & (df['MET_pt'] < 60)
    # Generator weight
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if is_data(dataset):
        weights.add('gen', np.ones(df.size))
    else:
        weights.add('gen', df['Generator_weight'])
    # high-pT barrel photons
    photon_kinematics = (photons.pt > 200) & (photons.barrel)
    # Medium
    vals = photons[photon_kinematics & photons.mediumId].sieie[event_mask]
    pt = photons[photon_kinematics & photons.mediumId].pt[event_mask]
    output['sieie'].fill(dataset=dataset,
                         cat='medium',
                         sieie=vals.flatten(),
                         pt=pt.flatten(),
                         weights=weight_shape(
                             vals, weights.weight()[event_mask]))
    # No Sieie
    vals = photons[photon_kinematics
                   & medium_id_no_sieie(photons)].sieie[event_mask]
    pt = photons[photon_kinematics
                 & medium_id_no_sieie(photons)].pt[event_mask]
    output['sieie'].fill(dataset=dataset,
                         cat='medium_nosieie',
                         sieie=vals.flatten(),
                         pt=pt.flatten(),
                         weights=weight_shape(
                             vals, weights.weight()[event_mask]))
    # No Sieie, inverted isolation
    vals = photons[photon_kinematics
                   & medium_id_no_sieie_inv_iso(photons)].sieie[event_mask]
    pt = photons[photon_kinematics
                 & medium_id_no_sieie_inv_iso(photons)].pt[event_mask]
    output['sieie'].fill(dataset=dataset,
                         cat='medium_nosieie_invertiso',
                         sieie=vals.flatten(),
                         pt=pt.flatten(),
                         weights=weight_shape(
                             vals, weights.weight()[event_mask]))
    # Keep track of weight sum
    if not is_data(dataset):
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
    return output
def process(self, events):
    """Charge-flip dielectron processor: build SS/OS regions with 0/1/2-jet
    categories and fill kinematic histograms; OS regions are reweighted by the
    measured charge-flip probability.

    NOTE(review): formatting reconstructed from whitespace-collapsed source.
    """
    output = self.accumulator.identity()
    # we can use a very loose preselection to filter the events. nothing is done with this presel, though
    presel = ak.num(events.Jet) > 0
    # golden-JSON per year
    # NOTE(review): if self.year matches none of 2016/2017/2018, 'lumimask'
    # stays unbound and the call below raises NameError
    if self.year == 2016:
        lumimask = LumiMask(
            '../data/lumi/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt'
        )
    if self.year == 2017:
        lumimask = LumiMask(
            '../data/lumi/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt'
        )
    if self.year == 2018:
        lumimask = LumiMask(
            '../data/lumi/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
        )
    ev = events[presel]
    dataset = ev.metadata['dataset']
    # load the config - probably not needed anymore
    cfg = loadConfig()
    output['totalEvents']['all'] += len(events)
    output['skimmedEvents']['all'] += len(ev)
    # dielectron trigger per data-taking year (2016 adds the _DZ filter)
    if self.year == 2018:
        triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
    elif self.year == 2017:
        triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
    elif self.year == 2016:
        triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ
    ## Electrons
    electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
    electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]
    loose_electron = Collections(ev, "Electron", "looseFCNC", 0,
                                 self.year).get()
    loose_electron = loose_electron[(loose_electron.pt > 25)
                                    & (np.abs(loose_electron.eta) < 2.4)]
    # same-sign / opposite-sign dielectron event flags (exactly two tight electrons)
    SSelectron = (ak.sum(electron.charge, axis=1) != 0) & (ak.num(electron) == 2)
    OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron) == 2)
    dielectron = choose(electron, 2)
    dielectron_mass = (dielectron['0'] + dielectron['1']).mass
    dielectron_pt = (dielectron['0'] + dielectron['1']).pt
    leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
    leading_electron = electron[(leading_electron_idx)]
    leading_electron = leading_electron[(leading_electron.pt > 30)]
    trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
    trailing_electron = electron[trailing_electron_idx]
    ##Muons
    loose_muon = Collections(ev, "Muon", "looseFCNC", 0, self.year).get()
    loose_muon = loose_muon[(loose_muon.pt > 20)
                            & (np.abs(loose_muon.eta) < 2.4)]
    #jets
    jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
    jet = jet[~match(jet, loose_muon,
                     deltaRCut=0.4)]  # remove jets that overlap with muons
    jet = jet[~match(
        jet, electron,
        deltaRCut=0.4)]  # remove jets that overlap with electrons
    ## MET -> can switch to puppi MET
    met_pt = ev.MET.pt
    met_phi = ev.MET.phi
    #weights
    # weight: unit weights (used for SS); weight2: charge-flip weight (used for OS)
    weight = Weights(len(ev))
    weight2 = Weights(len(ev))
    weight2.add("charge flip",
                self.charge_flip_ratio.flip_weight(electron))
    #selections
    filters = getFilters(ev, year=self.year, dataset=dataset, UL=False)
    mask = lumimask(ev.run, ev.luminosityBlock)
    ss = (SSelectron)
    os = (OSelectron)
    # Z-window requirement: at least one pair within 15 GeV of m(Z) ~ 91.2
    mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
    lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
    jet1 = (ak.num(jet) >= 1)
    jet2 = (ak.num(jet) >= 2)
    num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))
    selection = PackedSelection()
    selection.add('filter', (filters))
    selection.add('mask', (mask))
    selection.add('ss', ss)
    selection.add('os', os)
    selection.add('mass', mass)
    selection.add('leading', lead_electron)
    selection.add('triggers', triggers)
    selection.add('one jet', jet1)
    selection.add('two jets', jet2)
    selection.add('num_loose', num_loose)
    bl_reqs = ['filter'] + ['triggers'] + ['mask']
    bl_reqs_d = {sel: True for sel in bl_reqs}
    baseline = selection.require(**bl_reqs_d)
    s_reqs = bl_reqs + ['ss'] + ['mass'] + ['num_loose'] + ['leading']
    s_reqs_d = {sel: True for sel in s_reqs}
    ss_sel = selection.require(**s_reqs_d)
    o_reqs = bl_reqs + ['os'] + ['mass'] + ['num_loose'] + ['leading']
    o_reqs_d = {sel: True for sel in o_reqs}
    os_sel = selection.require(**o_reqs_d)
    j1s_reqs = s_reqs + ['one jet']
    j1s_reqs_d = {sel: True for sel in j1s_reqs}
    j1ss_sel = selection.require(**j1s_reqs_d)
    j1o_reqs = o_reqs + ['one jet']
    j1o_reqs_d = {sel: True for sel in j1o_reqs}
    j1os_sel = selection.require(**j1o_reqs_d)
    j2s_reqs = s_reqs + ['two jets']
    j2s_reqs_d = {sel: True for sel in j2s_reqs}
    j2ss_sel = selection.require(**j2s_reqs_d)
    j2o_reqs = o_reqs + ['two jets']
    j2o_reqs_d = {sel: True for sel in j2o_reqs}
    j2os_sel = selection.require(**j2o_reqs_d)
    #outputs
    # histograms 1-6: OS regions with charge-flip weight (weight2);
    # histograms 7-12 / *4-*6: SS regions with unit weight (weight)
    output["electron_data1"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_electron[os_sel].pt)),
        eta=ak.to_numpy(ak.flatten(leading_electron[os_sel].eta)),
        phi=ak.to_numpy(ak.flatten(leading_electron[os_sel].phi)),
        weight=weight2.weight()[os_sel])
    output["electron_data2"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_electron[os_sel].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_electron[os_sel].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_electron[os_sel].phi)),
        weight=weight2.weight()[os_sel])
    output["electron_data3"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].pt)),
        eta=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].eta)),
        phi=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].phi)),
        weight=weight2.weight()[j1os_sel])
    output["electron_data4"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].phi)),
        weight=weight2.weight()[j1os_sel])
    output["electron_data5"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].pt)),
        eta=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].eta)),
        phi=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].phi)),
        weight=weight2.weight()[j2os_sel])
    output["electron_data6"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].phi)),
        weight=weight2.weight()[j2os_sel])
    output["electron_data7"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_electron[ss_sel].pt)),
        eta=ak.to_numpy(ak.flatten(leading_electron[ss_sel].eta)),
        phi=ak.to_numpy(ak.flatten(leading_electron[ss_sel].phi)),
        weight=weight.weight()[ss_sel])
    output["electron_data8"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].phi)),
        weight=weight.weight()[ss_sel])
    output["electron_data9"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].pt)),
        eta=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].eta)),
        phi=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].phi)),
        weight=weight.weight()[j1ss_sel])
    output["electron_data10"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].phi)),
        weight=weight.weight()[j1ss_sel])
    output["electron_data11"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].pt)),
        eta=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].eta)),
        phi=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].phi)),
        weight=weight.weight()[j2ss_sel])
    output["electron_data12"].fill(
        dataset=dataset,
        pt=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].pt)),
        eta=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].eta)),
        phi=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].phi)),
        weight=weight.weight()[j2ss_sel])
    output["dilep_mass1"].fill(
        dataset=dataset,
        mass=ak.to_numpy(ak.flatten(dielectron_mass[os_sel])),
        pt=ak.to_numpy(ak.flatten(dielectron_pt[os_sel])),
        weight=weight2.weight()[os_sel])
    output["dilep_mass2"].fill(
        dataset=dataset,
        mass=ak.to_numpy(ak.flatten(dielectron_mass[j1os_sel])),
        pt=ak.to_numpy(ak.flatten(dielectron_pt[j1os_sel])),
        weight=weight2.weight()[j1os_sel])
    output["dilep_mass3"].fill(
        dataset=dataset,
        mass=ak.to_numpy(ak.flatten(dielectron_mass[j2os_sel])),
        pt=ak.to_numpy(ak.flatten(dielectron_pt[j2os_sel])),
        weight=weight2.weight()[j2os_sel])
    output["dilep_mass4"].fill(
        dataset=dataset,
        mass=ak.to_numpy(ak.flatten(dielectron_mass[ss_sel])),
        pt=ak.to_numpy(ak.flatten(dielectron_pt[ss_sel])),
        weight=weight.weight()[ss_sel])
    output["dilep_mass5"].fill(
        dataset=dataset,
        mass=ak.to_numpy(ak.flatten(dielectron_mass[j1ss_sel])),
        pt=ak.to_numpy(ak.flatten(dielectron_pt[j1ss_sel])),
        weight=weight.weight()[j1ss_sel])
    output["dilep_mass6"].fill(
        dataset=dataset,
        mass=ak.to_numpy(ak.flatten(dielectron_mass[j2ss_sel])),
        pt=ak.to_numpy(ak.flatten(dielectron_pt[j2ss_sel])),
        weight=weight.weight()[j2ss_sel])
    output["MET"].fill(dataset=dataset,
                       pt=met_pt[os_sel],
                       weight=weight2.weight()[os_sel])
    output["MET2"].fill(dataset=dataset,
                        pt=met_pt[j1os_sel],
                        weight=weight2.weight()[j1os_sel])
    output["MET3"].fill(dataset=dataset,
                        pt=met_pt[j2os_sel],
                        weight=weight2.weight()[j2os_sel])
    output["MET4"].fill(dataset=dataset,
                        pt=met_pt[ss_sel],
                        weight=weight.weight()[ss_sel])
    output["MET5"].fill(dataset=dataset,
                        pt=met_pt[j1ss_sel],
                        weight=weight.weight()[j1ss_sel])
    output["MET6"].fill(dataset=dataset,
                        pt=met_pt[j2ss_sel],
                        weight=weight.weight()[j2ss_sel])
    output["N_jet"].fill(dataset=dataset,
                         multiplicity=ak.num(jet)[os_sel],
                         weight=weight2.weight()[os_sel])
    output["N_jet2"].fill(dataset=dataset,
                          multiplicity=ak.num(jet)[j1os_sel],
                          weight=weight2.weight()[j1os_sel])
    output["N_jet3"].fill(dataset=dataset,
                          multiplicity=ak.num(jet)[j2os_sel],
                          weight=weight2.weight()[j2os_sel])
    output["N_jet4"].fill(dataset=dataset,
                          multiplicity=ak.num(jet)[ss_sel],
                          weight=weight.weight()[ss_sel])
    output["N_jet5"].fill(dataset=dataset,
                          multiplicity=ak.num(jet)[j1ss_sel],
                          weight=weight.weight()[j1ss_sel])
    output["N_jet6"].fill(dataset=dataset,
                          multiplicity=ak.num(jet)[j2ss_sel],
                          weight=weight.weight()[j2ss_sel])
    output["PV_npvsGood"].fill(dataset=dataset,
                               multiplicity=ev.PV[os_sel].npvsGood,
                               weight=weight2.weight()[os_sel])
    output["PV_npvsGood2"].fill(dataset=dataset,
                                multiplicity=ev.PV[j1os_sel].npvsGood,
                                weight=weight2.weight()[j1os_sel])
    output["PV_npvsGood3"].fill(dataset=dataset,
                                multiplicity=ev.PV[j2os_sel].npvsGood,
                                weight=weight2.weight()[j2os_sel])
    output["PV_npvsGood4"].fill(dataset=dataset,
                                multiplicity=ev.PV[ss_sel].npvsGood,
                                weight=weight.weight()[ss_sel])
    output["PV_npvsGood5"].fill(dataset=dataset,
                                multiplicity=ev.PV[j1ss_sel].npvsGood,
                                weight=weight.weight()[j1ss_sel])
    output["PV_npvsGood6"].fill(dataset=dataset,
                                multiplicity=ev.PV[j2ss_sel].npvsGood,
                                weight=weight.weight()[j2ss_sel])
    return output