def test_modulo_units():
    """datetime64 dtypes with modulo units round-trip through values_astype."""
    atto_scaled = ak.values_astype(ak.Array([1]), np.dtype("datetime64[100as/1]"))
    assert atto_scaled.to_list() == [
        np.datetime64("1970-01-01T00:00:00.000000000000000100", "100as")
    ]

    halved_seconds = ak.values_astype(ak.Array([1]), np.dtype("datetime64[10s/2]"))
    assert halved_seconds.to_list() == [
        np.datetime64("1970-01-01T00:00:05.000", "5000ms")
    ]
def test_values_astype_datetime():
    """Integer arrays cast to datetime64 are read as counts of the given unit."""
    from_string_dtype = ak.values_astype(ak.Array([1567416600000]), "datetime64[ms]")
    assert str(from_string_dtype.type) == "1 * datetime64"
    assert from_string_dtype.to_list() == [np.datetime64("2019-09-02T09:30:00")]

    from_numpy_dtype = ak.values_astype(ak.Array([1567416600000]), np.dtype("M8[ms]"))
    assert str(from_numpy_dtype.type) == "1 * datetime64"
    assert from_numpy_dtype.to_list() == [np.datetime64("2019-09-02T09:30:00")]

    # The bare np.datetime64 type defaults to microsecond resolution and
    # preserves missing values.
    with_missing = ak.values_astype(ak.Array([1567416600000000, None]), np.datetime64)
    assert with_missing.to_list() == [np.datetime64("2019-09-02T09:30:00"), None]
def test_ufunc_afterward():
    """A ufunc applied after values_astype keeps the narrowed float32 precision."""
    records = ak.Array([{"x": 1.1}, {"x": 3.3}])
    narrowed = ak.values_astype(records, np.float32)
    shifted = narrowed["x"] + 1
    # Values show float32 rounding error, proving the cast happened first.
    assert shifted.tolist() == [2.0999999046325684, 4.300000190734863]
def add_jec_variables(jets, event_rho):
    """Attach the raw-kinematics and rho columns that JEC evaluation expects."""
    undo_correction = 1 - jets.rawFactor
    jets["pt_raw"] = undo_correction * jets.pt
    jets["mass_raw"] = undo_correction * jets.mass
    # Jets without a matched gen jet fall back to pt_gen = 0.
    jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32)
    jets["event_rho"] = ak.broadcast_arrays(event_rho, jets.pt)[0]
    return jets
def test():
    """fill_none on a float32 option array: scalar fills keep float32, nested
    fills produce a union with the fill's type."""
    base = ak.values_astype(ak.Array([1, None]), np.float32)

    filled = ak.fill_none(base, np.float32(0))
    assert filled.tolist() == [1, 0]
    assert str(filled.type) == "2 * float32"

    filled = ak.fill_none(base, np.array(0, np.float32))
    assert filled.tolist() == [1, 0]
    assert str(filled.type) == "2 * float32"

    filled = ak.fill_none(base, np.array([0], np.float32))
    assert filled.tolist() == [1, [0]]
    assert str(filled.type) == "2 * union[float32, 1 * float32]"

    filled = ak.fill_none(base, np.array([[0]], np.float32))
    assert filled.tolist() == [1, [[0]]]
    assert str(filled.type) == "2 * union[float32, 1 * 1 * float32]"

    # Python scalars/lists carry their own (64-bit) types into the result.
    filled = ak.fill_none(base, 0)
    assert filled.tolist() == [1, 0]
    assert str(filled.type) == "2 * float64"

    filled = ak.fill_none(base, [0])
    assert filled.tolist() == [1, [0]]
    assert str(filled.type) == "2 * union[float32, 1 * int64]"

    filled = ak.fill_none(base, [[0]])
    assert filled.tolist() == [1, [[0]]]
    assert str(filled.type) == "2 * union[float32, 1 * var * int64]"
def run():
    """Exercise the full JEC/MET correction chain on the nano_dy test sample.

    Relies on module-level fixtures (``array_log``, ``jet_factory``,
    ``met_factory``, ``jec_finalized``) defined elsewhere in this file.
    """
    # Build NanoAOD events from the test file; array reads go through
    # `array_log` so the caller can inspect which arrays were materialized.
    events = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root"),
        persistent_cache=array_log,
    ).events()
    jets = events.Jet
    met = events.MET
    # Raw (uncorrected) kinematics required as inputs by the JEC factory.
    jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
    jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
    # Jets without a matched gen jet fall back to pt_gen = 0.
    jets["pt_gen"] = ak.values_astype(
        ak.fill_none(jets.matched_gen.pt, 0.0), np.float32)
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0]
    # Unbounded cache for lazy JEC evaluation; the finalizer lets the test
    # observe when the cache is garbage-collected.
    jec_cache = cachetools.Cache(np.inf)
    weakref.finalize(jec_cache, jec_finalized.set)
    corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
    corrected_met = met_factory.build(met, corrected_jets, lazy_cache=jec_cache)
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    # Touch the up/down variation of every systematic so they are evaluated.
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc, corrected_met[unc].up.pt)
        print(unc, corrected_met[unc].down.pt)
    for unc in jet_factory.uncertainties():
        print(unc, corrected_jets[unc].up.pt)
    print("Finalized:", array_log.finalized)
def test_string():
    """values_astype narrows numeric fields but leaves string fields untouched."""
    record = ak.Array([{"x": 1.1, "y": "hello"}])
    cast_record = ak.values_astype(record, np.float32)
    assert cast_record.tolist() == [{"x": 1.100000023841858, "y": "hello"}]
def test_RegularArray_and_ListArray():
    """values_astype preserves nested list structure for v1 layout classes."""
    numbers = ak.layout.NumpyArray(
        np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])
    )
    offsets = ak.layout.Index64(np.array([0, 3, 3, 5, 6, 10, 10]))
    listoffsetarray = ak.layout.ListOffsetArray64(offsets, numbers)
    regulararray = ak.layout.RegularArray(listoffsetarray, 2, zeros_length=0)
    listarray = ak.layout.ListArray64(
        ak.layout.Index64(np.array([0, 1])),
        ak.layout.Index64(np.array([2, 3])),
        regulararray,
    )

    assert str(ak.type(numbers)) == "float64"
    assert str(ak.type(regulararray)) == "2 * var * float64"
    assert str(ak.type(listarray)) == "var * 2 * var * float64"

    # Only the leaf dtype changes; every list dimension survives the cast.
    as_int8 = ak.values_astype(regulararray, "int8", highlevel=False)
    assert str(ak.type(as_int8)) == "2 * var * int8"
    as_bool = ak.values_astype(listarray, "bool", highlevel=False)
    assert str(ak.type(as_bool)) == "var * 2 * var * bool"
def prepare_jets(df, is_mc):
    """Populate jet columns required by JEC that NanoAOD does not provide."""
    # Initialize missing fields (needed for JEC)
    raw_scale = 1 - df.Jet.rawFactor
    df["Jet", "pt_raw"] = raw_scale * df.Jet.pt
    df["Jet", "mass_raw"] = raw_scale * df.Jet.mass
    df["Jet", "rho"] = ak.broadcast_arrays(df.fixedGridRhoFastjetAll, df.Jet.pt)[0]
    if is_mc:
        # Only simulation has gen-matched jets; unmatched jets get pt_gen = 0.
        df["Jet", "pt_gen"] = ak.values_astype(
            ak.fill_none(df.Jet.matched_gen.pt, 0), np.float32
        )
def test_UnmaskedArray():
    """Casting the content of an UnmaskedArray keeps the option-type wrapper."""
    base = ak.layout.NumpyArray(
        np.array([0.25, 0.5, 3.5, 4.5, 5.5], dtype=np.float64)
    )
    wrapped = ak.layout.UnmaskedArray(base)
    assert ak.to_list(wrapped) == [0.25, 0.5, 3.5, 4.5, 5.5]
    assert str(ak.type(base)) == "float64"
    assert str(ak.type(ak.Array(base))) == "5 * float64"
    assert str(ak.type(wrapped)) == "?float64"
    assert str(ak.type(ak.Array(wrapped))) == "5 * ?float64"

    # NumPy sanity checks: the casts below are representable (some lossy).
    assert np.can_cast(np.float32, np.float64) is True
    assert np.can_cast(np.float64, np.float32, "unsafe") is True
    assert np.can_cast(np.float64, np.int8, "unsafe") is True

    as_float32 = ak.values_astype(base, "float32", highlevel=False)
    wrapped_float32 = ak.layout.UnmaskedArray(as_float32)
    assert ak.to_list(wrapped_float32) == [0.25, 0.5, 3.5, 4.5, 5.5]
    assert str(ak.type(as_float32)) == "float32"
    assert str(ak.type(ak.Array(as_float32))) == "5 * float32"
    assert str(ak.type(wrapped_float32)) == "?float32"
    assert str(ak.type(ak.Array(wrapped_float32))) == "5 * ?float32"

    # int8 cast truncates the fractional parts.
    as_int8 = ak.values_astype(base, "int8", highlevel=False)
    wrapped_int8 = ak.layout.UnmaskedArray(as_int8)
    assert ak.to_list(wrapped_int8) == [0, 0, 3, 4, 5]
    assert str(ak.type(as_int8)) == "int8"
    assert str(ak.type(ak.Array(as_int8))) == "5 * int8"
    assert str(ak.type(wrapped_int8)) == "?int8"
    assert str(ak.type(ak.Array(wrapped_int8))) == "5 * ?int8"

    # Widening back to float64 keeps the already-truncated values.
    widened = ak.values_astype(as_int8, "float64", highlevel=False)
    wrapped_widened = ak.layout.UnmaskedArray(widened)
    assert ak.to_list(wrapped_widened) == [0, 0, 3, 4, 5]
    assert str(ak.type(widened)) == "float64"
    assert str(ak.type(ak.Array(widened))) == "5 * float64"
    assert str(ak.type(wrapped_widened)) == "?float64"
    assert str(ak.type(ak.Array(wrapped_widened))) == "5 * ?float64"
def test_float_values_astype_datetime():
    """Floats cast to datetime64[ms] are truncated to whole milliseconds and
    missing values are preserved."""
    values = ak.Array([1.9999, 1567416600000, 0, None, 11, 0.555])
    assert str(values.type) == "6 * ?float64"

    stamps = ak.values_astype(values, "datetime64[ms]")
    assert str(stamps.type) == "6 * ?datetime64"
    assert stamps.to_list() == [
        np.datetime64("1970-01-01T00:00:00.001"),
        np.datetime64("2019-09-02T09:30:00.000"),
        np.datetime64("1970-01-01T00:00:00.000"),
        None,
        np.datetime64("1970-01-01T00:00:00.011"),
        np.datetime64("1970-01-01T00:00:00.000"),
    ]
def sort_overlap(visibles, roots, eventWise):
    """Assign each visible particle to exactly one shower.

    Particles that appear in more than one shower are given to the shower
    whose root 3-vector is most collinear (largest cosine) with their own.

    Args:
        visibles: per-root lists of visible-particle indices (may overlap).
        roots: indices of the shower roots into ``eventWise``.
        eventWise: event record providing ``Px``, ``Py`` and ``Rapidity``.

    Returns:
        Awkward array of per-root index lists forming a partition of the
        original visible particles (each appears exactly once).
    """
    visibles = ak.values_astype(visibles, int)
    # One "momentum" row per root.  NOTE(review): components are
    # (Px, Py, Rapidity), not a true momentum vector — presumably intentional
    # as a similarity measure, but confirm with the author.
    momentums = np.array((eventWise.Px[roots], eventWise.Py[roots],
                          eventWise.Rapidity[roots])).T
    all_visibles = sorted(set(ak.flatten(visibles)))
    new_visibles = [[] for _ in roots]
    for i, vis in enumerate(all_visibles):
        # Every shower that currently claims this particle.
        in_showers = [
            n_s for n_s, shower in enumerate(visibles) if vis in shower
        ]
        if len(in_showers) == 1:
            # Unambiguous: keep the particle where it is.
            new_visibles[in_showers[0]].append(vis)
            continue
        # Contested: give it to the shower whose root direction has the
        # largest cosine similarity with the particle's own vector.
        p = np.array(
            [eventWise.Px[vis], eventWise.Py[vis], eventWise.Rapidity[vis]])
        abs_p = np.sqrt(np.sum(p**2))
        abs_m = np.sqrt(np.sum(momentums[in_showers]**2, axis=1))
        cos_angle = np.sum(p * momentums[in_showers], axis=1) / (abs_p * abs_m)
        new_visibles[in_showers[np.argmax(cos_angle)]].append(vis)
    new_visibles = ak.from_iter(new_visibles)
    # Sanity checks: the result is a partition (no duplicates, nothing lost).
    assert len(set(ak.flatten(new_visibles))) == len(ak.flatten(new_visibles))
    assert len(all_visibles) == len(ak.flatten(new_visibles))
    return new_visibles
def find_jets(events: ak.Array, settings: JetFinderSettings) -> ak.Array:
    """Run jet finding on ``events`` and wrap the result in an ``ak.Array``.

    Convenience wrapper around ``_find_jets`` so callers never have to pass a
    layout and reconstruct the high-level array themselves.

    Args:
        events: Awkward array of particles per event. Must contain the
            "E", "px", "py", and "pz" columns.
        settings: Jet finder settings for configuring a ClusterSequenceArea
            (i.e. the JetDefinition and GhostedAreaSpec definitions).

    Returns:
        Jets found according to the settings.
    """
    found_jets, found_constituents = _find_jets(events=events.layout, settings=settings)
    result_fields = {
        "jets": ak.Array(found_jets),
        "constituent_indices": ak.values_astype(ak.Array(found_constituents), np.int32),
    }
    return ak.zip(result_fields, depth_limit=1)
def process(self, events):
    """Event selection and histogramming for the eee + photon channel.

    Applies the cut flow (trigger, 3-electron selection, loose-muon veto,
    photon selection, OSSF triplet building, kinematic cuts), computes MC
    scale factors / pileup / trigger weights, and fills output histograms.

    NOTE(review): the original source was whitespace-mangled; the block
    structure below was reconstructed from data dependencies — verify
    against version history before trusting the exact nesting.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    # NOTE(review): `sample_name` is a module-level global rather than
    # events.metadata['dataset'] (the usual coffea pattern) — confirm.
    dataset = sample_name  # events.metadata['dataset']
    # Data or MC: only simulation carries a generator weight branch.
    isData = "genWeight" not in events.fields
    isFake = self._isFake
    # Stop processing if there is no event remain
    if len(events) == 0:
        return out
    # Golden Json file (certified lumi sections) — data only.
    if (self._year == "2018") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD"
    if (self._year == "2017") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"
    # <----- Get Scale factors ------>#
    if not isData:
        # Egamma reco ID
        get_ele_reco_above20_sf = self._corrections["get_ele_reco_above20_sf"][
            self._year
        ]
        get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][self._year]
        get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][self._year]
        # DoubleEG trigger
        # 2016, 2017 are not applied yet
        if self._year == "2018":
            get_ele_trig_leg1_SF = self._corrections["get_ele_trig_leg1_SF"][
                self._year
            ]
            get_ele_trig_leg1_data_Eff = self._corrections[
                "get_ele_trig_leg1_data_Eff"
            ][self._year]
            get_ele_trig_leg1_mc_Eff = self._corrections[
                "get_ele_trig_leg1_mc_Eff"
            ][self._year]
            get_ele_trig_leg2_SF = self._corrections["get_ele_trig_leg2_SF"][
                self._year
            ]
            get_ele_trig_leg2_data_Eff = self._corrections[
                "get_ele_trig_leg2_data_Eff"
            ][self._year]
            get_ele_trig_leg2_mc_Eff = self._corrections[
                "get_ele_trig_leg2_mc_Eff"
            ][self._year]
        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
        pu = self._puweight_arr[pu_weight_idx]
        print("## pu_idx: ", len(pu_weight_idx), pu_weight_idx)
        print("## pu_arr: ", len(self._puweight_arr), self._puweight_arr)
        print("## pu:", len(pu), pu)
    # NOTE(review): `selection` is never used afterwards — dead code?
    selection = processor.PackedSelection()
    # Cut flow
    cut0 = np.zeros(len(events))
    out["cutflow"].fill(dataset=dataset, cutflow=cut0)

    # <----- Helper functions ------>#

    # Sort by PT helper function
    def sort_by_pt(ele, pho, jet):
        ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
        pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
        jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]
        return ele, pho, jet

    # Lorentz vectors
    from coffea.nanoevents.methods import vector
    ak.behavior.update(vector.behavior)

    def TLorentz_vector(vec):
        # Cartesian (x, y, z, t) LorentzVector.
        vec = ak.zip(
            {"x": vec.x, "y": vec.y, "z": vec.z, "t": vec.t},
            with_name="LorentzVector",
        )
        return vec

    def TLorentz_vector_cylinder(vec):
        # Cylindrical (pt, eta, phi, mass) LorentzVector.
        vec = ak.zip(
            {
                "pt": vec.pt,
                "eta": vec.eta,
                "phi": vec.phi,
                "mass": vec.mass,
            },
            with_name="PtEtaPhiMLorentzVector",
        )
        return vec

    # <----- Selection ------>#
    Initial_events = events
    # Good Run ( Golden Json files )
    from coffea import lumi_tools
    if isData:
        lumi_mask_builder = lumi_tools.LumiMask(injson)
        lumimask = ak.Array(
            lumi_mask_builder.__call__(events.run, events.luminosityBlock)
        )
        events = events[lumimask]
        # print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events)))
    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    ##----------- Cut flow1: Passing Triggers
    # double lepton trigger
    # NOTE(review): np.bool was removed in NumPy 1.24 — use `bool`/`np.bool_`.
    is_double_ele_trigger = True
    if not is_double_ele_trigger:
        double_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
    else:
        double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
        for path in self._doubleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]
    # single lepton trigger
    is_single_ele_trigger = True
    if not is_single_ele_trigger:
        single_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
    else:
        single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]

    events.Electron, events.Photon, events.Jet = sort_by_pt(
        events.Electron, events.Photon, events.Jet
    )

    # Good Primary vertex
    nPV = events.PV.npvsGood
    nPV_nw = events.PV.npvsGood
    if not isData:
        nPV = nPV * pu
        print(pu)

    # Apply cut1
    events = events[double_ele_triggers_arr]
    if not isData:
        pu = pu[double_ele_triggers_arr]
    # Stop processing if there is no event remain
    if len(events) == 0:
        return out
    cut1 = np.ones(len(events))
    out["cutflow"].fill(dataset=dataset, cutflow=cut1)

    # Set Particles
    Electron = events.Electron
    Muon = events.Muon
    Photon = events.Photon
    MET = events.MET
    Jet = events.Jet

    # --Muon ( only used to calculate dR )
    MuSelmask = (
        (Muon.pt >= 10)
        & (abs(Muon.eta) <= 2.5)
        & (Muon.tightId)
        & (Muon.pfRelIso04_all < 0.15)
    )
    Muon = Muon[MuSelmask]
    # --Loose Muon ( For Loose Muon veto )
    # NOTE(review): applied to the already tight-selected Muon collection —
    # confirm the veto is meant to act on that subset.
    LoooseMuSelmask = (
        (Muon.pt > 20)
        & (abs(Muon.eta) < 2.4)
        & (Muon.isPFcand)
        & (Muon.isGlobal | Muon.isTracker)
        & (Muon.pfRelIso03_all < 0.25)
    )  # Reference: VBS Zgamma+2jets
    VetoMuon = Muon[LoooseMuSelmask]

    ##----------- Cut flow2: Electron Selection
    # Barrel OR endcap cut-based medium ID with region-specific IP cuts.
    EleSelmask = (
        (Electron.pt >= 10)
        & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479)
        & (Electron.cutBased > 2)
        & (abs(Electron.dxy) < 0.05)
        & (abs(Electron.dz) < 0.1)
    ) | (
        (Electron.pt >= 10)
        & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479)
        & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5)
        & (Electron.cutBased > 2)
        & (abs(Electron.dxy) < 0.1)
        & (abs(Electron.dz) < 0.2)
    )
    Electron = Electron[EleSelmask]

    # Event with 3 Electrons
    # apply cut 2
    Tri_electron_mask = ak.num(Electron) == 3
    Electron = Electron[Tri_electron_mask]
    Photon = Photon[Tri_electron_mask]
    Jet = Jet[Tri_electron_mask]
    MET = MET[Tri_electron_mask]
    Muon = Muon[Tri_electron_mask]
    VetoMuon = VetoMuon[Tri_electron_mask]
    if not isData:
        pu = pu[Tri_electron_mask]
    events = events[Tri_electron_mask]
    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out
    cut2 = np.ones(len(Photon)) * 2
    out["cutflow"].fill(dataset=dataset, cutflow=cut2)

    ##----------- Cut flow3: 4th lepton veto (Loose Muon)
    # Veto 4th Loose muon
    # apply cut 3
    fourth_lepton_veto = ak.num(VetoMuon) < 1
    Electron = Electron[fourth_lepton_veto]
    Photon = Photon[fourth_lepton_veto]
    Jet = Jet[fourth_lepton_veto]
    MET = MET[fourth_lepton_veto]
    Muon = Muon[fourth_lepton_veto]
    if not isData:
        pu = pu[fourth_lepton_veto]
    events = events[fourth_lepton_veto]
    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out
    cut3 = np.ones(len(Photon)) * 3
    out["cutflow"].fill(dataset=dataset, cutflow=cut3)

    ##----------- Cut flow4: Photon Selection
    # Basic photon selection: barrel or endcap, excluding the EB/EE gap.
    isgap_mask = (abs(Photon.eta) < 1.442) | (
        (abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5)
    )
    Pixel_seed_mask = ~Photon.pixelSeed
    if (dataset == "ZZ") and (self._year == "2017"):
        PT_ID_mask = (Photon.pt >= 20) & (
            Photon.cutBasedBitmap >= 3
        )  # 2^0(Loose) + 2^1(Medium) + 2^2(Tights)
    else:
        PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1)
    # dR cut with selected Muon and Electrons
    dr_pho_ele_mask = ak.all(
        Photon.metric_table(Electron) >= 0.5, axis=-1
    )  # default metric table: delta_r
    dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)
    # genPartFlav cut
    """
    if dataset == "WZG":
        isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11)
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
    elif dataset == "WZ":
        isPrompt = (Photon.genPartFlav == 1)
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
    else:
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
    """
    PhoSelmask = (
        PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
    )
    Photon = Photon[PhoSelmask]

    # Apply cut 4
    A_photon_mask = ak.num(Photon) > 0
    Electron = Electron[A_photon_mask]
    Photon = Photon[A_photon_mask]
    Jet = Jet[A_photon_mask]
    Muon = Muon[A_photon_mask]
    MET = MET[A_photon_mask]
    if not isData:
        pu = pu[A_photon_mask]
    events = events[A_photon_mask]
    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out
    cut4 = np.ones(len(Photon)) * 4
    out["cutflow"].fill(dataset=dataset, cutflow=cut4)

    ##----------- Cut flow5: OSSF
    # OSSF index maker: all (i0, i1, i2) with i0/i1 oppositely charged.
    @numba.njit
    def find_3lep(events_leptons, builder):
        for leptons in events_leptons:
            builder.begin_list()
            nlep = len(leptons)
            for i0 in range(nlep):
                for i1 in range(i0 + 1, nlep):
                    if leptons[i0].charge + leptons[i1].charge != 0:
                        continue
                    for i2 in range(nlep):
                        if len({i0, i1, i2}) < 3:
                            continue
                        builder.begin_tuple(3)
                        builder.index(0).integer(i0)
                        builder.index(1).integer(i1)
                        builder.index(2).integer(i2)
                        builder.end_tuple()
            builder.end_list()
        return builder

    eee_triplet_idx = find_3lep(Electron, ak.ArrayBuilder()).snapshot()
    # Exactly two OSSF pairings among the three electrons.
    ossf_mask = ak.num(eee_triplet_idx) == 2

    # Apply cut 5
    eee_triplet_idx = eee_triplet_idx[ossf_mask]
    Electron = Electron[ossf_mask]
    Photon = Photon[ossf_mask]
    Jet = Jet[ossf_mask]
    MET = MET[ossf_mask]
    if not isData:
        pu = pu[ossf_mask]
    events = events[ossf_mask]
    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out
    cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5
    out["cutflow"].fill(dataset=dataset, cutflow=cut5)

    # Define Electron Triplet
    Triple_electron = [Electron[eee_triplet_idx[idx]] for idx in "012"]
    Triple_eee = ak.zip(
        {
            "lep1": Triple_electron[0],
            "lep2": Triple_electron[1],
            "lep3": Triple_electron[2],
            # p4 is the OSSF *pair* (lep1 + lep2), not the full triplet.
            "p4": TLorentz_vector(Triple_electron[0] + Triple_electron[1]),
        }
    )

    # Ele pair selector --> Close to Z mass
    bestZ_idx = ak.singletons(ak.argmin(abs(Triple_eee.p4.mass - 91.1876), axis=1))
    Triple_eee = Triple_eee[bestZ_idx]
    leading_ele = Triple_eee.lep1
    subleading_ele = Triple_eee.lep2
    third_ele = Triple_eee.lep3

    def make_leading_pair(target, base):
        # Pick the target object at the index of the highest-pt base object.
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    leading_pho = make_leading_pair(Photon, Photon)

    # -- Scale Factor for each electron
    # Trigger weight helper function: data/MC efficiency ratio for the
    # two-leg double-electron trigger (inclusion–exclusion over legs).
    def Trigger_Weight(eta1, pt1, eta2, pt2):
        per_ev_MC = (
            get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg2_mc_Eff(eta2, pt2)
            + get_ele_trig_leg1_mc_Eff(eta2, pt2) * get_ele_trig_leg2_mc_Eff(eta1, pt1)
            - get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg1_mc_Eff(eta2, pt2)
        )
        per_ev_data = (
            get_ele_trig_leg1_data_Eff(eta1, pt1)
            * get_ele_trig_leg1_SF(eta1, pt1)
            * get_ele_trig_leg2_data_Eff(eta2, pt2)
            * get_ele_trig_leg2_SF(eta2, pt2)
            + get_ele_trig_leg1_data_Eff(eta2, pt2)
            * get_ele_trig_leg1_SF(eta2, pt2)
            * get_ele_trig_leg2_data_Eff(eta1, pt1)
            * get_ele_trig_leg2_SF(eta1, pt1)
            - get_ele_trig_leg1_data_Eff(eta1, pt1)
            * get_ele_trig_leg1_SF(eta1, pt1)
            * get_ele_trig_leg1_data_Eff(eta2, pt2)
            * get_ele_trig_leg1_SF(eta2, pt2)
        )
        return per_ev_data / per_ev_MC

    if not isData:
        ## -------------< Egamma ID and Reco Scale factor > -----------------##
        # NOTE(review): rebinding `get_pho_medium_id_sf` from a lookup
        # function to its evaluated array — intentional but fragile.
        get_pho_medium_id_sf = get_pho_medium_id_sf(
            ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt)
        )
        ele_reco_sf = (
            get_ele_reco_above20_sf(
                ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
                ak.flatten(leading_ele.pt),
            )
            * get_ele_reco_above20_sf(
                ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
                ak.flatten(subleading_ele.pt),
            )
            * get_ele_reco_above20_sf(
                ak.flatten(third_ele.deltaEtaSC + third_ele.eta),
                ak.flatten(third_ele.pt),
            )
        )
        ele_medium_id_sf = (
            get_ele_medium_id_sf(
                ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
                ak.flatten(leading_ele.pt),
            )
            * get_ele_medium_id_sf(
                ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
                ak.flatten(subleading_ele.pt),
            )
            * get_ele_medium_id_sf(
                ak.flatten(third_ele.deltaEtaSC + third_ele.eta),
                ak.flatten(third_ele.pt),
            )
        )
        ## -------------< Double Electron Trigger Scale factor > -----------------##
        eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
        eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
        pt1 = ak.flatten(leading_ele.pt)
        pt2 = ak.flatten(subleading_ele.pt)
        # -- 2017,2016 are not applied yet
        if self._year == "2018":
            ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

    ##----------- Cut flow6: Event selection
    # Mee cut
    diele = Triple_eee.p4
    Mee_cut_mask = ak.firsts(diele.mass) > 4
    # Z mass window
    # zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) < 15 # SR, CR_ZZA, CR_Z+jets, CR_Conversion
    # zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 5 # CR_t-enriched
    # zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 15 # CR_Conversion
    # M(eee) cut SR, CR_ZZA, CR_Z+jets, CR_t enriched
    # eee = Triple_eee.lep1 + Triple_eee.lep2 + Triple_eee.lep3
    # Meee_cut_mask = ak.firsts(eee.mass > 100)
    # Meee_cut_mask = ak.firsts(eee.mass <= 100)
    # b-Jet veto cut #SR, CR_ZZA, CR_Z+jets, CR_Conversion
    # bjet_mask = (Jet.btagCSVV2 > 0.4184) & (Jet.pt > 30)
    # bjet_veto_mask = ak.num(Jet[bjet_mask]) == 0
    # bjet_veto_mask = ak.num(Jet[bjet_mask]) > 0 # CR_t-enriched
    # Electron PT cuts
    Elept_mask = ak.firsts(
        (leading_ele.pt >= 25) & (subleading_ele.pt >= 10) & (third_ele.pt >= 25)
    )
    # MET cuts
    # NOTE(review): compares the MET *object* (not MET.pt) to 20, while the
    # commented alternatives use MET.pt — confirm this is intended.
    MET_mask = MET > 20  # Baseline
    # MET_mask = MET.pt > 30 # SR, CR-ZZE, CR-t-entirched
    # MET_mask = MET.pt <= 30 # CR-Z+jets. CR-Conversion
    # Mask
    # Event_sel_mask = Elept_mask & MET_mask & bjet_veto_mask & Mee_cut_mask & zmass_window_mask & Meee_cut_mask # SR,CR
    Event_sel_mask = Elept_mask & MET_mask & Mee_cut_mask  # SR,CR

    # Apply cut6
    Triple_eee_sel = Triple_eee[Event_sel_mask]
    leading_pho_sel = leading_pho[Event_sel_mask]
    MET_sel = MET[Event_sel_mask]
    events = events[Event_sel_mask]

    # Photon EE and EB
    isEE_mask = leading_pho.isScEtaEE
    isEB_mask = leading_pho.isScEtaEB
    Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
    Pho_EB = leading_pho[isEB_mask & Event_sel_mask]

    # Stop processing if there is no event remain
    if len(leading_pho_sel) == 0:
        return out
    cut6 = np.ones(ak.sum(ak.num(leading_pho_sel) > 0)) * 6
    out["cutflow"].fill(dataset=dataset, cutflow=cut6)

    ## -------------------- Prepare making hist --------------#
    # Photon
    phoPT = ak.flatten(leading_pho_sel.pt)
    phoEta = ak.flatten(leading_pho_sel.eta)
    phoPhi = ak.flatten(leading_pho_sel.phi)
    # Photon EE
    if len(Pho_EE.pt) != 0:
        Pho_EE_PT = ak.flatten(Pho_EE.pt)
        Pho_EE_Eta = ak.flatten(Pho_EE.eta)
        Pho_EE_Phi = ak.flatten(Pho_EE.phi)
        Pho_EE_sieie = ak.flatten(Pho_EE.sieie)
        Pho_EE_hoe = ak.flatten(Pho_EE.hoe)
        Pho_EE_Iso_charge = ak.flatten(Pho_EE.pfRelIso03_chg)
    # Photon EB
    if len(Pho_EB.pt) != 0:
        Pho_EB_PT = ak.flatten(Pho_EB.pt)
        Pho_EB_Eta = ak.flatten(Pho_EB.eta)
        Pho_EB_Phi = ak.flatten(Pho_EB.phi)
        Pho_EB_sieie = ak.flatten(Pho_EB.sieie)
        Pho_EB_hoe = ak.flatten(Pho_EB.hoe)
        Pho_EB_Iso_charge = ak.flatten(Pho_EB.pfRelIso03_chg)
    # Electrons
    ele1PT = ak.flatten(Triple_eee_sel.lep1.pt)
    ele1Eta = ak.flatten(Triple_eee_sel.lep1.eta)
    ele1Phi = ak.flatten(Triple_eee_sel.lep1.phi)
    ele2PT = ak.flatten(Triple_eee_sel.lep2.pt)
    ele2Eta = ak.flatten(Triple_eee_sel.lep2.eta)
    ele2Phi = ak.flatten(Triple_eee_sel.lep2.phi)
    ele3PT = ak.flatten(Triple_eee_sel.lep3.pt)
    ele3Eta = ak.flatten(Triple_eee_sel.lep3.eta)
    ele3Phi = ak.flatten(Triple_eee_sel.lep3.phi)
    # NOTE(review): `charge` is computed but never filled into a histogram.
    charge = ak.flatten(Triple_eee.lep1.charge + Triple_eee.lep2.charge)
    # MET
    met = ak.to_numpy(MET_sel)
    # M(eea) M(ee)
    diele = Triple_eee_sel.p4
    eeg_vec = diele + leading_pho_sel
    Meea = ak.flatten(eeg_vec.mass)
    Mee = ak.flatten(Triple_eee_sel.p4.mass)

    # --- Apply weight and hist
    if isFake:
        weights = processor.Weights(len(cut6))
    else:
        weights = processor.Weights(len(cut5))

    # -------------------- Sieie bins---------------------------#
    def make_bins(pt, eta, bin_range_str):
        # Boolean mask for one (pt, |eta|) analysis bin, selected by name.
        bin_dict = {
            "PT_1_eta_1": (pt > 20) & (pt < 30) & (eta < 1),
            "PT_1_eta_2": (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5),
            "PT_1_eta_3": (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2),
            "PT_1_eta_4": (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5),
            "PT_2_eta_1": (pt > 30) & (pt < 40) & (eta < 1),
            "PT_2_eta_2": (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5),
            "PT_2_eta_3": (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2),
            "PT_2_eta_4": (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5),
            "PT_3_eta_1": (pt > 40) & (pt < 50) & (eta < 1),
            "PT_3_eta_2": (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5),
            "PT_3_eta_3": (pt > 40) & (pt < 50) & (eta > 1.5) & (eta < 2),
            "PT_3_eta_4": (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5),
            "PT_4_eta_1": (pt > 50) & (eta < 1),
            "PT_4_eta_2": (pt > 50) & (eta > 1) & (eta < 1.5),
            "PT_4_eta_3": (pt > 50) & (eta > 1.5) & (eta < 2),
            "PT_4_eta_4": (pt > 50) & (eta > 2) & (eta < 2.5),
        }
        binmask = bin_dict[bin_range_str]
        return binmask

    bin_name_list = [
        "PT_1_eta_1",
        "PT_1_eta_2",
        "PT_1_eta_3",
        "PT_1_eta_4",
        "PT_2_eta_1",
        "PT_2_eta_2",
        "PT_2_eta_3",
        "PT_2_eta_4",
        "PT_3_eta_1",
        "PT_3_eta_2",
        "PT_3_eta_3",
        "PT_3_eta_4",
        "PT_4_eta_1",
        "PT_4_eta_2",
        "PT_4_eta_3",
        "PT_4_eta_4",
    ]

    ## -- Fake-fraction Lookup table --##
    if isFake:
        # Make Bin-range mask
        binned_pteta_mask = {}
        for name in bin_name_list:
            binned_pteta_mask[name] = make_bins(
                ak.flatten(leading_pho_sel.pt),
                ak.flatten(abs(leading_pho_sel.eta)),
                name,
            )
        # Read Fake fraction --> Mapping bin name to int()
        # NOTE(review): allow_pickle="True" is a truthy *string*, not the
        # boolean True — works by accident; should be allow_pickle=True.
        in_dict = np.load('Fitting_v2/results_210517.npy', allow_pickle="True")[()]
        idx = 0
        fake_dict = {}
        for i, j in in_dict.items():
            fake_dict[idx] = j
            idx += 1
        # Reconstruct Fake_weight
        fw = 0
        for i, j in binned_pteta_mask.items():
            fw = fw + j * fake_dict[bin_name_list.index(i)]

        # Process 0 weight to 1
        @numba.njit
        def zero_one(x):
            if x == 0:
                x = 1
            return x

        vec_zero_one = np.vectorize(zero_one)
        fw = vec_zero_one(fw)

    # --- skim cut-weight
    if not isFake:
        def skim_weight(arr):
            # Drop None entries and zero weights; return a plain numpy array.
            mask1 = ~ak.is_none(arr)
            subarr = arr[mask1]
            mask2 = subarr != 0
            return ak.to_numpy(subarr[mask2])
    else:
        def skim_weight(arr):
            return arr

    if not isFake:
        cuts = Event_sel_mask
        cuts_pho_EE = ak.flatten(isEE_mask)
        cuts_pho_EB = ak.flatten(isEB_mask)
    if isFake:
        cuts = np.ones(len(Event_sel_mask))
        cuts_pho_EE = ak.flatten(isEE_mask & Event_sel_mask)
        cuts_pho_EB = ak.flatten(isEB_mask & Event_sel_mask)

    if isFake:
        weights.add("fake_fraction", fw)
    # Weight and SF here
    if not (isData | isFake):
        weights.add("pileup", pu)
        weights.add("ele_id", ele_medium_id_sf)
        weights.add("pho_id", get_pho_medium_id_sf)
        weights.add("ele_reco", ele_reco_sf)
        # 2016,2017 are not applied yet
        if self._year == "2018":
            weights.add("ele_trigger", ele_trig_weight)

    # ---------------------------- Fill hist --------------------------------------#
    # Initial events
    out["sumw"][dataset] += len(Initial_events)
    print("cut1: {0},cut2: {1},cut3: {2},cut4: {3},cut5: {4},cut6: {5},cut7: {6}".format(
        len(cut0), len(cut1), len(cut2), len(cut3), len(cut4), len(cut5), len(cut6)))
    ## Cut flow loop
    # for cut in [cut0, cut1, cut2, cut3, cut4, cut5,cut6]:
    #     out["cutflow"].fill(dataset=dataset, cutflow=cut)
    # Primary vertex
    out["nPV"].fill(
        dataset=dataset,
        nPV=nPV,
    )
    out["nPV_nw"].fill(dataset=dataset, nPV_nw=nPV_nw)
    # Fill hist
    # -- met -- #
    out["met"].fill(
        dataset=dataset, met=met, weight=skim_weight(weights.weight() * cuts)
    )
    # --mass -- #
    out["mass"].fill(
        dataset=dataset, mass=Mee, weight=skim_weight(weights.weight() * cuts)
    )
    out["mass_eea"].fill(
        dataset=dataset, mass_eea=Meea, weight=skim_weight(weights.weight() * cuts)
    )
    # -- Electron -- #
    out["ele1pt"].fill(
        dataset=dataset, ele1pt=ele1PT, weight=skim_weight(weights.weight() * cuts)
    )
    out["ele1eta"].fill(
        dataset=dataset,
        ele1eta=ele1Eta,
        weight=skim_weight(weights.weight() * cuts),
    )
    out["ele1phi"].fill(
        dataset=dataset,
        ele1phi=ele1Phi,
        weight=skim_weight(weights.weight() * cuts),
    )
    out["ele2pt"].fill(
        dataset=dataset, ele2pt=ele2PT, weight=skim_weight(weights.weight() * cuts)
    )
    out["ele2eta"].fill(
        dataset=dataset,
        ele2eta=ele2Eta,
        weight=skim_weight(weights.weight() * cuts),
    )
    out["ele2phi"].fill(
        dataset=dataset,
        ele2phi=ele2Phi,
        weight=skim_weight(weights.weight() * cuts),
    )
    out["ele3pt"].fill(
        dataset=dataset, ele3pt=ele3PT, weight=skim_weight(weights.weight() * cuts)
    )
    # -- Photon -- #
    out["phopt"].fill(
        dataset=dataset, phopt=phoPT, weight=skim_weight(weights.weight() * cuts)
    )
    out["phoeta"].fill(
        dataset=dataset, phoeta=phoEta, weight=skim_weight(weights.weight() * cuts)
    )
    out["phophi"].fill(
        dataset=dataset, phophi=phoPhi, weight=skim_weight(weights.weight() * cuts)
    )
    if len(Pho_EE.pt) != 0:
        out["pho_EE_pt"].fill(
            dataset=dataset,
            pho_EE_pt=Pho_EE_PT,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
        )
        out["pho_EE_eta"].fill(
            dataset=dataset,
            pho_EE_eta=Pho_EE_Eta,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
        )
        out["pho_EE_phi"].fill(
            dataset=dataset,
            pho_EE_phi=Pho_EE_Phi,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
        )
        out["pho_EE_hoe"].fill(
            dataset=dataset,
            pho_EE_hoe=Pho_EE_hoe,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
        )
        out["pho_EE_sieie"].fill(
            dataset=dataset,
            pho_EE_sieie=Pho_EE_sieie,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
        )
        out["pho_EE_Iso_chg"].fill(
            dataset=dataset,
            pho_EE_Iso_chg=Pho_EE_Iso_charge,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
        )
    if len(Pho_EB.pt) != 0:
        out["pho_EB_pt"].fill(
            dataset=dataset,
            pho_EB_pt=Pho_EB_PT,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
        )
        out["pho_EB_eta"].fill(
            dataset=dataset,
            pho_EB_eta=Pho_EB_Eta,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
        )
        out["pho_EB_phi"].fill(
            dataset=dataset,
            pho_EB_phi=Pho_EB_Phi,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
        )
        out["pho_EB_hoe"].fill(
            dataset=dataset,
            pho_EB_hoe=Pho_EB_hoe,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
        )
        out["pho_EB_sieie"].fill(
            dataset=dataset,
            pho_EB_sieie=Pho_EB_sieie,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
        )
        out["pho_EB_Iso_chg"].fill(
            dataset=dataset,
            pho_EB_Iso_chg=Pho_EB_Iso_charge,
            weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
        )
    return out
def select_normal(genparts, w_decay_momid):
    """Build gen-level ttbar decay tables from a GenParticle collection.

    Args:
        genparts: jagged GenParticle collection (must provide ``hasFlags``,
            ``children``, ``distinctChildren``, ``parent`` and the usual
            pt/eta/phi/mass/pdgId fields).
        w_decay_momid: pdgId of the W boson on the top side (the tbar side
            uses its negative).

    Returns:
        dict with keys "SL", "DL", "Had": one ``ak.zip`` table per ttbar decay
        class (semileptonic, dileptonic, all-hadronic) holding the relevant
        gen objects per event.
    """
    columns = ["pt", "eta", "phi", "mass", "pdgId", "charge", "decaytype"]

    def _keep_structure(selected, evt_mask):
        # `selected` is `<array>[evt_mask]`. When no event passes the mask,
        # rebuild an empty jagged layout (one empty sublist per event) so the
        # ak.zip calls below still broadcast event-wise.
        if ak.any(evt_mask):
            return selected
        return ak.unflatten(selected, ak.values_astype(evt_mask, int))

    # get tops, defined as last copy
    gen_tops = genparts[(genparts.hasFlags(['isLastCopy'])) & (genparts.pdgId == 6)]
    gen_tops['charge'] = ak.ones_like(gen_tops.pt) * (2. / 3.)
    gen_tbars = genparts[(genparts.hasFlags(['isLastCopy'])) & (genparts.pdgId == -6)]
    gen_tbars['charge'] = ak.ones_like(gen_tbars.pt) * (-2. / 3.)

    # get direct top decay products (children are "isHardProcess" and "isFirstCopy")
    gen_bs = ak.flatten(gen_tops.children[(gen_tops.children.pdgId == 5)], axis=2)
    gen_bs['charge'] = ak.ones_like(gen_bs.pt) * (-1. / 3.)
    gen_bbars = ak.flatten(
        gen_tbars.children[(gen_tbars.children.pdgId == -5)], axis=2)
    gen_bbars['charge'] = ak.ones_like(gen_bbars.pt) * (1. / 3.)
    gen_wplus = ak.flatten(
        gen_tops.children[(gen_tops.children.pdgId == w_decay_momid)], axis=2)
    gen_wplus['charge'] = ak.ones_like(gen_wplus.pt)
    gen_wminus = ak.flatten(
        gen_tbars.children[(gen_tbars.children.pdgId == -1 * w_decay_momid)],
        axis=2)
    gen_wminus['charge'] = ak.ones_like(gen_wminus.pt) * (-1.)

    # get w decay products
    # get last copy of W bosons
    last_gen_wplus = ak.flatten(gen_tops.distinctChildren[
        (gen_tops.distinctChildren.pdgId == w_decay_momid)
        & (gen_tops.distinctChildren.hasFlags(['isLastCopy']))], axis=2)
    last_gen_wminus = ak.flatten(gen_tbars.distinctChildren[
        (gen_tbars.distinctChildren.pdgId == -1 * w_decay_momid)
        & (gen_tbars.distinctChildren.hasFlags(['isLastCopy']))], axis=2)

    gen_wplus_decaytype = np.zeros(len(gen_wplus))
    gen_wminus_decaytype = np.zeros(len(gen_wminus))

    # up/down partons from last W+
    gen_wpartons_up_fromWplus = ak.flatten(
        last_gen_wplus.children[(np.mod(last_gen_wplus.children.pdgId, 2) == 0)
                                & (np.abs(last_gen_wplus.children.pdgId) < 6)],
        axis=2)
    gen_wpartons_up_fromWplus['charge'] = np.sign(
        gen_wpartons_up_fromWplus.pdgId) * (2. / 3.)
    gen_wplus_decaytype[ak.num(gen_wpartons_up_fromWplus) > 0] = np.ones(
        ak.sum(ak.num(gen_wpartons_up_fromWplus) > 0)) * 2
    gen_wpartons_dw_fromWplus = ak.flatten(
        last_gen_wplus.children[(np.mod(last_gen_wplus.children.pdgId, 2) == 1)
                                & (np.abs(last_gen_wplus.children.pdgId) < 6)],
        axis=2)
    # FIX: charge must come from the down-type parton's own pdgId (was a
    # copy-paste of the up-type pdgId, which flips the sign: W+ -> u dbar
    # should give the dbar charge +1/3, not -1/3).
    gen_wpartons_dw_fromWplus['charge'] = np.sign(
        gen_wpartons_dw_fromWplus.pdgId) * (-1. / 3.)

    # up/down partons from last W-
    gen_wpartons_up_fromWminus = ak.flatten(last_gen_wminus.children[
        (np.mod(last_gen_wminus.children.pdgId, 2) == 0)
        & (np.abs(last_gen_wminus.children.pdgId) < 6)], axis=2)
    gen_wpartons_up_fromWminus['charge'] = np.sign(
        gen_wpartons_up_fromWminus.pdgId) * (2. / 3.)
    gen_wminus_decaytype[ak.num(gen_wpartons_up_fromWminus) > 0] = np.ones(
        ak.sum(ak.num(gen_wpartons_up_fromWminus) > 0)) * 2
    gen_wpartons_dw_fromWminus = ak.flatten(last_gen_wminus.children[
        (np.mod(last_gen_wminus.children.pdgId, 2) == 1)
        & (np.abs(last_gen_wminus.children.pdgId) < 6)], axis=2)
    # FIX: same copy-paste bug as on the W+ side — use the dw parton pdgId.
    gen_wpartons_dw_fromWminus['charge'] = np.sign(
        gen_wpartons_dw_fromWminus.pdgId) * (-1. / 3.)

    # charged leps from last W+
    gen_charged_leps_fromWplus = ak.flatten(
        last_gen_wplus.children[(np.abs(last_gen_wplus.children.pdgId) == 11)
                                | (np.abs(last_gen_wplus.children.pdgId) == 13)
                                | (np.abs(last_gen_wplus.children.pdgId) == 15)],
        axis=2)
    gen_charged_leps_fromWplus['charge'] = ak.ones_like(
        gen_charged_leps_fromWplus.pdgId)
    gen_wplus_decaytype[ak.num(gen_charged_leps_fromWplus) > 0] = np.ones(
        ak.sum(ak.num(gen_charged_leps_fromWplus) > 0))
    # add decaytype (0 is INVALID, 1 is LEPTONIC, 2 is HADRONIC)
    gen_wplus['decaytype'] = ak.unflatten(gen_wplus_decaytype, ak.num(gen_wplus))
    gen_tops['decaytype'] = gen_wplus['decaytype']  # set decaytype for tops
    gen_bs['decaytype'] = gen_wplus['decaytype']  # set decaytype for bs
    # neutral leps from last W+
    gen_neutral_leps_fromWplus = ak.flatten(
        last_gen_wplus.children[(np.abs(last_gen_wplus.children.pdgId) == 12)
                                | (np.abs(last_gen_wplus.children.pdgId) == 14)
                                | (np.abs(last_gen_wplus.children.pdgId) == 16)],
        axis=2)
    gen_neutral_leps_fromWplus['charge'] = ak.zeros_like(
        gen_neutral_leps_fromWplus.pt)

    # charged leps from last W-
    gen_charged_leps_fromWminus = ak.flatten(last_gen_wminus.children[
        (np.abs(last_gen_wminus.children.pdgId) == 11)
        | (np.abs(last_gen_wminus.children.pdgId) == 13)
        | (np.abs(last_gen_wminus.children.pdgId) == 15)], axis=2)
    gen_charged_leps_fromWminus['charge'] = ak.ones_like(
        gen_charged_leps_fromWminus.pdgId) * (-1.)
    gen_wminus_decaytype[ak.num(gen_charged_leps_fromWminus) > 0] = np.ones(
        ak.sum(ak.num(gen_charged_leps_fromWminus) > 0))
    # add decaytype (0 is INVALID, 1 is LEPTONIC, 2 is HADRONIC)
    gen_wminus['decaytype'] = ak.unflatten(gen_wminus_decaytype,
                                           ak.num(gen_wminus))
    gen_tbars['decaytype'] = gen_wminus['decaytype']  # set decaytype for tbars
    gen_bbars['decaytype'] = gen_wminus['decaytype']  # set decaytype for bbars
    # neutral leps from last W-
    gen_neutral_leps_fromWminus = ak.flatten(last_gen_wminus.children[
        (np.abs(last_gen_wminus.children.pdgId) == 12)
        | (np.abs(last_gen_wminus.children.pdgId) == 14)
        | (np.abs(last_gen_wminus.children.pdgId) == 16)], axis=2)
    gen_neutral_leps_fromWminus['charge'] = ak.zeros_like(
        gen_neutral_leps_fromWminus.pt)

    # combine the W+ and W- products into single (W+, W-) ordered collections
    gen_wpartons_up = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    gen_wpartons_dw = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    gen_charged_leps = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    gen_neutral_leps = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    for column in columns:
        if column == 'decaytype':
            continue
        gen_wpartons_up[column] = ak.flatten(
            ak.concatenate([
                gen_wpartons_up_fromWplus[column],
                gen_wpartons_up_fromWminus[column]
            ], axis=1))  # (up-type partons from W+, W-)
        gen_wpartons_dw[column] = ak.flatten(
            ak.concatenate([
                gen_wpartons_dw_fromWplus[column],
                gen_wpartons_dw_fromWminus[column]
            ], axis=1))  # (dw-type partons from W+, W-)
        gen_charged_leps[column] = ak.flatten(
            ak.concatenate([
                gen_charged_leps_fromWplus[column],
                gen_charged_leps_fromWminus[column]
            ], axis=1))  # (charged leps from W+, W-)
        gen_neutral_leps[column] = ak.flatten(
            ak.concatenate([
                gen_neutral_leps_fromWplus[column],
                gen_neutral_leps_fromWminus[column]
            ], axis=1))  # (neutral leps from W+, W-)
    gen_wpartons_up = ak.unflatten(
        gen_wpartons_up,
        ak.num(gen_wpartons_up_fromWplus) + ak.num(gen_wpartons_up_fromWminus))
    gen_wpartons_dw = ak.unflatten(
        gen_wpartons_dw,
        ak.num(gen_wpartons_dw_fromWplus) + ak.num(gen_wpartons_dw_fromWminus))
    gen_charged_leps = ak.unflatten(
        gen_charged_leps,
        ak.num(gen_charged_leps_fromWplus) +
        ak.num(gen_charged_leps_fromWminus))
    gen_neutral_leps = ak.unflatten(
        gen_neutral_leps,
        ak.num(gen_neutral_leps_fromWplus) +
        ak.num(gen_neutral_leps_fromWminus))
    gen_taus = gen_charged_leps[np.abs(gen_charged_leps.pdgId) == 15]

    # fully hadronic evts
    had_evts = (ak.num(gen_charged_leps) == 0) & (
        ak.num(gen_neutral_leps) == 0) & (ak.num(gen_wpartons_up) == 2) & (
            ak.num(gen_wpartons_dw) == 2)

    # get direct tau decay products from hard processes (subset of gen_taus events above)
    tau_decay_prods = genparts[genparts.hasFlags(
        ['isDirectHardProcessTauDecayProduct'])]
    # only need decays to leptons (e/mu, tau nu) for event classification
    tau_TO_tau_nu = tau_decay_prods[np.abs(tau_decay_prods.pdgId) == 16]
    tau_TO_charged_lep = tau_decay_prods[(np.abs(tau_decay_prods.pdgId) == 11)
                                         |
                                         (np.abs(tau_decay_prods.pdgId) == 13)]
    tau_TO_neutral_lep = tau_decay_prods[(np.abs(tau_decay_prods.pdgId) == 12)
                                         |
                                         (np.abs(tau_decay_prods.pdgId) == 14)]

    # set decaytype for gen taus
    charged_lep_decaytype_array = ak.to_numpy(
        ak.flatten(ak.zeros_like(gen_charged_leps['pt'])))

    # semilep evts
    semilep_evts = (ak.num(gen_charged_leps) == 1) & (
        ak.num(gen_neutral_leps) == 1) & (ak.num(gen_wpartons_up) == 1) & (
            ak.num(gen_wpartons_dw) == 1)
    tau_jets_evts = semilep_evts & (ak.num(gen_taus) == 1)
    sl_evts_mask = np.repeat(ak.to_numpy(semilep_evts),
                             ak.to_numpy(ak.num(gen_charged_leps)))
    semilep_decaytype_array = np.zeros(ak.to_numpy(semilep_evts).sum(),
                                       dtype=int)
    # tau -> l
    semilep_tau_leptonic_decay = (tau_jets_evts) & (
        ak.num(tau_TO_charged_lep) == 1) & (
            ak.num(tau_TO_neutral_lep) == 1) & (ak.num(tau_TO_tau_nu) == 1)
    semilep_tau_hadronic_decay = (tau_jets_evts) & (
        ~semilep_tau_leptonic_decay)
    semilep_decaytype_array[
        semilep_tau_leptonic_decay[semilep_evts]] = np.ones(
            ak.to_numpy(semilep_tau_leptonic_decay).sum(), dtype=int)
    semilep_decaytype_array[
        semilep_tau_hadronic_decay[semilep_evts]] = np.ones(
            ak.to_numpy(semilep_tau_hadronic_decay).sum(), dtype=int) * 2
    # set charged_lep_decaytype_array for semileptonic events
    charged_lep_decaytype_array[sl_evts_mask] = semilep_decaytype_array

    # dilep evts
    dilep_evts = (ak.num(gen_charged_leps) == 2) & (
        ak.num(gen_neutral_leps) == 2) & (ak.num(gen_wpartons_up) == 0) & (
            ak.num(gen_wpartons_dw) == 0)
    lep_tau_evts = dilep_evts & (ak.num(gen_taus) == 1)
    tau_tau_evts = dilep_evts & (ak.num(gen_taus) == 2)
    dl_evts_mask = np.repeat(ak.to_numpy(dilep_evts),
                             ak.to_numpy(ak.num(gen_charged_leps)))
    dilep_decaytype_array = np.zeros((ak.to_numpy(dilep_evts).sum(), 2),
                                     dtype=int)
    # tau + tau
    # tau + tau -> ll
    dilep_TauTau_ll_decay = (tau_tau_evts) & (
        ak.num(tau_TO_charged_lep) == 2) & (
            ak.num(tau_TO_neutral_lep) == 2) & (ak.num(tau_TO_tau_nu) == 2)
    dilep_decaytype_array[dilep_TauTau_ll_decay[dilep_evts]] = np.ones(
        (ak.to_numpy(dilep_TauTau_ll_decay).sum(), 2), dtype=int)
    # tau + tau -> hh
    dilep_TauTau_hh_decay = (tau_tau_evts) & (
        (ak.num(tau_TO_charged_lep) + ak.num(tau_TO_neutral_lep))
        == 0) & (ak.num(tau_TO_tau_nu) == 2)
    dilep_decaytype_array[dilep_TauTau_hh_decay[dilep_evts]] = np.ones(
        (ak.to_numpy(dilep_TauTau_hh_decay).sum(), 2), dtype=int) * 2
    # tau + tau -> lh
    dilep_TauTau_lh_decay = (
        tau_tau_evts) & ~(dilep_TauTau_ll_decay | dilep_TauTau_hh_decay)
    # set index corresponding to leptonically decaying tau to 1, default array is set to 2
    dl_TauTau_to_lh_decaytype_array = np.ones(
        ak.to_numpy(dilep_TauTau_lh_decay).sum() * 2, dtype=int) * 2
    lep_tau_mask = (np.repeat(
        ak.to_numpy(
            ak.flatten(
                tau_TO_charged_lep[dilep_TauTau_lh_decay].parent.pdgId)),
        2) == ak.flatten(gen_charged_leps[dilep_TauTau_lh_decay].pdgId))
    dl_TauTau_to_lh_decaytype_array[lep_tau_mask] = np.ones(lep_tau_mask.sum(),
                                                            dtype=int)
    dilep_decaytype_array[dilep_TauTau_lh_decay[
        dilep_evts]] = dl_TauTau_to_lh_decaytype_array.reshape(
            ak.to_numpy(dilep_TauTau_lh_decay).sum(), 2)

    # lep + tau
    # tau -> l
    dilep_LepTau_l_decay = (lep_tau_evts) & (
        ak.num(tau_TO_charged_lep) == 1) & (
            ak.num(tau_TO_neutral_lep) == 1) & (ak.num(tau_TO_tau_nu) == 1)
    # set index corresponding to tau to 1
    dl_LepTau_to_Lep_decaytype_array = np.zeros(
        ak.to_numpy(dilep_LepTau_l_decay).sum() * 2, dtype=int)
    dl_LepTau_to_Lep_decaytype_array[ak.flatten(
        np.abs(gen_charged_leps[dilep_LepTau_l_decay].pdgId) == 15)] = np.ones(
            ak.sum(dilep_LepTau_l_decay), dtype=int)
    dilep_decaytype_array[dilep_LepTau_l_decay[
        dilep_evts]] = dl_LepTau_to_Lep_decaytype_array.reshape(
            ak.sum(dilep_LepTau_l_decay), 2)
    # tau -> h
    dilep_LepTau_h_decay = (lep_tau_evts) & ~(dilep_LepTau_l_decay)
    # set index corresponding to tau to 2
    dl_LepTau_to_Had_decaytype_array = np.zeros(ak.sum(dilep_LepTau_h_decay) *
                                                2, dtype=int)
    dl_LepTau_to_Had_decaytype_array[ak.flatten(
        np.abs(gen_charged_leps[dilep_LepTau_h_decay].pdgId) == 15)] = np.ones(
            ak.sum(dilep_LepTau_h_decay), dtype=int) * 2
    dilep_decaytype_array[dilep_LepTau_h_decay[
        dilep_evts]] = dl_LepTau_to_Had_decaytype_array.reshape(
            ak.sum(dilep_LepTau_h_decay), 2)
    # set charged_lep_decaytype_array for dileptonic events
    charged_lep_decaytype_array[dl_evts_mask] = dilep_decaytype_array.flatten()

    # set charged lepton decaytype (defined for taus only, e/mu are 0) (1 is LEPTONIC, 2 is HADRONIC)
    gen_charged_leps['decaytype'] = ak.unflatten(charged_lep_decaytype_array,
                                                 ak.num(gen_charged_leps))

    # make awkward arrays of (top decay prods, tbar decay prods)
    Gen_Top_Pairs = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    Gen_B_Pairs = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    Gen_W_Pairs = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    Gen_Wparton_Pairs = ak.Array({}, with_name="PtEtaPhiMLorentzVector")
    for column in columns:
        Gen_Top_Pairs[column] = ak.flatten(
            ak.concatenate([gen_tops[column], gen_tbars[column]],
                           axis=1))  # (top, tbar)
        Gen_B_Pairs[column] = ak.flatten(
            ak.concatenate([gen_bs[column], gen_bbars[column]],
                           axis=1))  # (b, bbar)
        Gen_W_Pairs[column] = ak.flatten(
            ak.concatenate([gen_wplus[column], gen_wminus[column]],
                           axis=1))  # (W+, W-)
        # FIX: was `column is not "decaytype"` — identity comparison against a
        # str literal is implementation-dependent (SyntaxWarning on 3.8+);
        # equality is what is meant here.
        if column != "decaytype":
            Gen_Wparton_Pairs[column] = ak.flatten(
                ak.concatenate(
                    [
                        ak.pad_none(gen_wpartons_up[column], 1, axis=1),
                        ak.pad_none(gen_wpartons_dw[column], 1, axis=1)
                    ],
                    axis=1))  # (up-type wpartons, down-type wpartons)
    Gen_Top_Pairs = ak.unflatten(Gen_Top_Pairs,
                                 ak.num(gen_tops) + ak.num(gen_tbars))
    Gen_B_Pairs = ak.unflatten(Gen_B_Pairs, ak.num(gen_bs) + ak.num(gen_bbars))
    Gen_W_Pairs = ak.unflatten(Gen_W_Pairs,
                               ak.num(gen_wplus) + ak.num(gen_wminus))
    Gen_Wparton_Pairs = ak.unflatten(
        Gen_Wparton_Pairs,
        ak.num(ak.pad_none(gen_wpartons_up, 1, axis=1)) +
        ak.num(ak.pad_none(gen_wpartons_dw, 1, axis=1)))
    Gen_Wparton_Pairs = Gen_Wparton_Pairs[ak.argsort(
        Gen_Wparton_Pairs["pt"], ascending=False)]  # sort by pt

    Gen_TTbar = ak.Array(
        {
            "pt": (gen_tops + gen_tbars).pt,
            "eta": (gen_tops + gen_tbars).eta,
            "phi": (gen_tops + gen_tbars).phi,
            "mass": (gen_tops + gen_tbars).mass,
            "decaytype": gen_tops["decaytype"] + gen_tbars[
                "decaytype"],  # 0 is for INVALID, 2 for DILEP, 3 for SEMILEP, 4 for HADRONIC
        },
        with_name="PtEtaPhiMLorentzVector")

    ## make "table" of gen objects for certain decays
    ## DILEP
    DILEP_evts = ak.zip({
        "TTbar":
        _keep_structure(Gen_TTbar[dilep_evts], dilep_evts),
        "Top":
        _keep_structure(
            Gen_Top_Pairs[np.sign(Gen_Top_Pairs.charge) == 1][dilep_evts],
            dilep_evts),
        "Tbar":
        _keep_structure(
            Gen_Top_Pairs[np.sign(Gen_Top_Pairs.charge) == -1][dilep_evts],
            dilep_evts),
        "B":
        _keep_structure(
            Gen_B_Pairs[np.sign(Gen_B_Pairs.charge) == -1][dilep_evts],
            dilep_evts),
        "Bbar":
        _keep_structure(
            Gen_B_Pairs[np.sign(Gen_B_Pairs.charge) == 1][dilep_evts],
            dilep_evts),
        "Wplus":
        _keep_structure(Gen_W_Pairs[Gen_W_Pairs.charge == 1][dilep_evts],
                        dilep_evts),
        "Wminus":
        _keep_structure(Gen_W_Pairs[Gen_W_Pairs.charge == -1][dilep_evts],
                        dilep_evts),
        # charged lepton always made leading, neutral lepton subleading
        "First_plus":
        _keep_structure(
            gen_charged_leps[gen_charged_leps.charge > 0][dilep_evts],
            dilep_evts),
        "Second_plus":
        _keep_structure(
            gen_neutral_leps[gen_charged_leps.charge > 0][dilep_evts],
            dilep_evts),
        "First_minus":
        _keep_structure(
            gen_charged_leps[gen_charged_leps.charge < 0][dilep_evts],
            dilep_evts),
        "Second_minus":
        _keep_structure(
            gen_neutral_leps[gen_charged_leps.charge < 0][dilep_evts],
            dilep_evts),
        "Up_plus":  # same as Second_plus
        _keep_structure(
            gen_neutral_leps[gen_charged_leps.charge > 0][dilep_evts],
            dilep_evts),
        "Down_plus":  # same as First_plus
        _keep_structure(
            gen_charged_leps[gen_charged_leps.charge > 0][dilep_evts],
            dilep_evts),
        "Up_minus":  # same as Second_minus
        _keep_structure(
            gen_neutral_leps[gen_charged_leps.charge < 0][dilep_evts],
            dilep_evts),
        "Down_minus":  # same as First_minus
        _keep_structure(
            gen_charged_leps[gen_charged_leps.charge < 0][dilep_evts],
            dilep_evts),
    })

    ## HAD
    HAD_evts = ak.zip({
        "TTbar":
        _keep_structure(Gen_TTbar[had_evts], had_evts),
        "Top":
        _keep_structure(
            Gen_Top_Pairs[np.sign(Gen_Top_Pairs.charge) == 1][had_evts],
            had_evts),
        "Tbar":
        _keep_structure(
            Gen_Top_Pairs[np.sign(Gen_Top_Pairs.charge) == -1][had_evts],
            had_evts),
        "B":
        _keep_structure(
            Gen_B_Pairs[np.sign(Gen_B_Pairs.charge) == -1][had_evts],
            had_evts),
        "Bbar":
        _keep_structure(
            Gen_B_Pairs[np.sign(Gen_B_Pairs.charge) == 1][had_evts], had_evts),
        "Wplus":
        _keep_structure(Gen_W_Pairs[Gen_W_Pairs.charge == 1][had_evts],
                        had_evts),
        "Wminus":
        _keep_structure(Gen_W_Pairs[Gen_W_Pairs.charge == -1][had_evts],
                        had_evts),
        "First_plus":  # leading positively-charged parton
        _keep_structure(
            Gen_Wparton_Pairs[had_evts][
                Gen_Wparton_Pairs[had_evts].charge > 0][:, 0], had_evts),
        "Second_plus":  # subleading positively-charged parton
        _keep_structure(
            Gen_Wparton_Pairs[had_evts][
                Gen_Wparton_Pairs[had_evts].charge > 0][:, 1], had_evts),
        "First_minus":  # leading negatively-charged parton
        _keep_structure(
            Gen_Wparton_Pairs[had_evts][
                Gen_Wparton_Pairs[had_evts].charge < 0][:, 0], had_evts),
        "Second_minus":  # subleading negatively-charged parton
        _keep_structure(
            Gen_Wparton_Pairs[had_evts][
                Gen_Wparton_Pairs[had_evts].charge < 0][:, 1], had_evts),
        "Up_plus":  # positively-charged up-type parton
        _keep_structure(gen_wpartons_up[gen_wpartons_up.charge > 0][had_evts],
                        had_evts),
        "Down_plus":  # positively-charged down-type parton
        _keep_structure(gen_wpartons_dw[gen_wpartons_dw.charge > 0][had_evts],
                        had_evts),
        "Up_minus":  # negatively-charged up-type parton
        _keep_structure(gen_wpartons_up[gen_wpartons_up.charge < 0][had_evts],
                        had_evts),
        "Down_minus":  # negatively-charged down-type parton
        _keep_structure(gen_wpartons_dw[gen_wpartons_dw.charge < 0][had_evts],
                        had_evts),
    })

    ## SEMILEP
    SEMILEP_evts = ak.zip({
        "TTbar":
        _keep_structure(Gen_TTbar[semilep_evts], semilep_evts),
        "THad":
        _keep_structure(
            Gen_Top_Pairs[Gen_Top_Pairs.decaytype == 2][semilep_evts],
            semilep_evts),
        "TLep":
        _keep_structure(
            Gen_Top_Pairs[Gen_Top_Pairs.decaytype == 1][semilep_evts],
            semilep_evts),
        "BHad":
        _keep_structure(Gen_B_Pairs[Gen_B_Pairs.decaytype == 2][semilep_evts],
                        semilep_evts),
        "BLep":
        _keep_structure(Gen_B_Pairs[Gen_B_Pairs.decaytype == 1][semilep_evts],
                        semilep_evts),
        "WHad":
        _keep_structure(Gen_W_Pairs[Gen_W_Pairs.decaytype == 2][semilep_evts],
                        semilep_evts),
        "WLep":
        _keep_structure(Gen_W_Pairs[Gen_W_Pairs.decaytype == 1][semilep_evts],
                        semilep_evts),
        "Lepton":
        _keep_structure(gen_charged_leps[semilep_evts], semilep_evts),
        "Nu":
        _keep_structure(gen_neutral_leps[semilep_evts], semilep_evts),
        "WJa":
        _keep_structure(Gen_Wparton_Pairs[:, 0][semilep_evts], semilep_evts),
        "WJb":
        _keep_structure(Gen_Wparton_Pairs[:, 1][semilep_evts], semilep_evts),
        "Up_Had":
        _keep_structure(gen_wpartons_up[semilep_evts], semilep_evts),
        "Down_Had":
        _keep_structure(gen_wpartons_dw[semilep_evts], semilep_evts),
    })

    # make dictionary to return
    return {
        "SL": SEMILEP_evts,
        "DL": DILEP_evts,
        "Had": HAD_evts,
    }
def read(filename: Union[Path, str], events_per_chunk: int, parser: str = "pandas") -> Iterator[ak.Array]:
    """ Read a JETSCAPE FinalState{Hadrons,Partons} ASCII output file in chunks.

    This is the primary user function. Chunked reading keeps memory usage manageable,
    and each value is stored in the smallest dtype that can encompass its range.

    Args:
        filename: Filename of the ASCII file.
        events_per_chunk: Number of events to provide in each chunk.
        parser: Name of the parser to use. Default: `pandas` (`pandas.read_csv`), which is
            compiled c and seems to be the fastest available option. Other options:
            ["python", "numpy"].
    Returns:
        Generator of an array of events_per_chunk events.
    """
    # Validation
    path = Path(filename)

    # Setup: map parser name -> parsing callable.
    parsers = {
        "pandas": _parse_with_pandas,
        "python": _parse_with_python,
        "numpy": _parse_with_numpy,
    }
    parse = parsers[parser]

    # Fixed column layout of the particle table: name -> (column index, storage dtype).
    # eta/phi are redundant with the momenta but already parsed, so pass them along.
    particle_columns = {
        "particle_ID": (1, np.int32),
        # Final-state output limits status to a few values: -1 are holes, >= 0 are
        # signal particles (jet signal + recoils, not distinguishable here).
        "status": (2, np.int8),
        "E": (3, np.float32),
        "px": (4, np.float32),
        "py": (5, np.float32),
        "pz": (6, np.float32),
        "eta": (7, np.float32),
        "phi": (8, np.float32),
    }

    # Read the file, creating chunks of events.
    for chunk_index, chunk in enumerate(read_events_in_chunks(filename=path, events_per_chunk=events_per_chunk)):
        # Notification in case the parsing is slow...
        logger.debug(f"New chunk {chunk_index}")

        # Parse first: event_split_index is only valid after the lines are parsed.
        parsed = parse(iter(chunk))

        # If events_per_chunk evenly divides the file, the final chunk is empty at EOF:
        # nothing left to emit, so stop. (A partially-filled chunk at EOF still proceeds
        # so no events are lost.)
        if chunk.reached_end_of_file and len(chunk.headers) == 0:
            break

        # Convert into the awkward array structure: one sub-array per event.
        events_array = ak.Array(np.split(parsed, chunk.event_split_index()))

        # Cross checks: event count and per-event particle counts must match the headers.
        assert (ak.num(events_array, axis=0) == len(chunk.headers))
        expected_n_particles = np.array([header.n_particles for header in chunk.headers])
        assert (np.asarray(ak.num(events_array, axis=1)) == expected_n_particles).all()

        # Let the user know so they're not surprised by a short chunk.
        if chunk.incomplete_chunk:
            logger.warning(f"Requested {chunk.events_per_chunk} events, but only {chunk.events_contained_in_chunk} are available because we hit the end of the file.")

        # Header (per-event) info.
        headers = chunk.headers
        event_info = {
            "event_plane_angle": np.array([header.event_plane_angle for header in headers], np.float32),
            "event_ID": np.array([header.event_number for header in headers], np.uint16),
        }
        if headers[0].event_weight > -1:
            event_info["event_weight"] = np.array([header.event_weight for header in headers], np.float32)
        if chunk.cross_section:
            # Dataset-level quantity repeated per event so it zips with the rest of the
            # structure; the repetition should compress well on disk.
            event_info["cross_section"] = np.full_like(event_info["event_plane_angle"], chunk.cross_section.value)
            event_info["cross_section_error"] = np.full_like(event_info["event_plane_angle"], chunk.cross_section.error)

        # Assemble all of the information in a single awkward array and pass it on.
        yield ak.zip(
            {
                **event_info,
                **{
                    name: ak.values_astype(events_array[:, :, column], dtype)
                    for name, (column, dtype) in particle_columns.items()
                },
            },
            depth_limit=1,
        )
def test():
    # fill_none with a float32 scalar must not promote the array's dtype.
    converted = ak.values_astype(ak.Array([1.1, 2.2, None, 3.3]), np.float32)
    filled = ak.fill_none(converted, np.float32(0))
    assert str(filled.type) == "4 * float32"
def applyJEC(self, jets, fixedGridRhoFastjetAll, events_cache, typeJet, isData, JECversion):
    '''Apply jet energy corrections to `jets` and return the corrected collection.

    Based on https://coffeateam.github.io/coffea/notebooks/applying_corrections.html#Applying-energy-scale-transformations-to-Jets

    Args:
        jets: jagged jet collection (needs pt/mass/eta/area/rawFactor fields and,
            for MC, matched_gen.pt).
        fixedGridRhoFastjetAll: per-event rho, broadcast to each jet.
        events_cache: lazy cache handed to CorrectedJetsFactory.build.
        typeJet: jet-type suffix of the correction text files (e.g. 'AK4PFchs').
        isData: if True, skip the gen-matched pt needed for MC smearing.
        JECversion: prefix of the correction text files under self.corrJECfolder.

    Returns:
        The corrected jet collection produced by CorrectedJetsFactory.
    '''
    # Load the correction levels from text files into an evaluator.
    ext = lookup_tools.extractor()
    JECtypes = [
        'L1FastJet', 'L2Relative', 'L2Residual', 'L3Absolute', 'L2L3Residual'
    ]
    jec_stack_names = [
        JECversion + '_' + k + '_' + typeJet for k in JECtypes
    ]
    JECtypesfiles = [
        '* * ' + self.corrJECfolder + '/' + k + '.txt' for k in jec_stack_names
    ]
    ext.add_weight_sets(JECtypesfiles)
    ext.finalize()
    evaluator = ext.make_evaluator()
    print("available evaluator keys:")
    for key in evaluator.keys():
        print("\t", key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    # NOTE: a FactorizedJetCorrector built from jec_inputs was previously
    # constructed here but never used; JECStack below is what feeds the factory.
    for i in jec_inputs:
        print(i, '\n', evaluator[i])
    print(dir(evaluator))
    print()

    # Map the factory's expected variable names onto this collection's fields.
    jec_stack = JECStack(jec_inputs)
    name_map = jec_stack.blank_name_map
    name_map['JetPt'] = 'pt'
    name_map['JetMass'] = 'mass'
    name_map['JetEta'] = 'eta'
    name_map['JetA'] = 'area'

    # Corrections are applied to the raw (uncorrected) kinematics.
    jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
    jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
    jets['rho'] = ak.broadcast_arrays(fixedGridRhoFastjetAll, jets.pt)[0]
    name_map['ptRaw'] = 'pt_raw'
    name_map['massRaw'] = 'mass_raw'
    name_map['Rho'] = 'rho'
    if not isData:
        # Gen-matched pt (0 when unmatched) is only needed for MC.
        jets['pt_gen'] = ak.values_astype(
            ak.fill_none(jets.matched_gen.pt, 0), np.float32)
        name_map['ptGenJet'] = 'pt_gen'

    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)

    # Debug printout comparing kinematics before/after the correction.
    print()
    print('starting columns:', ak.fields(jets))
    print()
    print('untransformed pt ratios', jets.pt / jets.pt_raw)
    print('untransformed mass ratios', jets.mass / jets.mass_raw)
    print('transformed pt ratios', corrected_jets.pt / corrected_jets.pt_raw)
    print('transformed mass ratios',
          corrected_jets.mass / corrected_jets.mass_raw)
    print()
    print('transformed columns:', ak.fields(corrected_jets))
    return corrected_jets
def process(self, events):
    """Run the eeg+mu (WZG-style) event selection and fill histograms.

    Applies golden-JSON filtering (data), trigger, muon/electron/photon
    selections, optional fake-photon weighting, and per-region histogram
    filling.  Returns the accumulator ``out``.

    NOTE(review): relies on module-level names not visible here
    (``sample_name``, ``numba``, ``processor``) — confirm against the file top.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    dataset = sample_name  # events.metadata['dataset']

    # Data or MC
    isData = "genWeight" not in events.fields
    isFake = self._isFake

    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    # Golden Json file
    if (self._year == "2018") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD"
    if (self._year == "2017") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

    # <----- Get Scale factors ------>#
    if not isData:
        # Egamma reco ID
        get_ele_reco_above20_sf = self._corrections[
            "get_ele_reco_above20_sf"][self._year]
        get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][
            self._year]
        get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][
            self._year]

        # DoubleEG trigger
        # 2016, 2017 are not applied yet
        if self._year == "2018":
            get_ele_trig_leg1_SF = self._corrections[
                "get_ele_trig_leg1_SF"][self._year]
            get_ele_trig_leg1_data_Eff = self._corrections[
                "get_ele_trig_leg1_data_Eff"][self._year]
            get_ele_trig_leg1_mc_Eff = self._corrections[
                "get_ele_trig_leg1_mc_Eff"][self._year]
            get_ele_trig_leg2_SF = self._corrections[
                "get_ele_trig_leg2_SF"][self._year]
            get_ele_trig_leg2_data_Eff = self._corrections[
                "get_ele_trig_leg2_data_Eff"][self._year]
            get_ele_trig_leg2_mc_Eff = self._corrections[
                "get_ele_trig_leg2_mc_Eff"][self._year]

        # Muon ID, Iso
        get_mu_tight_id_sf = self._corrections["get_mu_tight_id_sf"][
            self._year]
        get_mu_tight_iso_sf = self._corrections["get_mu_tight_iso_sf"][
            self._year]

        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
        pu = self._puweight_arr[pu_weight_idx]

    # <----- Helper functions ------>#

    # Sort by PT helper function
    def sort_by_pt(ele, pho, jet):
        ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
        pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
        jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]
        return ele, pho, jet

    # Lorentz vectors
    from coffea.nanoevents.methods import vector
    ak.behavior.update(vector.behavior)

    def TLorentz_vector(vec):
        # Re-zip cartesian components so sums behave as LorentzVector.
        vec = ak.zip(
            {
                "x": vec.x,
                "y": vec.y,
                "z": vec.z,
                "t": vec.t
            },
            with_name="LorentzVector",
        )
        return vec

    def TLorentz_vector_cylinder(vec):
        # Cylindrical (pt, eta, phi, mass) flavour of the helper above.
        vec = ak.zip(
            {
                "pt": vec.pt,
                "eta": vec.eta,
                "phi": vec.phi,
                "mass": vec.mass,
            },
            with_name="PtEtaPhiMLorentzVector",
        )
        return vec

    # <----- Selection ------>#

    Initial_events = events
    # Good Run ( Golden Json files )
    from coffea import lumi_tools
    if isData:
        lumi_mask_builder = lumi_tools.LumiMask(injson)
        lumimask = ak.Array(
            lumi_mask_builder.__call__(events.run, events.luminosityBlock))
        events = events[lumimask]
        # print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events)))

    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    # Cut flow
    cut0 = np.zeros(len(events))

    ##----------- Cut flow1: Passing Triggers

    # double lepton trigger
    is_double_ele_trigger = True
    if not is_double_ele_trigger:
        # NOTE(review): np.bool was removed in NumPy 1.24 — verify the pinned
        # NumPy version, or this line raises AttributeError.
        double_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
    else:
        double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
        # OR together every configured HLT path that exists in this file.
        for path in self._doubleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                path]

    # single lepton trigger
    is_single_ele_trigger = True
    if not is_single_ele_trigger:
        single_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
    else:
        single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                path]

    events.Electron, events.Photon, events.Jet = sort_by_pt(
        events.Electron, events.Photon, events.Jet)

    # Good Primary vertex
    nPV = events.PV.npvsGood
    nPV_nw = events.PV.npvsGood
    if not isData:
        nPV = nPV * pu
        print(pu)

    # Apply cut1
    events = events[double_ele_triggers_arr]
    if not isData:
        pu = pu[double_ele_triggers_arr]

    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    cut1 = np.ones(len(events))

    # Set Particles
    Electron = events.Electron
    Muon = events.Muon
    Photon = events.Photon
    MET = events.MET
    Jet = events.Jet

    ##----------- Cut flow2: Muon Selection
    MuSelmask = ((Muon.pt >= 10) & (abs(Muon.eta) <= 2.5) & (Muon.tightId) &
                 (Muon.pfRelIso04_all < 0.15))
    Muon = Muon[MuSelmask]

    # Exatly one muon
    Muon_sel_mask = ak.num(Muon) == 1
    Electron = Electron[Muon_sel_mask]
    Photon = Photon[Muon_sel_mask]
    Jet = Jet[Muon_sel_mask]
    MET = MET[Muon_sel_mask]
    Muon = Muon[Muon_sel_mask]
    events = events[Muon_sel_mask]
    if not isData:
        pu = pu[Muon_sel_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    cut2 = np.ones(len(Photon)) * 2

    ##----------- Cut flow3: Electron Selection
    # Barrel (|eta_SC| < 1.479) and endcap legs, each with its own d0/dz cuts.
    EleSelmask = ((Electron.pt >= 10) &
                  (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479) &
                  (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) &
                  (abs(Electron.dz) < 0.1)) | (
                      (Electron.pt >= 10) &
                      (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) &
                      (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) &
                      (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) &
                      (abs(Electron.dz) < 0.2))
    Electron = Electron[EleSelmask]

    # Exactly two electrons
    ee_mask = ak.num(Electron) == 2
    Electron = Electron[ee_mask]
    Photon = Photon[ee_mask]
    Jet = Jet[ee_mask]
    MET = MET[ee_mask]
    Muon = Muon[ee_mask]
    if not isData:
        pu = pu[ee_mask]
    events = events[ee_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    cut3 = np.ones(len(Photon)) * 3

    ##----------- Cut flow4: Photon Selection

    # Basic photon selection
    isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                              (abs(Photon.eta) < 2.5))
    Pixel_seed_mask = ~Photon.pixelSeed

    if (dataset == "ZZ") and (self._year == "2017"):
        PT_ID_mask = (Photon.pt >= 20) & (
            Photon.cutBasedBitmap >= 3
        )  # 2^0(Loose) + 2^1(Medium) + 2^2(Tights)
    else:
        PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1)

    # dR cut with selected Muon and Electrons
    dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                             axis=-1)  # default metric table: delta_r
    dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

    # genPartFlav cut
    """
    if dataset == "WZG":
        isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11)
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
    elif dataset == "WZ":
        isPrompt = (Photon.genPartFlav == 1)
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
    else:
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
    """

    # Add genPartFlav to remove Fake Photon in MC samples ( They are already considered by data driven method )
    if not isData:
        genPartFlav_mask = (Photon.genPartFlav == 1)
        PhoSelmask = (genPartFlav_mask & PT_ID_mask & isgap_mask
                      & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask)
    else:
        PhoSelmask = (PT_ID_mask & isgap_mask & Pixel_seed_mask
                      & dr_pho_ele_mask & dr_pho_mu_mask)

    Photon = Photon[PhoSelmask]

    # Apply cut 4
    A_photon_mask = ak.num(Photon) > 0
    Electron = Electron[A_photon_mask]
    Photon = Photon[A_photon_mask]
    Jet = Jet[A_photon_mask]
    Muon = Muon[A_photon_mask]
    MET = MET[A_photon_mask]
    if not isData:
        pu = pu[A_photon_mask]
    events = events[A_photon_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    def make_leading_pair(target, base):
        # Keep only the element of `target` at the argmax-pt index of `base`.
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    leading_pho = make_leading_pair(Photon, Photon)

    # -------------------- Make Fake Photon BKGs---------------------------#

    def make_bins(pt, eta, bin_range_str):
        # Boolean mask selecting photons inside one (pt, |eta|) bin.
        bin_dict = {
            "PT_1_eta_1": (pt > 20) & (pt < 30) & (eta < 1),
            "PT_1_eta_2": (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5),
            "PT_1_eta_3": (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2),
            "PT_1_eta_4": (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5),
            "PT_2_eta_1": (pt > 30) & (pt < 40) & (eta < 1),
            "PT_2_eta_2": (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5),
            "PT_2_eta_3": (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2),
            "PT_2_eta_4": (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5),
            "PT_3_eta_1": (pt > 40) & (pt < 50) & (eta < 1),
            "PT_3_eta_2": (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5),
            "PT_3_eta_3": (pt > 40) & (pt < 50) & (eta > 1.5) & (eta < 2),
            "PT_3_eta_4": (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5),
            "PT_4_eta_1": (pt > 50) & (eta < 1),
            "PT_4_eta_2": (pt > 50) & (eta > 1) & (eta < 1.5),
            "PT_4_eta_3": (pt > 50) & (eta > 1.5) & (eta < 2),
            "PT_4_eta_4": (pt > 50) & (eta > 2) & (eta < 2.5),
        }
        binmask = bin_dict[bin_range_str]
        return binmask

    bin_name_list = [
        "PT_1_eta_1",
        "PT_1_eta_2",
        "PT_1_eta_3",
        "PT_1_eta_4",
        "PT_2_eta_1",
        "PT_2_eta_2",
        "PT_2_eta_3",
        "PT_2_eta_4",
        "PT_3_eta_1",
        "PT_3_eta_2",
        "PT_3_eta_3",
        "PT_3_eta_4",
        "PT_4_eta_1",
        "PT_4_eta_2",
        "PT_4_eta_3",
        "PT_4_eta_4",
    ]

    ## -- Fake-fraction Lookup table --##
    if isFake:
        # Make Bin-range mask
        binned_pteta_mask = {}
        for name in bin_name_list:
            binned_pteta_mask[name] = make_bins(
                ak.flatten(leading_pho.pt),
                ak.flatten(abs(leading_pho.eta)),
                name,
            )
        # Read Fake fraction --> Mapping bin name to int()
        if self._year == "2018":
            # NOTE(review): allow_pickle="True" is a truthy string, not the
            # bool True — works, but the bool is what np.load expects.
            in_dict = np.load("Fitting_2018/Fit_results.npy",
                              allow_pickle="True")[()]
        if self._year == "2017":
            in_dict = np.load("Fitting_2017/Fit_results.npy",
                              allow_pickle="True")[()]
        idx = 0
        fake_dict = {}
        for i, j in in_dict.items():
            fake_dict[idx] = j
            idx += 1

        # Reconstruct Fake_weight
        fw = 0
        for i, j in binned_pteta_mask.items():
            fw = fw + j * fake_dict[bin_name_list.index(i)]

        # Process 0 weight to 1
        @numba.njit
        def zero_one(x):
            if x == 0:
                x = 1
            return x

        vec_zero_one = np.vectorize(zero_one)
        fw = vec_zero_one(fw)
    else:
        fw = np.ones(len(events))

    cut4 = np.ones(len(Photon)) * 4
    print("Fake fraction weight: ", len(fw), len(cut4), fw)

    ##----------- Cut flow5: OSSF
    # Opposite-sign same-flavour: charges of the two electrons sum to zero.
    ossf_mask = Electron.charge[:, 0] + Electron.charge[:, 1] == 0

    # Apply cut 5
    Electron = Electron[ossf_mask]
    Photon = Photon[ossf_mask]
    fw = fw[ossf_mask]
    Jet = Jet[ossf_mask]
    MET = MET[ossf_mask]
    Muon = Muon[ossf_mask]
    if not isData:
        pu = pu[ossf_mask]
    events = events[ossf_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5

    # Define Electron Triplet
    Diele = ak.zip({
        "lep1": Electron[:, 0],
        "lep2": Electron[:, 1],
        "p4": TLorentz_vector(Electron[:, 0] + Electron[:, 1]),
    })

    leading_ele = Diele.lep1
    subleading_ele = Diele.lep2

    # NOTE(review): redefinition of the identical helper above — harmless.
    def make_leading_pair(target, base):
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    leading_pho = make_leading_pair(Photon, Photon)

    # -- Scale Factor for each electron

    # Trigger weight helper function
    def Trigger_Weight(eta1, pt1, eta2, pt2):
        # Per-event efficiency from single-leg efficiencies (inclusion-
        # exclusion over which electron fires which leg).
        per_ev_MC = (get_ele_trig_leg1_mc_Eff(eta1, pt1) *
                     get_ele_trig_leg2_mc_Eff(eta2, pt2) +
                     get_ele_trig_leg1_mc_Eff(eta2, pt2) *
                     get_ele_trig_leg2_mc_Eff(eta1, pt1) -
                     get_ele_trig_leg1_mc_Eff(eta1, pt1) *
                     get_ele_trig_leg1_mc_Eff(eta2, pt2))

        per_ev_data = (
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(
                eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) *
            get_ele_trig_leg2_SF(eta2, pt2) +
            get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(
                eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) *
            get_ele_trig_leg2_SF(eta1, pt1) -
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(
                eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) *
            get_ele_trig_leg1_SF(eta2, pt2))

        return per_ev_data / per_ev_MC

    if not isData:
        ## -------------< Egamma ID and Reco Scale factor > -----------------##
        # NOTE(review): the lookup callables are overwritten with their
        # evaluated arrays here (same names reused) — intentional but fragile.
        get_pho_medium_id_sf = get_pho_medium_id_sf(
            ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt))

        ele_reco_sf = get_ele_reco_above20_sf(
            leading_ele.deltaEtaSC + leading_ele.eta,
            leading_ele.pt,
        ) * get_ele_reco_above20_sf(
            subleading_ele.deltaEtaSC + subleading_ele.eta,
            subleading_ele.pt,
        )

        ele_medium_id_sf = get_ele_medium_id_sf(
            leading_ele.deltaEtaSC + leading_ele.eta,
            leading_ele.pt,
        ) * get_ele_medium_id_sf(
            subleading_ele.deltaEtaSC + subleading_ele.eta,
            subleading_ele.pt,
        )

        ## -------------< Muon ID and Iso Scale factor > -----------------##
        get_mu_tight_id_sf = get_mu_tight_id_sf(ak.flatten(abs(Muon.eta)),
                                                ak.flatten(Muon.pt))
        get_mu_tight_iso_sf = get_mu_tight_iso_sf(
            ak.flatten(abs(Muon.eta)), ak.flatten(Muon.pt))

        ## -------------< Double Electron Trigger Scale factor > -----------------##
        eta1 = leading_ele.deltaEtaSC + leading_ele.eta
        eta2 = subleading_ele.deltaEtaSC + subleading_ele.eta
        pt1 = leading_ele.pt
        pt2 = subleading_ele.pt

        # -- 2017,2016 are not applied yet
        if self._year == "2018":
            ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

    ##----------- Cut flow6: Baseline selection

    # Mee cut
    Mee_cut_mask = Diele.p4.mass > 4

    # Lepton PT cuts
    Leppt_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20)
                           & (Muon.pt >= 25))

    # MET cuts
    MET_mask = MET.pt > 20  # Baseline

    # Assemble!!
    Baseline_mask = Leppt_mask & MET_mask & Mee_cut_mask  # SR,CR

    # Apply cut6
    Diele_base = Diele[Baseline_mask]
    leading_pho_base = leading_pho[Baseline_mask]
    Jet_base = Jet[Baseline_mask]
    MET_base = MET[Baseline_mask]
    Muon_base = Muon[Baseline_mask]
    events_base = events[Baseline_mask]

    # Photon EE and EB
    isEE_mask = leading_pho.isScEtaEE
    isEB_mask = leading_pho.isScEtaEB
    Pho_EE_base = leading_pho[isEE_mask & Baseline_mask]
    Pho_EB_base = leading_pho[isEB_mask & Baseline_mask]

    # Stop processing if there is no event remain
    if len(leading_pho_base) == 0:
        return out

    cut6 = np.ones(ak.sum(ak.num(leading_pho_base) > 0)) * 6

    base_arr_dict = {
        "Diele_sel": Diele_base,
        "leading_pho_sel": leading_pho_base,
        "Jet_sel": Jet_base,
        "MET_sel": MET_base,
        "Muon_sel": Muon_base,
        "Pho_EE_sel": Pho_EE_base,
        "Pho_EB_sel": Pho_EB_base,
    }

    ##----------- << SR >>
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) < 15
    MET_mask = MET.pt > 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
    SR_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    SR_mask = Baseline_mask & SR_mask
    Diele_SR = Diele[SR_mask]
    leading_pho_SR = leading_pho[SR_mask]
    Muon_SR = Muon[SR_mask]
    MET_SR = MET[SR_mask]
    Jet_SR = Jet[SR_mask]
    events_SR = events[SR_mask]
    Pho_EE_SR = leading_pho[isEE_mask & SR_mask]
    Pho_EB_SR = leading_pho[isEB_mask & SR_mask]

    SR_arr_dict = {
        "Diele_sel": Diele_SR,
        "leading_pho_sel": leading_pho_SR,
        "Jet_sel": Jet_SR,
        "MET_sel": MET_SR,
        "Muon_sel": Muon_SR,
        "Pho_EE_sel": Pho_EE_SR,
        "Pho_EB_sel": Pho_EB_SR,
    }

    ##----------- << CR-Z+Jets >>
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) < 15
    MET_mask = MET.pt <= 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
    CR_ZJets_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    CR_ZJets_mask = Baseline_mask & CR_ZJets_mask
    Diele_CR_ZJets = Diele[CR_ZJets_mask]
    leading_pho_CR_ZJets = leading_pho[CR_ZJets_mask]
    Muon_CR_ZJets = Muon[CR_ZJets_mask]
    MET_CR_ZJets = MET[CR_ZJets_mask]
    Jet_CR_ZJets = Jet[CR_ZJets_mask]
    events_CR_ZJets = events[CR_ZJets_mask]
    Pho_EE_CR_ZJets = leading_pho[isEE_mask & CR_ZJets_mask]
    Pho_EB_CR_ZJets = leading_pho[isEB_mask & CR_ZJets_mask]

    CR_ZJets_arr_dict = {
        "Diele_sel": Diele_CR_ZJets,
        "leading_pho_sel": leading_pho_CR_ZJets,
        "Jet_sel": Jet_CR_ZJets,
        "MET_sel": MET_CR_ZJets,
        "Muon_sel": Muon_CR_ZJets,
        "Pho_EE_sel": Pho_EE_CR_ZJets,
        "Pho_EB_sel": Pho_EB_CR_ZJets,
    }

    ##----------- << CR-T-enriched >>
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) > 5
    MET_mask = MET.pt > 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) > 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
    CR_Tenri_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    CR_Tenri_mask = Baseline_mask & CR_Tenri_mask
    Diele_CR_t = Diele[CR_Tenri_mask]
    leading_pho_CR_t = leading_pho[CR_Tenri_mask]
    Muon_CR_t = Muon[CR_Tenri_mask]
    MET_CR_t = MET[CR_Tenri_mask]
    Jet_CR_t = Jet[CR_Tenri_mask]
    events_CR_t = events[CR_Tenri_mask]
    Pho_EE_CR_t = leading_pho[isEE_mask & CR_Tenri_mask]
    Pho_EB_CR_t = leading_pho[isEB_mask & CR_Tenri_mask]

    CR_tEnriched_arr_dict = {
        "Diele_sel": Diele_CR_t,
        "leading_pho_sel": leading_pho_CR_t,
        "Jet_sel": Jet_CR_t,
        "MET_sel": MET_CR_t,
        "Muon_sel": Muon_CR_t,
        "Pho_EE_sel": Pho_EE_CR_t,
        "Pho_EB_sel": Pho_EB_CR_t,
    }

    ##----------- << CR-Conversion >>
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) > 15
    MET_mask = MET.pt <= 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) <= 100
    CR_conv_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    CR_conv_mask = Baseline_mask & CR_conv_mask
    Diele_CR_conv = Diele[CR_conv_mask]
    leading_pho_CR_conv = leading_pho[CR_conv_mask]
    Muon_CR_conv = Muon[CR_conv_mask]
    MET_CR_conv = MET[CR_conv_mask]
    Jet_CR_conv = Jet[CR_conv_mask]
    events_CR_conv = events[CR_conv_mask]
    Pho_EE_CR_conv = leading_pho[isEE_mask & CR_conv_mask]
    Pho_EB_CR_conv = leading_pho[isEB_mask & CR_conv_mask]

    CR_Conversion_dict = {
        "Diele_sel": Diele_CR_conv,
        "leading_pho_sel": leading_pho_CR_conv,
        "Jet_sel": Jet_CR_conv,
        "MET_sel": MET_CR_conv,
        "Muon_sel": Muon_CR_conv,
        "Pho_EE_sel": Pho_EE_CR_conv,
        "Pho_EB_sel": Pho_EB_CR_conv,
    }

    ## -------------------- Prepare making hist --------------#
    regions = {
        "Baseline": base_arr_dict,
        "Signal": SR_arr_dict,
        "CR_ZJets": CR_ZJets_arr_dict,
        "CR_tEnriched": CR_tEnriched_arr_dict,
        "CR_conversion": CR_Conversion_dict,
    }

    mask_dict = {
        "Baseline": Baseline_mask,
        "Signal": SR_mask,
        "CR_ZJets": CR_ZJets_mask,
        "CR_tEnriched": CR_Tenri_mask,
        "CR_conversion": CR_conv_mask,
    }

    for region, arr_dict in regions.items():

        # Photon
        phoPT = ak.flatten(arr_dict["leading_pho_sel"].pt)
        phoEta = ak.flatten(arr_dict["leading_pho_sel"].eta)
        phoPhi = ak.flatten(arr_dict["leading_pho_sel"].phi)

        # Photon EE
        if len(arr_dict["Pho_EE_sel"].pt) != 0:
            Pho_EE_PT = ak.flatten(arr_dict["Pho_EE_sel"].pt)
            Pho_EE_Eta = ak.flatten(arr_dict["Pho_EE_sel"].eta)
            Pho_EE_Phi = ak.flatten(arr_dict["Pho_EE_sel"].phi)
            Pho_EE_sieie = ak.flatten(arr_dict["Pho_EE_sel"].sieie)
            Pho_EE_Iso_charge = ak.flatten(
                arr_dict["Pho_EE_sel"].pfRelIso03_chg)

        # Photon EB
        if len(arr_dict["Pho_EB_sel"].pt) != 0:
            Pho_EB_PT = ak.flatten(arr_dict["Pho_EB_sel"].pt)
            Pho_EB_Eta = ak.flatten(arr_dict["Pho_EB_sel"].eta)
            Pho_EB_Phi = ak.flatten(arr_dict["Pho_EB_sel"].phi)
            Pho_EB_sieie = ak.flatten(arr_dict["Pho_EB_sel"].sieie)
            Pho_EB_Iso_charge = ak.flatten(
                arr_dict["Pho_EB_sel"].pfRelIso03_chg)

        # Electrons
        ele1PT = arr_dict["Diele_sel"].lep1.pt
        ele1Eta = arr_dict["Diele_sel"].lep1.eta
        ele1Phi = arr_dict["Diele_sel"].lep1.phi
        ele2PT = arr_dict["Diele_sel"].lep2.pt
        ele2Eta = arr_dict["Diele_sel"].lep2.eta
        ele2Phi = arr_dict["Diele_sel"].lep2.phi

        # Muon
        muPT = ak.flatten(arr_dict["Muon_sel"].pt)
        muEta = ak.flatten(arr_dict["Muon_sel"].eta)
        muPhi = ak.flatten(arr_dict["Muon_sel"].phi)

        # MET
        met = ak.to_numpy(arr_dict["MET_sel"].pt)

        # M(eea) M(ee)
        diele = arr_dict["Diele_sel"].p4
        lll_vec = diele + arr_dict["Muon_sel"][:, 0]
        Mlll = lll_vec.mass
        Mee = diele.mass

        # W MT (--> beta)
        MT = np.sqrt(
            2 * arr_dict["Muon_sel"].pt * arr_dict["MET_sel"].pt * (1 - np.cos(
                abs(arr_dict["MET_sel"].delta_phi(arr_dict["Muon_sel"])))))
        MT = np.array(ak.firsts(MT))

        # --- Apply weight and hist
        weights = processor.Weights(len(cut5))

        # --- skim cut-weight
        def skim_weight(arr):
            # Drop None entries and zero-weight entries before filling.
            mask1 = ~ak.is_none(arr)
            subarr = arr[mask1]
            mask2 = subarr != 0
            return ak.to_numpy(subarr[mask2])

        cuts = mask_dict[region]
        cuts_pho_EE = ak.flatten(isEE_mask)
        cuts_pho_EB = ak.flatten(isEB_mask)

        if isFake:
            weights.add("fake_fraction", fw)

        # Weight and SF here
        if not (isData | isFake):
            weights.add("pileup", pu)
            weights.add("ele_id", ele_medium_id_sf)
            weights.add("ele_reco", ele_reco_sf)
            weights.add("pho_id", get_pho_medium_id_sf)
            weights.add("mu_id", get_mu_tight_id_sf)
            # NOTE(review): "mu_iso" is filled with get_mu_tight_id_sf, not
            # get_mu_tight_iso_sf — looks like a copy-paste bug; confirm.
            weights.add("mu_iso", get_mu_tight_id_sf)

            # 2016,2017 are not applied yet
            if self._year == "2018":
                weights.add("ele_trigger", ele_trig_weight)

        # ---------------------------- Fill hist --------------------------------------#

        # Initial events
        # NOTE(review): incremented once per region, so "sumw" counts the
        # initial events five times per chunk — verify this is intended.
        out["sumw"][dataset] += len(Initial_events)

        print(
            "region: {0} ### cut0: {1},cut1: {2}, cut2: {3},cut3: {4},cut4: {5},cut5: {6},cut6: {7}, cut7: {8}"
            .format(region, len(cut0), len(cut1), len(cut2), len(cut3),
                    len(cut4), len(cut5), len(cut6), len(met)))

        # Fill hist

        # -- met -- #
        out["met"].fill(
            dataset=dataset,
            region=region,
            met=met,
            weight=skim_weight(weights.weight() * cuts),
        )

        # --mass -- #
        out["MT"].fill(
            dataset=dataset,
            region=region,
            MT=MT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["mass"].fill(
            dataset=dataset,
            region=region,
            mass=Mee,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["mass_lll"].fill(
            dataset=dataset,
            region=region,
            mass_lll=Mlll,
            weight=skim_weight(weights.weight() * cuts),
        )

        # -- Muon -- #
        out["mupt"].fill(
            dataset=dataset,
            region=region,
            mupt=muPT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["mueta"].fill(
            dataset=dataset,
            region=region,
            mueta=muEta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["muphi"].fill(
            dataset=dataset,
            region=region,
            muphi=muPhi,
            weight=skim_weight(weights.weight() * cuts),
        )

        # -- Electron -- #
        out["ele1pt"].fill(
            dataset=dataset,
            region=region,
            ele1pt=ele1PT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele1eta"].fill(
            dataset=dataset,
            region=region,
            ele1eta=ele1Eta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele1phi"].fill(
            dataset=dataset,
            region=region,
            ele1phi=ele1Phi,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele2pt"].fill(
            dataset=dataset,
            region=region,
            ele2pt=ele2PT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele2eta"].fill(
            dataset=dataset,
            region=region,
            ele2eta=ele2Eta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele2phi"].fill(
            dataset=dataset,
            region=region,
            ele2phi=ele2Phi,
            weight=skim_weight(weights.weight() * cuts),
        )

        # -- Photon -- #
        out["phopt"].fill(
            dataset=dataset,
            region=region,
            phopt=phoPT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["phoeta"].fill(
            dataset=dataset,
            region=region,
            phoeta=phoEta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["phophi"].fill(
            dataset=dataset,
            region=region,
            phophi=phoPhi,
            weight=skim_weight(weights.weight() * cuts),
        )

        if len(arr_dict["Pho_EE_sel"].pt) != 0:
            out["pho_EE_pt"].fill(
                dataset=dataset,
                region=region,
                pho_EE_pt=Pho_EE_PT,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_eta"].fill(
                dataset=dataset,
                region=region,
                pho_EE_eta=Pho_EE_Eta,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_phi"].fill(
                dataset=dataset,
                region=region,
                pho_EE_phi=Pho_EE_Phi,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_sieie"].fill(
                dataset=dataset,
                region=region,
                pho_EE_sieie=Pho_EE_sieie,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_Iso_chg"].fill(
                dataset=dataset,
                region=region,
                pho_EE_Iso_chg=Pho_EE_Iso_charge,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )

        if len(arr_dict["Pho_EB_sel"].pt) != 0:
            out["pho_EB_pt"].fill(
                dataset=dataset,
                region=region,
                pho_EB_pt=Pho_EB_PT,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_eta"].fill(
                dataset=dataset,
                region=region,
                pho_EB_eta=Pho_EB_Eta,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_phi"].fill(
                dataset=dataset,
                region=region,
                pho_EB_phi=Pho_EB_Phi,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_sieie"].fill(
                dataset=dataset,
                region=region,
                pho_EB_sieie=Pho_EB_sieie,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_Iso_chg"].fill(
                dataset=dataset,
                region=region,
                pho_EB_Iso_chg=Pho_EB_Iso_charge,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )

    return out
def test_corrected_jets_factory():
    """End-to-end check of CorrectedJetsFactory / CorrectedMETFactory.

    Builds a JEC stack from the module-level ``evaluator`` (defined elsewhere
    in this file — not visible here), corrects jets from a small NanoAOD
    sample, re-derives the corrected+smeared pt "by hand" and asserts the two
    agree to 1e-6, then builds corrected MET and prints all variations.
    """
    import os
    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack

    events = None
    from coffea.nanoevents import NanoEventsFactory
    factory = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root"))
    events = factory.events()

    jec_stack_names = [
        "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi",
        "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi",
        "Spring16_25nsV10_MC_SF_AK4PFPuppi",
    ]
    # Pick up every uncertainty-source key present in the evaluator.
    for key in evaluator.keys():
        if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key:
            jec_stack_names.append(key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    # Map stack variable names onto NanoAOD jet columns.
    name_map = jec_stack.blank_name_map
    name_map["JetPt"] = "pt"
    name_map["JetMass"] = "mass"
    name_map["JetEta"] = "eta"
    name_map["JetA"] = "area"

    jets = events.Jet

    # Undo the stored raw factor; attach gen pt (0 if unmatched) and rho.
    jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
    jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
    jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0),
                                      np.float32)
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]
    name_map["ptGenJet"] = "pt_gen"
    name_map["ptRaw"] = "pt_raw"
    name_map["massRaw"] = "mass_raw"
    name_map["Rho"] = "rho"

    jec_cache = cachetools.Cache(np.inf)

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()

    print("setup corrected jets time =", toc - tic)

    tic = time.time()
    prof = pyinstrument.Profiler()
    prof.start()
    corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
    prof.stop()
    toc = time.time()

    print("corrected_jets build time =", toc - tic)
    print(prof.output_text(unicode=True, color=True, show_all=True))

    tic = time.time()
    print("Generated jet pt:", corrected_jets.pt_gen)
    print("Original jet pt:", corrected_jets.pt_orig)
    print("Raw jet pt:", jets.pt_raw)
    print("Corrected jet pt:", corrected_jets.pt)
    print("Original jet mass:", corrected_jets.mass_orig)
    print("Raw jet mass:", jets["mass_raw"])
    print("Corrected jet mass:", corrected_jets.mass)
    print("jet eta:", jets.eta)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()

    print("build all jet variations =", toc - tic)

    # Test that the corrections were applied correctly
    from coffea.jetmet_tools import (
        FactorizedJetCorrector,
        JetResolution,
        JetResolutionScaleFactor,
    )

    scalar_form = ak.without_parameters(jets["pt_raw"]).layout.form
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_stack_names[0:4]})
    corrs = corrector.getCorrection(JetEta=jets["eta"],
                                    Rho=jets["rho"],
                                    JetPt=jets["pt_raw"],
                                    JetA=jets["area"])
    reso = JetResolution(
        **{name: evaluator[name] for name in jec_stack_names[4:5]})
    jets["jet_energy_resolution"] = reso.getResolution(
        JetEta=jets["eta"],
        Rho=jets["rho"],
        JetPt=jets["pt_raw"],
        form=scalar_form,
        lazy_cache=jec_cache,
    )
    resosf = JetResolutionScaleFactor(
        **{name: evaluator[name] for name in jec_stack_names[5:6]})
    jets["jet_energy_resolution_scale_factor"] = resosf.getScaleFactor(
        JetEta=jets["eta"], lazy_cache=jec_cache)

    # Filter out the non-deterministic (no gen pt) jets
    def smear_factor(jetPt, pt_gen, jersf):
        # Deterministic (gen-matched) JER smearing: 1 + (sf-1)*(pt-ptgen)/pt.
        return (ak.full_like(jetPt, 1.0) +
                (jersf[:, 0] - ak.full_like(jetPt, 1.0)) *
                (jetPt - pt_gen) / jetPt)

    # Slices drop the last jets of the first/last event (the stochastic ones).
    test_gen_pt = ak.concatenate(
        [corrected_jets.pt_gen[0, :-2], corrected_jets.pt_gen[-1, :-1]])
    test_raw_pt = ak.concatenate([jets.pt_raw[0, :-2], jets.pt_raw[-1, :-1]])
    test_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    test_eta = ak.concatenate([jets.eta[0, :-2], jets.eta[-1, :-1]])
    test_jer = ak.concatenate([
        jets.jet_energy_resolution[0, :-2], jets.jet_energy_resolution[-1, :-1]
    ])
    test_jer_sf = ak.concatenate([
        jets.jet_energy_resolution_scale_factor[0, :-2],
        jets.jet_energy_resolution_scale_factor[-1, :-1],
    ])
    test_jec = ak.concatenate([corrs[0, :-2], corrs[-1, :-1]])
    test_corrected_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])

    test_corr_pt = test_raw_pt * test_jec
    test_pt_smear_corr = test_corr_pt * smear_factor(test_corr_pt, test_gen_pt,
                                                     test_jer_sf)

    # Print the results of the "by-hand" calculations and confirm that the values match the expected values
    print("\nConfirm the CorrectedJetsFactory values:")
    print("Jet pt (gen)", test_gen_pt.tolist())
    print("Jet pt (raw)", test_raw_pt.tolist())
    print("Jet pt (nano):", test_pt.tolist())
    print("Jet eta:", test_eta.tolist())
    print("Jet energy resolution:", test_jer.tolist())
    print("Jet energy resolution sf:", test_jer_sf.tolist())
    print("Jet energy correction:", test_jec.tolist())
    print("Corrected jet pt (ref)", test_corr_pt.tolist())
    print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist())
    print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n")
    assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6)

    # Extend the name map with the MET columns and build corrected MET.
    name_map["METpt"] = "pt"
    name_map["METphi"] = "phi"
    name_map["JetPhi"] = "phi"
    name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX"
    name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY"

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()

    print("setup corrected MET time =", toc - tic)

    met = events.MET
    tic = time.time()
    # prof = pyinstrument.Profiler()
    # prof.start()
    corrected_met = met_factory.build(met, corrected_jets,
                                      lazy_cache=jec_cache)
    # prof.stop()
    toc = time.time()

    # print(prof.output_text(unicode=True, color=True, show_all=True))
    print("corrected_met build time =", toc - tic)

    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    prof = pyinstrument.Profiler()
    prof.start()
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    prof.stop()
    toc = time.time()

    print("build all met variations =", toc - tic)
    print(prof.output_text(unicode=True, color=True, show_all=True))
def process(self, events):
    """Run the full event selection and fill the output histograms.

    Builds the lepton/tau/jet collections, applies the 2-same-sign /
    3-lepton / >=4-lepton channel selections, computes per-event weights
    (normalization, b-tag SF, lepton SF) and fills the accumulator.

    Parameters
    ----------
    events : NanoEvents chunk for one dataset.

    Returns
    -------
    The filled accumulator (``self.accumulator.identity()`` with histograms).
    """
    # Dataset parameters
    dataset = events.metadata['dataset']
    histAxisName = self._samples[dataset]['histAxisName']
    year = self._samples[dataset]['year']
    xsec = self._samples[dataset]['xsec']
    sow = self._samples[dataset]['nSumOfWeights']
    isData = self._samples[dataset]['isData']
    datasets = [
        'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
        'DoubleElectron'
    ]
    # Strip any era suffix (e.g. 'SingleMuon_B' -> 'SingleMuon') for data
    for d in datasets:
        if d in dataset:
            dataset = dataset.split('_')[0]

    # Initialize objects
    met = events.MET
    e = events.Electron
    mu = events.Muon
    tau = events.Tau
    j = events.Jet

    # Muon selection
    mu['isPres'] = isPresMuon(mu.dxy, mu.dz, mu.sip3d, mu.looseId)
    mu['isTight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz,
                                mu.pfRelIso03_all, mu.sip3d, mu.mvaTTH,
                                mu.mediumPromptId, mu.tightCharge,
                                mu.looseId, minpt=10)
    mu['isGood'] = mu['isPres'] & mu['isTight']
    leading_mu = mu[ak.argmax(mu.pt, axis=-1, keepdims=True)]
    leading_mu = leading_mu[leading_mu.isGood]
    mu = mu[mu.isGood]
    mu_pres = mu[mu.isPres]

    # Electron selection
    e['isPres'] = isPresElec(e.pt, e.eta, e.dxy, e.dz, e.miniPFRelIso_all,
                             e.sip3d, e.lostHits, minpt=15)
    e['isTight'] = isTightElec(e.pt, e.eta, e.dxy, e.dz, e.miniPFRelIso_all,
                               e.sip3d, e.mvaTTH, e.mvaFall17V2Iso,
                               e.lostHits, e.convVeto, e.tightCharge,
                               e.sieie, e.hoe, e.eInvMinusPInv, minpt=15)
    e['isClean'] = isClean(e, mu, drmin=0.05)
    e['isGood'] = e['isPres'] & e['isTight'] & e['isClean']
    leading_e = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
    leading_e = leading_e[leading_e.isGood]
    e = e[e.isGood]
    e_pres = e[e.isPres & e.isClean]

    # Tau selection
    tau['isPres'] = isPresTau(tau.pt, tau.eta, tau.dxy, tau.dz,
                              tau.leadTkPtOverTauPt, tau.idAntiMu,
                              tau.idAntiEle, tau.rawIso,
                              tau.idDecayModeNewDMs, minpt=20)
    tau['isClean'] = isClean(tau, e_pres, drmin=0.4) & isClean(
        tau, mu_pres, drmin=0.4)
    tau['isGood'] = tau['isPres']  # & tau['isClean'], for the moment
    tau = tau[tau.isGood]

    nElec = ak.num(e)
    nMuon = ak.num(mu)
    nTau = ak.num(tau)

    twoLeps = (nElec + nMuon) == 2
    threeLeps = (nElec + nMuon) == 3
    twoElec = (nElec == 2)
    twoMuon = (nMuon == 2)
    e0 = e[ak.argmax(e.pt, axis=-1, keepdims=True)]
    m0 = mu[ak.argmax(mu.pt, axis=-1, keepdims=True)]

    # Attach the lepton SFs to the electron and muons collections
    AttachElectronSF(e, year=year)
    AttachMuonSF(mu, year=year)

    # Create a lepton (muon+electron) collection and calculate a per event lepton SF
    leps = ak.concatenate([e, mu], axis=-1)
    events['lepSF_nom'] = ak.prod(leps.sf_nom, axis=-1)
    events['lepSF_hi'] = ak.prod(leps.sf_hi, axis=-1)
    events['lepSF_lo'] = ak.prod(leps.sf_lo, axis=-1)

    # Jet selection
    jetptname = 'pt_nom' if hasattr(j, 'pt_nom') else 'pt'

    ### Jet energy corrections
    if not isData:
        j["pt_raw"] = (1 - j.rawFactor) * j.pt
        j["mass_raw"] = (1 - j.rawFactor) * j.mass
        j["pt_gen"] = ak.values_astype(ak.fill_none(j.matched_gen.pt, 0),
                                       np.float32)
        j["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                       j.pt)[0]
        events_cache = events.caches[0]
        corrected_jets = jet_factory.build(j, lazy_cache=events_cache)
        #print('jet pt: ',j.pt)
        #print('cor pt: ',corrected_jets.pt)
        #print('jes up: ',corrected_jets.JES_jes.up.pt)
        #print('jes down: ',corrected_jets.JES_jes.down.pt)
        #print(ak.fields(corrected_jets))
        # SYSTEMATICS (disabled for now):
        # jets = corrected_jets
        # if(self.jetSyst == 'JERUp'): jets = corrected_jets.JER.up
        # elif(self.jetSyst == 'JERDown'): jets = corrected_jets.JER.down
        # elif(self.jetSyst == 'JESUp'): jets = corrected_jets.JES_jes.up
        # elif(self.jetSyst == 'JESDown'): jets = corrected_jets.JES_jes.down
    j['isGood'] = isTightJet(getattr(j, jetptname), j.eta, j.jetId, j.neHEF,
                             j.neEmEF, j.chHEF, j.chEmEF, j.nConstituents)
    #j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId)
    #j['isclean'] = isClean(j, e, mu)
    j['isClean'] = isClean(j, e, drmin=0.4) & isClean(
        j, mu, drmin=0.4)  # & isClean(j, tau, drmin=0.4)
    goodJets = j[(j.isClean) & (j.isGood)]
    njets = ak.num(goodJets)
    ht = ak.sum(goodJets.pt, axis=-1)
    j0 = goodJets[ak.argmax(goodJets.pt, axis=-1, keepdims=True)]
    #nbtags = ak.num(goodJets[goodJets.btagDeepFlavB > 0.2770])

    # Loose DeepJet WP
    if year == 2017:
        btagwpl = 0.0532  #WP loose
    else:
        btagwpl = 0.0490  #WP loose
    isBtagJetsLoose = (goodJets.btagDeepB > btagwpl)
    isNotBtagJetsLoose = np.invert(isBtagJetsLoose)
    nbtagsl = ak.num(goodJets[isBtagJetsLoose])

    # Medium DeepJet WP
    if year == 2017:
        btagwpm = 0.3040  #WP medium
    else:
        btagwpm = 0.2783  #WP medium
    isBtagJetsMedium = (goodJets.btagDeepB > btagwpm)
    isNotBtagJetsMedium = np.invert(isBtagJetsMedium)
    nbtagsm = ak.num(goodJets[isBtagJetsMedium])

    # Btag SF following 1a) in https://twiki.cern.ch/twiki/bin/viewauth/CMS/BTagSFMethods
    btagSF = np.ones_like(ht)
    btagSFUp = np.ones_like(ht)
    btagSFDo = np.ones_like(ht)
    if not isData:
        pt = goodJets.pt
        abseta = np.abs(goodJets.eta)
        flav = goodJets.hadronFlavour
        bJetSF = GetBTagSF(abseta, pt, flav)
        bJetSFUp = GetBTagSF(abseta, pt, flav, sys=1)
        bJetSFDo = GetBTagSF(abseta, pt, flav, sys=-1)
        bJetEff = GetBtagEff(abseta, pt, flav, year)
        bJetEff_data = bJetEff * bJetSF
        bJetEff_dataUp = bJetEff * bJetSFUp
        bJetEff_dataDo = bJetEff * bJetSFDo
        # P(MC) / P(data) event probabilities per the BTV "method 1a" recipe
        pMC = ak.prod(bJetEff[isBtagJetsMedium], axis=-1) * ak.prod(
            (1 - bJetEff[isNotBtagJetsMedium]), axis=-1)
        pData = ak.prod(bJetEff_data[isBtagJetsMedium], axis=-1) * ak.prod(
            (1 - bJetEff_data[isNotBtagJetsMedium]), axis=-1)
        pDataUp = ak.prod(
            bJetEff_dataUp[isBtagJetsMedium], axis=-1) * ak.prod(
                (1 - bJetEff_dataUp[isNotBtagJetsMedium]), axis=-1)
        pDataDo = ak.prod(
            bJetEff_dataDo[isBtagJetsMedium], axis=-1) * ak.prod(
                (1 - bJetEff_dataDo[isNotBtagJetsMedium]), axis=-1)
        pMC = ak.where(pMC == 0, 1,
                       pMC)  # removeing zeroes from denominator...
        btagSF = pData / pMC
        btagSFUp = pDataUp / pMC
        # FIX: was 'pDataUp / pMC' (copy-paste), which made the down
        # variation identical to the up variation and left pDataDo unused.
        btagSFDo = pDataDo / pMC

    ##################################################################
    ### 2 same-sign leptons
    ##################################################################

    # emu
    singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
    singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
    em = ak.cartesian({"e": singe, "m": singm})
    emSSmask = (em.e.charge * em.m.charge > 0)
    emSS = em[emSSmask]
    nemSS = len(ak.flatten(emSS))

    # ee and mumu
    # pt>-1 to preserve jagged dimensions
    ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
    mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

    sumcharge = ak.sum(e.charge, axis=-1) + ak.sum(mu.charge, axis=-1)

    eepairs = ak.combinations(ee, 2, fields=["e0", "e1"])
    eeSSmask = (eepairs.e0.charge * eepairs.e1.charge > 0)
    eeonZmask = (np.abs((eepairs.e0 + eepairs.e1).mass - 91.2) < 10)
    eeoffZmask = (eeonZmask == 0)

    mmpairs = ak.combinations(mm, 2, fields=["m0", "m1"])
    mmSSmask = (mmpairs.m0.charge * mmpairs.m1.charge > 0)
    mmonZmask = (np.abs((mmpairs.m0 + mmpairs.m1).mass - 91.2) < 10)
    mmoffZmask = (mmonZmask == 0)

    eeSSonZ = eepairs[eeSSmask & eeonZmask]
    eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
    mmSSonZ = mmpairs[mmSSmask & mmonZmask]
    mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
    neeSS = len(ak.flatten(eeSSonZ)) + len(ak.flatten(eeSSoffZ))
    nmmSS = len(ak.flatten(mmSSonZ)) + len(ak.flatten(mmSSoffZ))

    print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]' %
          (neeSS, nemSS, nmmSS))

    # Cuts (per-pair masks reduced to per-event booleans)
    eeSSmask = (ak.num(eeSSmask[eeSSmask]) > 0)
    mmSSmask = (ak.num(mmSSmask[mmSSmask]) > 0)
    eeonZmask = (ak.num(eeonZmask[eeonZmask]) > 0)
    eeoffZmask = (ak.num(eeoffZmask[eeoffZmask]) > 0)
    mmonZmask = (ak.num(mmonZmask[mmonZmask]) > 0)
    mmoffZmask = (ak.num(mmoffZmask[mmoffZmask]) > 0)
    emSSmask = (ak.num(emSSmask[emSSmask]) > 0)

    ##################################################################
    ### 3 leptons
    ##################################################################

    # eem
    muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
    elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
    ee_eem = ak.combinations(elec_eem, 2, fields=["e0", "e1"])

    ee_eemZmask = (ee_eem.e0.charge * ee_eem.e1.charge < 1) & (np.abs(
        (ee_eem.e0 + ee_eem.e1).mass - 91.2) < 10)
    ee_eemOffZmask = (ee_eem.e0.charge * ee_eem.e1.charge < 1) & (np.abs(
        (ee_eem.e0 + ee_eem.e1).mass - 91.2) > 10)
    ee_eemZmask = (ak.num(ee_eemZmask[ee_eemZmask]) > 0)
    ee_eemOffZmask = (ak.num(ee_eemOffZmask[ee_eemOffZmask]) > 0)

    eepair_eem = (ee_eem.e0 + ee_eem.e1)
    trilep_eem = eepair_eem + muon_eem  #ak.cartesian({"e0":ee_eem.e0,"e1":ee_eem.e1, "m":muon_eem})

    # mme
    muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
    elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
    mm_mme = ak.combinations(muon_mme, 2, fields=["m0", "m1"])

    mm_mmeZmask = (mm_mme.m0.charge * mm_mme.m1.charge < 1) & (np.abs(
        (mm_mme.m0 + mm_mme.m1).mass - 91.2) < 10)
    mm_mmeOffZmask = (mm_mme.m0.charge * mm_mme.m1.charge < 1) & (np.abs(
        (mm_mme.m0 + mm_mme.m1).mass - 91.2) > 10)
    mm_mmeZmask = (ak.num(mm_mmeZmask[mm_mmeZmask]) > 0)
    mm_mmeOffZmask = (ak.num(mm_mmeOffZmask[mm_mmeOffZmask]) > 0)

    mmpair_mme = (mm_mme.m0 + mm_mme.m1)
    trilep_mme = mmpair_mme + elec_mme

    mZ_mme = mmpair_mme.mass
    mZ_eem = eepair_eem.mass
    m3l_eem = trilep_eem.mass
    m3l_mme = trilep_mme.mass

    # eee and mmm
    eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
    mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]

    eee_leps = ak.combinations(eee, 3, fields=["e0", "e1", "e2"])
    mmm_leps = ak.combinations(mmm, 3, fields=["m0", "m1", "m2"])

    ee_pairs = ak.combinations(eee, 2, fields=["e0", "e1"])
    mm_pairs = ak.combinations(mmm, 2, fields=["m0", "m1"])
    ee_pairs_index = ak.argcombinations(eee, 2, fields=["e0", "e1"])
    mm_pairs_index = ak.argcombinations(mmm, 2, fields=["m0", "m1"])

    # Same-flavor opposite-sign pairs for the Z candidate
    mmSFOS_pairs = mm_pairs[
        (np.abs(mm_pairs.m0.pdgId) == np.abs(mm_pairs.m1.pdgId))
        & (mm_pairs.m0.charge != mm_pairs.m1.charge)]
    offZmask_mm = ak.all(
        np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2) > 10.,
        axis=1,
        keepdims=True) & (ak.num(mmSFOS_pairs) > 0)
    onZmask_mm = ak.any(
        np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass - 91.2) < 10.,
        axis=1,
        keepdims=True)

    eeSFOS_pairs = ee_pairs[
        (np.abs(ee_pairs.e0.pdgId) == np.abs(ee_pairs.e1.pdgId))
        & (ee_pairs.e0.charge != ee_pairs.e1.charge)]
    offZmask_ee = ak.all(
        np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2) > 10,
        axis=1,
        keepdims=True) & (ak.num(eeSFOS_pairs) > 0)
    onZmask_ee = ak.any(
        np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass - 91.2) < 10,
        axis=1,
        keepdims=True)

    # Create masks **for event selection**
    eeeOnZmask = (ak.num(onZmask_ee[onZmask_ee]) > 0)
    eeeOffZmask = (ak.num(offZmask_ee[offZmask_ee]) > 0)
    mmmOnZmask = (ak.num(onZmask_mm[onZmask_mm]) > 0)
    mmmOffZmask = (ak.num(offZmask_mm[offZmask_mm]) > 0)

    # Now we need to create masks for the leptons in order to select leptons from the Z boson candidate (in onZ categories)
    ZeeMask = ak.argmin(np.abs((eeSFOS_pairs.e0 + eeSFOS_pairs.e1).mass -
                               91.2),
                        axis=1,
                        keepdims=True)
    ZmmMask = ak.argmin(np.abs((mmSFOS_pairs.m0 + mmSFOS_pairs.m1).mass -
                               91.2),
                        axis=1,
                        keepdims=True)

    Zee = eeSFOS_pairs[ZeeMask]
    Zmm = mmSFOS_pairs[ZmmMask]
    eZ0 = Zee.e0[ak.num(eeSFOS_pairs) > 0]
    eZ1 = Zee.e1[ak.num(eeSFOS_pairs) > 0]
    eZ = eZ0 + eZ1
    mZ0 = Zmm.m0[ak.num(mmSFOS_pairs) > 0]
    mZ1 = Zmm.m1[ak.num(mmSFOS_pairs) > 0]
    mZ = mZ0 + mZ1
    mZ_eee = eZ.mass
    mZ_mmm = mZ.mass

    # And for the W boson
    ZmmIndices = mm_pairs_index[ZmmMask]
    ZeeIndices = ee_pairs_index[ZeeMask]
    eW = eee[~ZeeIndices.e0 | ~ZeeIndices.e1]
    mW = mmm[~ZmmIndices.m0 | ~ZmmIndices.m1]

    triElec = eee_leps.e0 + eee_leps.e1 + eee_leps.e2
    triMuon = mmm_leps.m0 + mmm_leps.m1 + mmm_leps.m2
    m3l_eee = triElec.mass
    m3l_mmm = triMuon.mass

    ##################################################################
    ### >=4 leptons
    ##################################################################

    # 4lep cat
    is4lmask = ((nElec + nMuon) >= 4)
    muon_4l = mu[(is4lmask) & (mu.pt > -1)]
    elec_4l = e[(is4lmask) & (e.pt > -1)]

    # selecting 4 leading leptons
    leptons = ak.concatenate([e, mu], axis=-1)
    leptons_sorted = leptons[ak.argsort(leptons.pt, axis=-1,
                                        ascending=False)]
    lep4l = leptons_sorted[:, 0:4]
    e4l = lep4l[abs(lep4l.pdgId) == 11]
    mu4l = lep4l[abs(lep4l.pdgId) == 13]
    nElec4l = ak.num(e4l)
    nMuon4l = ak.num(mu4l)

    # Triggers
    trig_eeSS = passTrigger(events, 'ee', isData, dataset)
    trig_mmSS = passTrigger(events, 'mm', isData, dataset)
    trig_emSS = passTrigger(events, 'em', isData, dataset)
    trig_eee = passTrigger(events, 'eee', isData, dataset)
    trig_mmm = passTrigger(events, 'mmm', isData, dataset)
    trig_eem = passTrigger(events, 'eem', isData, dataset)
    trig_mme = passTrigger(events, 'mme', isData, dataset)
    trig_4l = triggerFor4l(events, nMuon, nElec, isData, dataset)

    # MET filters

    # Weights
    genw = np.ones_like(events['event']) if (
        isData or len(self._wc_names_lst) > 0) else events['genWeight']

    ### We need weights for: normalization, lepSF, triggerSF, pileup, btagSF...
    weights = {}
    for r in [
            'all', 'ee', 'mm', 'em', 'eee', 'mmm', 'eem', 'mme', 'eeee',
            'eeem', 'eemm', 'mmme', 'mmmm'
    ]:
        # weights[r] = coffea.analysis_tools.Weights(len(events))
        weights[r] = coffea.analysis_tools.Weights(len(events),
                                                   storeIndividual=True)
        if len(self._wc_names_lst) > 0:
            sow = np.ones_like(
                sow
            )  # Not valid in nanoAOD for EFT samples, MUST use SumOfEFTweights at analysis level
        weights[r].add('norm', genw if isData else (xsec / sow) * genw)
        weights[r].add('btagSF', btagSF, btagSFUp, btagSFDo)
        weights[r].add('lepSF', events.lepSF_nom, events.lepSF_hi,
                       events.lepSF_lo)

    # Extract the EFT quadratic coefficients and optionally use them to calculate the coefficients on the w**2 quartic function
    # eft_coeffs is never Jagged so convert immediately to numpy for ease of use.
    eft_coeffs = ak.to_numpy(events['EFTfitCoefficients']) if hasattr(
        events, "EFTfitCoefficients") else None
    if eft_coeffs is not None:
        # Check to see if the ordering of WCs for this sample matches what want
        if self._samples[dataset]['WCnames'] != self._wc_names_lst:
            eft_coeffs = efth.remap_coeffs(
                self._samples[dataset]['WCnames'], self._wc_names_lst,
                eft_coeffs)
    eft_w2_coeffs = efth.calc_w2_coeffs(eft_coeffs, self._dtype) if (
        self._do_errors and eft_coeffs is not None) else None

    # Selections and cuts
    selections = PackedSelection()  #(dtype='uint64')

    channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
    selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('emSS', (emSSmask) & (trig_emSS))

    channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
    selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
    selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
    selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
    selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

    channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
    selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
    selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
    selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
    selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

    channels4L = ['eeee', 'eeem', 'eemm', 'mmme', 'mmmm']
    selections.add('eeee', ((nElec4l == 4) & (nMuon4l == 0)) & (trig_4l))
    selections.add('eeem', ((nElec4l == 3) & (nMuon4l == 1)) & (trig_4l))
    selections.add('eemm', ((nElec4l == 2) & (nMuon4l == 2)) & (trig_4l))
    selections.add('mmme', ((nElec4l == 1) & (nMuon4l == 3)) & (trig_4l))
    selections.add('mmmm', ((nElec4l == 0) & (nMuon4l == 4)) & (trig_4l))

    selections.add('ch+', (sumcharge > 0))
    selections.add('ch-', (sumcharge < 0))
    selections.add('ch0', (sumcharge == 0))

    levels = ['base', '1+bm2+bl', '1bm', '2+bm']
    selections.add('base', (nElec + nMuon >= 2))
    selections.add('1+bm2+bl', (nElec + nMuon >= 2) & ((nbtagsm >= 1) &
                                                       (nbtagsl >= 2)))
    selections.add('1bm', (nElec + nMuon >= 2) & (nbtagsm == 1))
    selections.add('2+bm', (nElec + nMuon >= 2) & (nbtagsm >= 2))

    # Variables
    invMass_eeSSonZ = (eeSSonZ.e0 + eeSSonZ.e1).mass
    invMass_eeSSoffZ = (eeSSoffZ.e0 + eeSSoffZ.e1).mass
    invMass_mmSSonZ = (mmSSonZ.m0 + mmSSonZ.m1).mass
    invMass_mmSSoffZ = (mmSSoffZ.m0 + mmSSoffZ.m1).mass
    invMass_emSS = (emSS.e + emSS.m).mass

    varnames = {}
    varnames['met'] = met.pt
    varnames['ht'] = ht
    varnames['njets'] = njets
    varnames['invmass'] = {
        'eeSSonZ': invMass_eeSSonZ,
        'eeSSoffZ': invMass_eeSSoffZ,
        'mmSSonZ': invMass_mmSSonZ,
        'mmSSoffZ': invMass_mmSSoffZ,
        'emSS': invMass_emSS,
        'eemSSonZ': mZ_eem,
        'eemSSoffZ': mZ_eem,
        'mmeSSonZ': mZ_mme,
        'mmeSSoffZ': mZ_mme,
        'eeeSSonZ': mZ_eee,
        'eeeSSoffZ': mZ_eee,
        'mmmSSonZ': mZ_mmm,
        'mmmSSoffZ': mZ_mmm,
    }
    varnames['m3l'] = {
        'eemSSonZ': m3l_eem,
        'eemSSoffZ': m3l_eem,
        'mmeSSonZ': m3l_mme,
        'mmeSSoffZ': m3l_mme,
        'eeeSSonZ': m3l_eee,
        'eeeSSoffZ': m3l_eee,
        'mmmSSonZ': m3l_mmm,
        'mmmSSoffZ': m3l_mmm,
    }
    varnames['e0pt'] = e0.pt
    varnames['e0eta'] = e0.eta
    varnames['m0pt'] = m0.pt
    varnames['m0eta'] = m0.eta
    varnames['j0pt'] = j0.pt
    varnames['j0eta'] = j0.eta
    varnames['counts'] = np.ones_like(events['event'])

    # systematics
    systList = []
    if not isData:
        systList = ['nominal']
        if self._do_systematics:
            systList = systList + [
                'lepSFUp', 'lepSFDown', 'btagSFUp', 'btagSFDown'
            ]
    else:
        systList = ['noweight']

    # fill Histos
    hout = self.accumulator.identity()
    normweights = weights['all'].weight().flatten(
    )  # Why does it not complain about .flatten() here?
    sowweights = np.ones_like(normweights) if len(
        self._wc_names_lst) > 0 else normweights
    hout['SumOfEFTweights'].fill(sample=histAxisName,
                                 SumOfEFTweights=varnames['counts'],
                                 weight=sowweights,
                                 eft_coeff=eft_coeffs,
                                 eft_err_coeff=eft_w2_coeffs)

    for syst in systList:
        for var, v in varnames.items():
            for ch in channels2LSS + channels3L + channels4L:
                # NOTE: the loop variable below shadows the per-event
                # 'sumcharge' array built above; the array is no longer
                # needed at this point, so this is safe.
                for sumcharge in ['ch+', 'ch-', 'ch0']:
                    for lev in levels:
                        #find the event weight to be used when filling the histograms
                        weightSyst = syst
                        #in the case of 'nominal', or the jet energy systematics, no weight systematic variation is used (weightSyst=None)
                        if syst in [
                                'nominal', 'JERUp', 'JERDown', 'JESUp',
                                'JESDown'
                        ]:
                            weightSyst = None  # no weight systematic for these variations
                        if syst == 'noweight':
                            weight = np.ones(len(events))  # for data
                        else:
                            # call weights.weight() with the name of the systematic to be varied
                            if ch in channels3L:
                                ch_w = ch[:3]
                            elif ch in channels2LSS:
                                ch_w = ch[:2]
                            else:
                                ch_w = ch
                            weight = weights['all'].weight(
                                weightSyst) if isData else weights[
                                    ch_w].weight(weightSyst)
                        cuts = [ch] + [lev] + [sumcharge]
                        cut = selections.all(*cuts)
                        weights_flat = weight[cut].flatten(
                        )  # Why does it not complain about .flatten() here?
                        # FIX: np.int was a deprecated alias of the builtin
                        # int (removed in NumPy >= 1.24); builtin int gives
                        # the identical dtype.
                        weights_ones = np.ones_like(weights_flat, dtype=int)
                        eft_coeffs_cut = eft_coeffs[
                            cut] if eft_coeffs is not None else None
                        eft_w2_coeffs_cut = eft_w2_coeffs[
                            cut] if eft_w2_coeffs is not None else None

                        # filling histos
                        if var == 'invmass':
                            if ((ch in [
                                    'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                    'mmmSSonZ'
                            ]) or (ch in channels4L)):
                                continue
                            else:
                                values = ak.flatten(v[ch][cut])
                                hout['invmass'].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    invmass=values,
                                    weight=weights_flat,
                                    systematic=syst)
                        elif var == 'm3l':
                            if ((ch in channels2LSS) or (ch in [
                                    'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                                    'mmmSSonZ'
                            ]) or (ch in channels4L)):
                                continue
                            values = ak.flatten(v[ch][cut])
                            hout['m3l'].fill(
                                eft_coeff=eft_coeffs_cut,
                                eft_err_coeff=eft_w2_coeffs_cut,
                                sample=histAxisName,
                                channel=ch,
                                cut=lev,
                                sumcharge=sumcharge,
                                m3l=values,
                                weight=weights_flat,
                                systematic=syst)
                        else:
                            values = v[
                                cut]  # These all look identical, do we need if/else here?
                            if var == 'ht':
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    ht=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'met':
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    met=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'njets':
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    njets=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'nbtags':
                                # NOTE: 'nbtags' is not currently a key of
                                # varnames, so this branch is dead; kept for
                                # when the variable is re-enabled.
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    nbtags=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'counts':
                                hout[var].fill(counts=values,
                                               sample=histAxisName,
                                               channel=ch,
                                               cut=lev,
                                               sumcharge=sumcharge,
                                               weight=weights_ones,
                                               systematic=syst)
                            elif var == 'j0eta':
                                if lev == 'base':
                                    continue
                                values = ak.flatten(values)
                                #values=np.asarray(values)
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    j0eta=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'e0pt':
                                if ch in [
                                        'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                        'mmmSSonZ', 'mmmm'
                                ]:
                                    continue
                                values = ak.flatten(values)
                                #values=np.asarray(values)
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    e0pt=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst
                                )  # Crashing here, not sure why. Related to values?
                            elif var == 'm0pt':
                                if ch in [
                                        'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                        'eeeSSonZ', 'eeee'
                                ]:
                                    continue
                                values = ak.flatten(values)
                                #values=np.asarray(values)
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    m0pt=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'e0eta':
                                if ch in [
                                        'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                        'mmmSSonZ', 'mmmm'
                                ]:
                                    continue
                                values = ak.flatten(values)
                                #values=np.asarray(values)
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    e0eta=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'm0eta':
                                if ch in [
                                        'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                        'eeeSSonZ', 'eeee'
                                ]:
                                    continue
                                values = ak.flatten(values)
                                #values=np.asarray(values)
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    m0eta=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
                            elif var == 'j0pt':
                                if lev == 'base':
                                    continue
                                values = ak.flatten(values)
                                #values=np.asarray(values)
                                hout[var].fill(
                                    eft_coeff=eft_coeffs_cut,
                                    eft_err_coeff=eft_w2_coeffs_cut,
                                    j0pt=values,
                                    sample=histAxisName,
                                    channel=ch,
                                    cut=lev,
                                    sumcharge=sumcharge,
                                    weight=weights_flat,
                                    systematic=syst)
    return hout
def process_jets(events, year, corrections=None): jets = events["Jet"] jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"] jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"] jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0] if not events.metadata["dataset"].startswith("data_Single"): jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32) ## add btag wps for bdiscr in btag_values[year].keys(): for wp in btag_values[year][bdiscr].keys(): jets[wp] = (jets[bdiscr] > btag_values[year][bdiscr][wp]) ## apply jet corrections if (jet_pars["applyJER"] == 1) and corrections is not None: if events.metadata["dataset"].startswith("data_Single"): era = [ key for key in corrections["DATA"].keys() if events.metadata["dataset"].split(year)[-1] in key ] if year == "2016APV": if (("Bv2" in events.metadata["dataset"]) or ("C" in events.metadata["dataset"]) or ("D" in events.metadata["dataset"])): era = ["BCD"] elif (("E" in events.metadata["dataset"]) or ("F" in events.metadata["dataset"])): era = ["EF"] else: raise ValueError("Era not found for 2016APV dataset.") if year == "2016": if (("F" in events.metadata["dataset"]) or ("G" in events.metadata["dataset"]) or ("H" in events.metadata["dataset"])): era = ["FGH"] else: raise ValueError("Era not found for 2016 dataset.") if len(era) != 1: raise ValueError("Only one era should be used for %s" % events.metadata["dataset"]) jet_factory = corrections["DATA"][era[0]]["JetsFactory"] met_factory = corrections["DATA"][era[0]]["METFactory"] else: jet_factory = corrections["MC"]["JetsFactory"] met_factory = corrections["MC"]["METFactory"] cache = LRUCache(int(1e10), lambda a: a.nbytes) corrected_jets = jet_factory.build(jets, lazy_cache=cache) corrected_met = met_factory.build(events["MET"], corrected_jets, lazy_cache=cache) else: corrected_jets = jets corrected_met = events["MET"] return corrected_jets, corrected_met
def test_astype_complex():
    """Check ak.values_astype to complex64, complex reducers, and the
    NumPy cast rules awkward relies on."""
    f64_layout = ak.layout.NumpyArray(
        np.array([0.25, 0.5, 3.5, 4.5, 5.5], dtype=np.float64)
    )
    assert f64_layout.argmin() == 0
    assert f64_layout.argmax() == 4

    f64_unmasked = ak.layout.UnmaskedArray(f64_layout)
    assert ak.to_list(f64_unmasked) == [0.25, 0.5, 3.5, 4.5, 5.5]
    assert str(ak.type(f64_layout)) == "float64"
    assert str(ak.type(ak.Array(f64_layout))) == "5 * float64"
    assert str(ak.type(f64_unmasked)) == "?float64"
    assert str(ak.type(ak.Array(f64_unmasked))) == "5 * ?float64"

    # NumPy must allow the (unsafe) casts that values_astype performs
    assert np.can_cast(np.float32, np.float64) is True
    for src, dst in [
        (np.float64, np.float32),
        (np.float64, np.int8),
        (np.float64, np.complex64),
        (np.float64, np.complex128),
        (np.complex64, np.float64),
        (np.complex128, np.float64),
    ]:
        assert np.can_cast(src, dst, "unsafe") is True

    c64_layout = ak.values_astype(f64_layout, "complex64", highlevel=False)
    c64_unmasked = ak.layout.UnmaskedArray(c64_layout)
    assert ak.to_list(c64_layout) == [
        (0.25 + 0j),
        (0.5 + 0j),
        (3.5 + 0j),
        (4.5 + 0j),
        (5.5 + 0j),
    ]
    assert ak.to_list(ak.nplike.of(c64_unmasked).asarray(c64_unmasked)) == [
        (0.25 + 0.0j),
        (0.5 + 0.0j),
        (3.5 + 0.0j),
        (4.5 + 0.0j),
        (5.5 + 0.0j),
    ]
    assert str(ak.type(c64_layout)) == "complex64"
    assert str(ak.type(ak.Array(c64_layout))) == "5 * complex64"
    assert str(ak.type(c64_unmasked)) == "?complex64"
    assert str(ak.type(ak.Array(c64_unmasked))) == "5 * ?complex64"

    # A single complex entry promotes the whole NumpyArray to complex
    promoted = ak.layout.NumpyArray(
        np.array([1, (2.2 + 0.1j), 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])
    )
    assert ak.to_list(promoted) == [
        (1 + 0j),
        (2.2 + 0.1j),
        (3.3 + 0j),
        (4.4 + 0j),
        (5.5 + 0j),
        (6.6 + 0j),
        (7.7 + 0j),
        (8.8 + 0j),
        (9.9 + 0j),
    ]

    # Reducers on the complex64 content
    assert c64_layout.prod() == (10.828125 + 0j)
    assert c64_layout.min() == (0.25 + 0j)
    assert c64_layout.max() == (5.5 + 0j)
    assert c64_layout.argmin() == 0
    assert c64_layout.argmax() == 4
def process(self, events):
    """Select single-lepton H->bb VBF candidates (resolved and merged/boosted
    categories), apply per-year triggers and JECs, and fill column accumulators.

    Parameters
    ----------
    events : NanoEvents chunk; ``events.metadata['dataset']`` names the sample.

    Returns
    -------
    The filled accumulator (``weights``/``variables`` for the resolved
    category, ``weights_merged``/``variables_merged`` for the merged one).

    Fixes vs. the original:
    - removed the dead duplicate accumulator block at the top: it mutated
      ``output`` and then immediately discarded it by re-initializing, and its
      unconditional ``events.Generator.weight`` access would raise on data;
    - fixed the ``vents`` -> ``events`` NameError typo in the 2016
      single-electron trigger branch;
    - ``cut1``/``cut2`` are computed unconditionally so the resolved-category
      guards below can never reference an undefined name when no event passes
      ``basecut_merged``.
    """
    output = self.accumulator.identity()
    dataset = events.metadata['dataset']
    # Only MC carries generator weights; data bookkeeping is skipped here.
    if dataset not in ['singleelectron', 'singlemuon', 'egamma']:
        output['sumw'][dataset] += ak.sum(np.sign(events.Generator.weight))
        output['nevents'][dataset] += len(events)
    if dataset in ['singleelectron', 'singlemuon', 'egamma']:
        # Keep only certified luminosity sections for data.
        events = events[lumimask(events.run, events.luminosityBlock)]

    # MET pre-selection: nominal or JER/JES-shifted PuppiMET above 30 GeV.
    events = events[(events.PuppiMET.pt > 30) | (events.PuppiMET.ptJERUp > 30) | (events.PuppiMET.ptJESUp > 30)]

    # Single-lepton triggers, per data-taking year and primary dataset.
    if year == "2016":
        if dataset == 'singlemuon':
            events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24]
        elif dataset == 'singleelectron':
            # BUG FIX: original read 'vents.HLT.IsoTkMu24' (NameError).
            events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24 | events.HLT.Ele27_WPTight_Gsf]
        else:
            events = events[events.HLT.IsoTkMu24 | events.HLT.IsoMu24 | events.HLT.Ele27_WPTight_Gsf]
    elif year == "2017":
        if dataset == 'singlemuon':
            events = events[events.HLT.IsoMu27]
        elif dataset == 'singleelectron':
            events = events[events.HLT.Ele32_WPTight_Gsf_L1DoubleEG]
        else:
            events = events[events.HLT.IsoMu27 | events.HLT.Ele32_WPTight_Gsf_L1DoubleEG]
    elif year == "2018":
        if dataset == 'singlemuon':
            events = events[events.HLT.IsoMu24]
        elif dataset == 'egamma':
            events = events[events.HLT.Ele32_WPTight_Gsf]
        else:
            events = events[events.HLT.IsoMu24 | events.HLT.Ele32_WPTight_Gsf]

    # Topology: >=4 jets (resolved) or >=2 jets plus a fat jet (merged).
    events = events[(ak.num(events.Jet) > 3) | ((ak.num(events.Jet) > 1) & (ak.num(events.FatJet) > 0))]
    # At least one lepton candidate.
    events = events[(ak.num(events.Electron) > 0) | (ak.num(events.Muon) > 0)]

    tight_muons = events.Muon[events.Muon.tightId & (events.Muon.pfRelIso04_all < 0.15) & (events.Muon.pt > 26) & (abs(events.Muon.eta) < 2.4)]
    # Tight-ID muons in the isolation sideband 0.15 < iso < 0.4 (used as a veto).
    loose_not_tight_muons = events.Muon[events.Muon.tightId & (events.Muon.pfRelIso04_all < 0.4) & (events.Muon.pfRelIso04_all > 0.15) & (events.Muon.pt > 20) & (abs(events.Muon.eta) < 2.4)]
    # NOTE(review): '&' binds tighter than '|', so the endcap clause below is
    # OR-ed against the *entire* pt/ID/eta selection rather than AND-ed with
    # it, and the eta windows use 'eta + deltaEtaSC' without abs(). Preserved
    # as-is; confirm intent before changing.
    tight_electrons = events.Electron[(events.Electron.pt > 30) & (events.Electron.cutBased >= 3) & (events.Electron.eta + events.Electron.deltaEtaSC < 2.5) & ((abs(events.Electron.dz) < 0.1) & (abs(events.Electron.dxy) < 0.05) & (events.Electron.eta + events.Electron.deltaEtaSC < 1.479)) | ((abs(events.Electron.dz) < 0.2) & (abs(events.Electron.dxy) < 0.1) & (events.Electron.eta + events.Electron.deltaEtaSC > 1.479))]

    # ---- Jet energy corrections via the coffea factory --------------------
    name_map = jec_stack.blank_name_map
    name_map['JetPt'] = 'pt'
    name_map['JetMass'] = 'mass'
    name_map['JetEta'] = 'eta'
    name_map['JetA'] = 'area'
    jets = events.Jet
    # Raw (uncorrected) kinematics plus inputs the JEC/JER evaluation needs.
    jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
    jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
    jets['pt_gen'] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32)
    jets['rho'] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0]
    name_map['ptGenJet'] = 'pt_gen'
    name_map['ptRaw'] = 'pt_raw'
    name_map['massRaw'] = 'mass_raw'
    name_map['Rho'] = 'rho'
    events_cache = events.caches[0]
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)
    jet_pt = corrected_jets.pt
    jet_pt_jesup = corrected_jets.JES_jes.up.pt
    jet_pt_jerup = corrected_jets.JER.up.pt
    # Re-zip the corrected jets as candidates (charge is a dummy +1).
    corrected_jets = ak.zip({
        "pt": corrected_jets.pt,
        "eta": corrected_jets.eta,
        "phi": corrected_jets.phi,
        "mass": corrected_jets.mass,
        "charge": np.ones(len(corrected_jets.pt)),
        "btagDeepB": corrected_jets.btagDeepB
    }, with_name="PtEtaPhiMCandidate")

    fatjets = events.FatJet[(events.FatJet.pt > 250) & (abs(events.FatJet.eta) < 2.5) & (events.FatJet.msoftdrop > 50) & (events.FatJet.msoftdrop < 150)]
    # b-tagged and VBF-tag jet collections; masks index the *uncorrected*
    # events.Jet in parallel with the corrected pt.
    b_jets = corrected_jets[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 2.5) & (events.Jet.btagDeepB > 0.8953)]
    vbf_jets = corrected_jets[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 4.7) & (events.Jet.btagDeepB < 0.2217)]
    nextrajets = ak.num(events.Jet[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 4.7)]) - 4
    nextrabjets = ak.num(events.Jet[(events.Jet.cleanmask == 1) & (jet_pt > 30) & (abs(events.Jet.eta) < 4.7) & (events.Jet.btagDeepB > 0.2217)]) - 2

    # Merged (boosted) category: exactly one tight lepton, no sideband muons.
    basecut_merged = (ak.num(fatjets) > 0) & (ak.num(vbf_jets) > 1) & (ak.num(tight_muons) + ak.num(tight_electrons) == 1) & (ak.num(loose_not_tight_muons) == 0) & (events.PuppiMET.pt > 30)
    events_merged = events[basecut_merged]
    fatjets_merged = fatjets[basecut_merged]
    vbf_jets_merged = vbf_jets[basecut_merged]
    tight_muons_merged = tight_muons[basecut_merged]
    tight_electrons_merged = tight_electrons[basecut_merged]
    nextrajets_merged = nextrajets[basecut_merged]
    nextrabjets_merged = nextrabjets[basecut_merged]

    # Resolved category: two b-jets instead of a fat jet.
    basecut = (ak.num(b_jets) > 1) & (ak.num(vbf_jets) > 1) & (ak.num(tight_muons) + ak.num(tight_electrons) == 1) & (ak.num(loose_not_tight_muons) == 0) & (events.PuppiMET.pt > 30)
    events = events[basecut]
    b_jets = b_jets[basecut]
    vbf_jets = vbf_jets[basecut]
    tight_muons = tight_muons[basecut]
    tight_electrons = tight_electrons[basecut]
    nextrajets = nextrajets[basecut]
    nextrabjets = nextrabjets[basecut]

    # All data primary datasets are accumulated under one label.
    if dataset in ['singleelectron', 'singlemuon', 'egamma']:
        dataset = 'data'

    # cut7/cut8 index leading objects, so they are only well-defined when at
    # least one event passed the merged base selection.
    if ak.any(basecut_merged):
        cut7 = (fatjets_merged[:, 0].mass > 50) & (fatjets_merged[:, 0].mass < 150) & ((vbf_jets_merged[:, 0] + vbf_jets_merged[:, 1]).mass > 500) & (abs(vbf_jets_merged[:, 0].eta - vbf_jets_merged[:, 1].eta) > 2.5) & (ak.num(tight_muons_merged) > 0)
        cut8 = (fatjets_merged[:, 0].mass > 50) & (fatjets_merged[:, 0].mass < 150) & ((vbf_jets_merged[:, 0] + vbf_jets_merged[:, 1]).mass > 500) & (abs(vbf_jets_merged[:, 0].eta - vbf_jets_merged[:, 1].eta) > 2.5) & (ak.num(tight_electrons_merged) > 0)

    # Resolved cuts are safe on empty arrays, so compute them unconditionally;
    # the guards below would otherwise hit an undefined name when basecut is
    # non-empty but basecut_merged is.
    cut1 = ((b_jets[:, 0] + b_jets[:, 1]).mass > 50) & ((b_jets[:, 0] + b_jets[:, 1]).mass < 150) & ((vbf_jets[:, 0] + vbf_jets[:, 1]).mass > 500) & (abs(vbf_jets[:, 0].eta - vbf_jets[:, 1].eta) > 2.5) & (ak.num(tight_muons) > 0)
    cut2 = ((b_jets[:, 0] + b_jets[:, 1]).mass > 50) & ((b_jets[:, 0] + b_jets[:, 1]).mass < 150) & ((vbf_jets[:, 0] + vbf_jets[:, 1]).mass > 500) & (abs(vbf_jets[:, 0].eta - vbf_jets[:, 1].eta) > 2.5) & (ak.num(tight_electrons) > 0)

    # ---- Merged category, muon channel ------------------------------------
    if ak.any(basecut_merged) and ak.any(cut7):
        sel7_events = events_merged[cut7]
        sel7_fatjets = fatjets_merged[cut7]
        sel7_vbf_jets = vbf_jets_merged[cut7]
        sel7_muons = tight_muons_merged[cut7][:, 0]
        sel7_nextrajets = nextrajets_merged[cut7]
        sel7_nextrabjets = nextrabjets_merged[cut7]
        output["weights_merged"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel7_events.Generator.weight).data))
        output['variables_merged'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
            ak.to_numpy(sel7_fatjets[:, 0].pt),
            ak.to_numpy(sel7_fatjets[:, 0].eta),
            ak.to_numpy(sel7_fatjets[:, 0].phi),
            ak.to_numpy(sel7_fatjets[:, 0].btagDeepB),
            ak.to_numpy(sel7_fatjets[:, 0].btagHbb),
            ak.to_numpy(sel7_fatjets[:, 0].msoftdrop),
            ak.to_numpy(sel7_nextrajets),
            ak.to_numpy(sel7_nextrabjets),
            np.zeros(len(sel7_events)),  # channel flag: 0 = muon
            np.sign(ak.to_numpy(sel7_muons.charge) + 1),
            ak.to_numpy(sel7_muons.pt),
            ak.to_numpy(sel7_muons.eta),
            ak.to_numpy(sel7_muons.phi),
            ak.to_numpy(sel7_events.PuppiMET.pt),
            ak.to_numpy(sel7_events.PuppiMET.phi),
            ak.to_numpy(sel7_vbf_jets[:, 0].pt),
            ak.to_numpy(sel7_vbf_jets[:, 1].pt),
            ak.to_numpy(sel7_vbf_jets[:, 0].eta),
            ak.to_numpy(sel7_vbf_jets[:, 1].eta),
            ak.to_numpy(sel7_vbf_jets[:, 0].phi),
            ak.to_numpy(sel7_vbf_jets[:, 1].phi),
            ak.to_numpy(sel7_vbf_jets[:, 0].btagDeepB),
            ak.to_numpy(sel7_vbf_jets[:, 1].btagDeepB),
            ak.to_numpy((sel7_vbf_jets[:, 0] + sel7_vbf_jets[:, 1]).mass),
            ak.to_numpy(sel7_vbf_jets[:, 0].eta - sel7_vbf_jets[:, 1].eta),
            # transverse masses of (lepton + VBF jet, MET)
            ak.to_numpy(np.sqrt(2 * (sel7_muons + sel7_vbf_jets[:, 0]).pt * sel7_events.PuppiMET.pt * (1 - np.cos(sel7_events.PuppiMET.phi - (sel7_muons + sel7_vbf_jets[:, 0]).phi)))),
            ak.to_numpy(np.sqrt(2 * (sel7_muons + sel7_vbf_jets[:, 1]).pt * sel7_events.PuppiMET.pt * (1 - np.cos(sel7_events.PuppiMET.phi - (sel7_muons + sel7_vbf_jets[:, 1]).phi))))))))
        sel7_muonidsf = evaluator['muonidsf'](abs(sel7_muons.eta), sel7_muons.pt)
        sel7_muonisosf = evaluator['muonisosf'](abs(sel7_muons.eta), sel7_muons.pt)
        sel7_muonhltsf = evaluator['muonhltsf'](abs(sel7_muons.eta), sel7_muons.pt)
        # NOTE(review): sel7_weight is computed but never stored in the output
        # (and, unlike the resolved category, carries no pileup weight).
        sel7_weight = np.sign(sel7_events.Generator.weight) * sel7_events.L1PreFiringWeight.Nom * sel7_muonidsf * sel7_muonisosf * sel7_muonhltsf

    # ---- Merged category, electron channel --------------------------------
    if ak.any(basecut_merged) and ak.any(cut8):
        sel8_events = events_merged[cut8]
        sel8_fatjets = fatjets_merged[cut8]
        sel8_vbf_jets = vbf_jets_merged[cut8]
        sel8_electrons = tight_electrons_merged[cut8][:, 0]
        sel8_nextrajets = nextrajets_merged[cut8]
        sel8_nextrabjets = nextrabjets_merged[cut8]
        output["weights_merged"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel8_events.Generator.weight).data))
        output['variables_merged'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
            ak.to_numpy(sel8_fatjets[:, 0].pt),
            ak.to_numpy(sel8_fatjets[:, 0].eta),
            ak.to_numpy(sel8_fatjets[:, 0].phi),
            ak.to_numpy(sel8_fatjets[:, 0].btagDeepB),
            ak.to_numpy(sel8_fatjets[:, 0].btagHbb),
            ak.to_numpy(sel8_fatjets[:, 0].msoftdrop),
            ak.to_numpy(sel8_nextrajets),
            ak.to_numpy(sel8_nextrabjets),
            np.ones(len(sel8_events)),  # channel flag: 1 = electron
            np.sign(ak.to_numpy(sel8_electrons.charge) + 1),
            ak.to_numpy(sel8_electrons.pt),
            ak.to_numpy(sel8_electrons.eta),
            ak.to_numpy(sel8_electrons.phi),
            ak.to_numpy(sel8_events.PuppiMET.pt),
            ak.to_numpy(sel8_events.PuppiMET.phi),
            ak.to_numpy(sel8_vbf_jets[:, 0].pt),
            ak.to_numpy(sel8_vbf_jets[:, 1].pt),
            ak.to_numpy(sel8_vbf_jets[:, 0].eta),
            ak.to_numpy(sel8_vbf_jets[:, 1].eta),
            ak.to_numpy(sel8_vbf_jets[:, 0].phi),
            ak.to_numpy(sel8_vbf_jets[:, 1].phi),
            ak.to_numpy(sel8_vbf_jets[:, 0].btagDeepB),
            ak.to_numpy(sel8_vbf_jets[:, 1].btagDeepB),
            ak.to_numpy((sel8_vbf_jets[:, 0] + sel8_vbf_jets[:, 1]).mass),
            ak.to_numpy(sel8_vbf_jets[:, 0].eta - sel8_vbf_jets[:, 1].eta),
            ak.to_numpy(np.sqrt(2 * (sel8_electrons + sel8_vbf_jets[:, 0]).pt * sel8_events.PuppiMET.pt * (1 - np.cos(sel8_events.PuppiMET.phi - (sel8_electrons + sel8_vbf_jets[:, 0]).phi)))),
            ak.to_numpy(np.sqrt(2 * (sel8_electrons + sel8_vbf_jets[:, 1]).pt * sel8_events.PuppiMET.pt * (1 - np.cos(sel8_events.PuppiMET.phi - (sel8_electrons + sel8_vbf_jets[:, 1]).phi))))))))
        sel8_electronidsf = evaluator['electronidsf'](sel8_electrons.eta, sel8_electrons.pt)
        sel8_electronrecosf = evaluator['electronrecosf'](sel8_electrons.eta, sel8_electrons.pt)
        # NOTE(review): sel8_weight is computed but never stored in the output.
        sel8_weight = np.sign(sel8_events.Generator.weight) * sel8_events.L1PreFiringWeight.Nom * sel8_electronidsf * sel8_electronrecosf

    # ---- Resolved category, muon channel ----------------------------------
    if ak.any(basecut) and ak.any(cut1):
        sel1_events = events[cut1]
        sel1_b_jets = b_jets[cut1]
        sel1_vbf_jets = vbf_jets[cut1]
        sel1_muons = tight_muons[cut1][:, 0]
        sel1_nextrajets = nextrajets[cut1]
        sel1_nextrabjets = nextrabjets[cut1]
        output["weights"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel1_events.Generator.weight).data))
        output['variables'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
            ak.to_numpy(sel1_nextrajets),
            ak.to_numpy(sel1_nextrabjets),
            np.zeros(len(sel1_events)),  # channel flag: 0 = muon
            np.sign(ak.to_numpy(sel1_muons.charge) + 1),
            ak.to_numpy(sel1_muons.pt),
            ak.to_numpy(sel1_muons.eta),
            ak.to_numpy(sel1_muons.phi),
            ak.to_numpy(sel1_events.PuppiMET.pt),
            ak.to_numpy(sel1_events.PuppiMET.phi),
            ak.to_numpy(sel1_b_jets[:, 0].pt),
            ak.to_numpy(sel1_b_jets[:, 1].pt),
            ak.to_numpy(sel1_vbf_jets[:, 0].pt),
            ak.to_numpy(sel1_vbf_jets[:, 1].pt),
            ak.to_numpy(sel1_b_jets[:, 0].eta),
            ak.to_numpy(sel1_b_jets[:, 1].eta),
            ak.to_numpy(sel1_vbf_jets[:, 0].eta),
            ak.to_numpy(sel1_vbf_jets[:, 1].eta),
            ak.to_numpy(sel1_b_jets[:, 0].phi),
            ak.to_numpy(sel1_b_jets[:, 1].phi),
            ak.to_numpy(sel1_vbf_jets[:, 0].phi),
            ak.to_numpy(sel1_vbf_jets[:, 1].phi),
            ak.to_numpy(sel1_b_jets[:, 0].btagDeepB),
            ak.to_numpy(sel1_b_jets[:, 1].btagDeepB),
            ak.to_numpy(sel1_vbf_jets[:, 0].btagDeepB),
            ak.to_numpy(sel1_vbf_jets[:, 1].btagDeepB),
            ak.to_numpy((sel1_b_jets[:, 0] + sel1_b_jets[:, 1]).mass),
            ak.to_numpy((sel1_vbf_jets[:, 0] + sel1_vbf_jets[:, 1]).mass),
            ak.to_numpy(sel1_vbf_jets[:, 0].eta - sel1_vbf_jets[:, 1].eta),
            # transverse masses of (lepton + b jet, MET)
            ak.to_numpy(np.sqrt(2 * (sel1_muons + sel1_b_jets[:, 0]).pt * sel1_events.PuppiMET.pt * (1 - np.cos(sel1_events.PuppiMET.phi - (sel1_muons + sel1_b_jets[:, 0]).phi)))),
            ak.to_numpy(np.sqrt(2 * (sel1_muons + sel1_b_jets[:, 1]).pt * sel1_events.PuppiMET.pt * (1 - np.cos(sel1_events.PuppiMET.phi - (sel1_muons + sel1_b_jets[:, 1]).phi))))))))
        sel1_pu_weight = evaluator['pileup'](sel1_events.Pileup.nTrueInt)
        sel1_muonidsf = evaluator['muonidsf'](abs(sel1_muons.eta), sel1_muons.pt)
        sel1_muonisosf = evaluator['muonisosf'](abs(sel1_muons.eta), sel1_muons.pt)
        sel1_muonhltsf = evaluator['muonhltsf'](abs(sel1_muons.eta), sel1_muons.pt)
        # NOTE(review): sel1_weight is computed but never stored in the output.
        sel1_weight = np.sign(sel1_events.Generator.weight) * sel1_pu_weight * sel1_events.L1PreFiringWeight.Nom * sel1_muonidsf * sel1_muonisosf * sel1_muonhltsf

    # ---- Resolved category, electron channel ------------------------------
    if ak.any(basecut) and ak.any(cut2):
        sel2_events = events[cut2]
        sel2_b_jets = b_jets[cut2]
        sel2_vbf_jets = vbf_jets[cut2]
        sel2_electrons = tight_electrons[cut2][:, 0]
        sel2_nextrajets = nextrajets[cut2]
        sel2_nextrabjets = nextrabjets[cut2]
        output["weights"][dataset] += processor.column_accumulator(np.sign(ak.to_numpy(sel2_events.Generator.weight).data))
        output['variables'][dataset] += processor.column_accumulator(np.transpose(np.vstack((
            ak.to_numpy(sel2_nextrajets),
            ak.to_numpy(sel2_nextrabjets),
            np.ones(len(sel2_events)),  # channel flag: 1 = electron
            np.sign(ak.to_numpy(sel2_electrons.charge) + 1),
            ak.to_numpy(sel2_electrons.pt),
            ak.to_numpy(sel2_electrons.eta),
            ak.to_numpy(sel2_electrons.phi),
            ak.to_numpy(sel2_events.PuppiMET.pt),
            ak.to_numpy(sel2_events.PuppiMET.phi),
            ak.to_numpy(sel2_b_jets[:, 0].pt),
            ak.to_numpy(sel2_b_jets[:, 1].pt),
            ak.to_numpy(sel2_vbf_jets[:, 0].pt),
            ak.to_numpy(sel2_vbf_jets[:, 1].pt),
            ak.to_numpy(sel2_b_jets[:, 0].eta),
            ak.to_numpy(sel2_b_jets[:, 1].eta),
            ak.to_numpy(sel2_vbf_jets[:, 0].eta),
            ak.to_numpy(sel2_vbf_jets[:, 1].eta),
            ak.to_numpy(sel2_b_jets[:, 0].phi),
            ak.to_numpy(sel2_b_jets[:, 1].phi),
            ak.to_numpy(sel2_vbf_jets[:, 0].phi),
            ak.to_numpy(sel2_vbf_jets[:, 1].phi),
            ak.to_numpy(sel2_b_jets[:, 0].btagDeepB),
            ak.to_numpy(sel2_b_jets[:, 1].btagDeepB),
            ak.to_numpy(sel2_vbf_jets[:, 0].btagDeepB),
            ak.to_numpy(sel2_vbf_jets[:, 1].btagDeepB),
            ak.to_numpy((sel2_b_jets[:, 0] + sel2_b_jets[:, 1]).mass),
            ak.to_numpy((sel2_vbf_jets[:, 0] + sel2_vbf_jets[:, 1]).mass),
            ak.to_numpy(sel2_vbf_jets[:, 0].eta - sel2_vbf_jets[:, 1].eta),
            ak.to_numpy(np.sqrt(2 * (sel2_electrons + sel2_b_jets[:, 0]).pt * sel2_events.PuppiMET.pt * (1 - np.cos(sel2_events.PuppiMET.phi - (sel2_electrons + sel2_b_jets[:, 0]).phi)))),
            ak.to_numpy(np.sqrt(2 * (sel2_electrons + sel2_b_jets[:, 1]).pt * sel2_events.PuppiMET.pt * (1 - np.cos(sel2_events.PuppiMET.phi - (sel2_electrons + sel2_b_jets[:, 1]).phi))))))))
        sel2_pu_weight = evaluator['pileup'](sel2_events.Pileup.nTrueInt)
        sel2_electronidsf = evaluator['electronidsf'](sel2_electrons.eta, sel2_electrons.pt)
        sel2_electronrecosf = evaluator['electronrecosf'](sel2_electrons.eta, sel2_electrons.pt)
        # NOTE(review): sel2_weight is computed but never stored in the output.
        sel2_weight = np.sign(sel2_events.Generator.weight) * sel2_pu_weight * sel2_events.L1PreFiringWeight.Nom * sel2_electronidsf * sel2_electronrecosf

    return output
def test_corrected_jets_factory():
    """Build corrected jets and MET with the coffea factories on the nano_dy
    sample, timing each step and printing every systematic variation."""
    import os
    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack

    events = None
    cache = {}

    from coffea.nanoevents import NanoEventsFactory

    sample_path = os.path.abspath('tests/samples/nano_dy.root')
    factory = NanoEventsFactory.from_root(sample_path)
    events = factory.events()

    jec_stack_names = [
        'Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi',
        'Spring16_25nsV10_MC_PtResolution_AK4PFPuppi',
        'Spring16_25nsV10_MC_SF_AK4PFPuppi',
    ]
    # pick up every jet-uncertainty source shipped with the evaluator
    jec_stack_names.extend(
        key for key in evaluator.keys()
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in key
    )

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    name_map = jec_stack.blank_name_map
    name_map['JetPt'] = 'pt'
    name_map['JetMass'] = 'mass'
    name_map['JetEta'] = 'eta'
    name_map['JetA'] = 'area'

    jets = events.Jet
    # raw (uncorrected) kinematics plus the inputs JER/JEC evaluation needs
    jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
    jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
    jets['pt_gen'] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32)
    jets['rho'] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0]
    name_map['ptGenJet'] = 'pt_gen'
    name_map['ptRaw'] = 'pt_raw'
    name_map['massRaw'] = 'mass_raw'
    name_map['Rho'] = 'rho'

    events_cache = events.caches[0]
    print(name_map)

    t0 = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    t1 = time.time()
    print('setup corrected jets time =', t1 - t0)

    t0 = time.time()
    corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)
    t1 = time.time()
    print('corrected_jets build time =', t1 - t0)

    t0 = time.time()
    print(corrected_jets.pt_orig)
    print(corrected_jets.pt)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    t1 = time.time()
    print('build all jet variations =', t1 - t0)

    # wire up the MET factory's column names
    name_map['METpt'] = 'pt'
    name_map['METphi'] = 'phi'
    name_map['METx'] = 'x'
    name_map['METy'] = 'y'
    name_map['JETx'] = 'x'
    name_map['JETy'] = 'y'
    name_map['xMETRaw'] = 'x_raw'
    name_map['yMETRaw'] = 'y_raw'
    name_map['UnClusteredEnergyDeltaX'] = 'MetUnclustEnUpDeltaX'
    name_map['UnClusteredEnergyDeltaY'] = 'MetUnclustEnUpDeltaY'

    t0 = time.time()
    met_factory = CorrectedMETFactory(name_map)
    t1 = time.time()
    print('setup corrected MET time =', t1 - t0)

    met = events.MET
    t0 = time.time()
    corrected_met = met_factory.build(met, corrected_jets, lazy_cache=events_cache)
    t1 = time.time()
    print('corrected_met build time =', t1 - t0)

    t0 = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    for unc in (jet_factory.uncertainties() + met_factory.uncertainties()):
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    t1 = time.time()
    print('build all met variations =', t1 - t0)
def process(self, events): # Initialize accumulator out = self.accumulator.identity() dataset = sample_name #events.metadata['dataset'] # Data or MC isData = 'genWeight' not in events.fields #Stop processing if there is no event remain if len(events) == 0: return out # Golden Json file if (self._year == "2018") and isData: injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD" if (self._year == "2017") and isData: injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt" # <----- Get Scale factors ------># if not isData: # Egamma reco ID get_ele_reco_above20_sf = self._corrections[ 'get_ele_reco_above20_sf'][self._year] get_ele_medium_id_sf = self._corrections['get_ele_medium_id_sf'][ self._year] get_pho_medium_id_sf = self._corrections['get_pho_medium_id_sf'][ self._year] # DoubleEG trigger # 2016, 2017 are not applied yet if self._year == "2018": get_ele_trig_leg1_SF = self._corrections[ 'get_ele_trig_leg1_SF'][self._year] get_ele_trig_leg1_data_Eff = self._corrections[ 'get_ele_trig_leg1_data_Eff'][self._year] get_ele_trig_leg1_mc_Eff = self._corrections[ 'get_ele_trig_leg1_mc_Eff'][self._year] get_ele_trig_leg2_SF = self._corrections[ 'get_ele_trig_leg2_SF'][self._year] get_ele_trig_leg2_data_Eff = self._corrections[ 'get_ele_trig_leg2_data_Eff'][self._year] get_ele_trig_leg2_mc_Eff = self._corrections[ 'get_ele_trig_leg2_mc_Eff'][self._year] # PU weight with custom made npy and multi-indexing pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64") pu = self._puweight_arr[pu_weight_idx] selection = processor.PackedSelection() # Cut flow cut0 = np.zeros(len(events)) # <----- Helper functions ------># # Sort by PT helper function def sort_by_pt(ele, pho, jet): ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)] pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)] jet = 
jet[ak.argsort(jet.pt, ascending=False, axis=1)] return ele, pho, jet # Lorentz vectors from coffea.nanoevents.methods import vector ak.behavior.update(vector.behavior) def TLorentz_vector(vec): vec = ak.zip({ "x": vec.x, "y": vec.y, "z": vec.z, "t": vec.t }, with_name="LorentzVector") return vec def TLorentz_vector_cylinder(vec): vec = ak.zip( { "pt": vec.pt, "eta": vec.eta, "phi": vec.phi, "mass": vec.mass, }, with_name="PtEtaPhiMLorentzVector", ) return vec # Cut-based ID modification @numba.njit def PhotonVID(vid, idBit): rBit = 0 for x in range(0, 7): rBit |= (1 << x) if ((vid >> (x * 2)) & 0b11 >= idBit) else 0 return rBit # Inverse Sieie and upper limit @numba.njit def make_fake_obj_mask(Pho, builder): #for eventIdx,pho in enumerate(tqdm(Pho)): # --Event Loop for eventIdx, pho in enumerate(Pho): builder.begin_list() if len(pho) < 1: continue for phoIdx, _ in enumerate(pho): # --Photon Loop vid = Pho[eventIdx][phoIdx].vidNestedWPBitmap vid_cuts1 = PhotonVID(vid, 1) # Loose photon vid_cuts2 = PhotonVID(vid, 2) # Medium photon vid_cuts3 = PhotonVID(vid, 3) # Tight photon # Field name # |0|0|0|0|0|0|0| # |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT| # 1. Turn off cut (ex turn off Sieie # |1|1|1|0|1|1|1| = |1|1|1|0|1|1|1| # 2. 
Inverse cut (ex inverse Sieie) # |1|1|1|1|1|1|1| = |1|1|1|0|1|1|1| #if (vid_cuts2 & 0b1111111 == 0b1111111): # Cut applied #if (vid_cuts2 & 0b1111111 == 0b1110111): # Inverse Sieie if (vid_cuts2 & 0b1110111 == 0b1110111): # Without Sieie builder.boolean(True) else: builder.boolean(False) builder.end_list() return builder # <----- Selection ------># Initial_events = events # Good Run ( Golden Json files ) from coffea import lumi_tools if isData: lumi_mask_builder = lumi_tools.LumiMask(injson) lumimask = ak.Array( lumi_mask_builder.__call__(events.run, events.luminosityBlock)) events = events[lumimask] #print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events))) # Stop processing if there is no event remain if len(events) == 0: return out ##----------- Cut flow1: Passing Triggers # double lepton trigger is_double_ele_trigger = True if not is_double_ele_trigger: double_ele_triggers_arr = np.ones(len(events), dtype=np.bool) else: double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool) for path in self._doubleelectron_triggers[self._year]: if path not in events.HLT.fields: continue double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[ path] # single lepton trigger is_single_ele_trigger = True if not is_single_ele_trigger: single_ele_triggers_arr = np.ones(len(events), dtype=np.bool) else: single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool) for path in self._singleelectron_triggers[self._year]: if path not in events.HLT.fields: continue single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[ path] events.Electron, events.Photon, events.Jet = sort_by_pt( events.Electron, events.Photon, events.Jet) # Good Primary vertex nPV = events.PV.npvsGood if not isData: nPV = nPV * pu nPV_nw = nPV # Apply cut1 events = events[double_ele_triggers_arr] if not isData: pu = pu[double_ele_triggers_arr] cut1 = np.ones(len(events)) # Set Particles Electron = events.Electron Muon = events.Muon Photon = events.Photon MET = 
events.MET Jet = events.Jet # Stop processing if there is no event remain if len(Electron) == 0: return out # --Gen Photon for dR genparts = events.GenPart pdgID_mask = (genparts.pdgId == 22) # mask2: isPrompt | fromHardProcess | isLastCopy mask2 = (1 << 0) | (1 << 8) | (1 << 13) # https://github.com/PKUHEPEWK/WGamma/blob/master/2018/wgRealPhotonTemplateModule.py status_mask = ((genparts.statusFlags & mask2) == mask2) gen_photons = genparts[pdgID_mask & status_mask] assert (ak.all(ak.num(gen_photons) == 1) ) # Raise error if len(gen_photon) != 1 # --Muon ( only used to calculate dR ) MuSelmask = (Muon.pt >= 10) & (abs( Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15) Muon = Muon[MuSelmask] ##----------- Cut flow2: Electron Selection EleSelmask = ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) & (abs(Electron.dz) < 0.1)) | \ ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) & (abs(Electron.dz) < 0.2)) Electron = Electron[EleSelmask] # apply cut 2 Tri_electron_mask = ak.num(Electron) >= 2 Electron = Electron[Tri_electron_mask] Photon = Photon[Tri_electron_mask] Jet = Jet[Tri_electron_mask] MET = MET[Tri_electron_mask] Muon = Muon[Tri_electron_mask] if not isData: pu = pu[Tri_electron_mask] events = events[Tri_electron_mask] gen_photons = gen_photons[Tri_electron_mask] # Stop processing if there is no event remain if len(Electron) == 0: return out cut2 = np.ones(len(Photon)) * 2 ##----------- Cut flow3: Photon Selection # Basic photon selection isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5)) Pixel_seed_mask = ~Photon.pixelSeed PT_mask = Photon.pt >= 20 # dR cut with selected Muon and Electrons dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5, axis=-1) # default metric table: 
delta_r dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1) PhoSelmask = PT_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask Photon = Photon[PhoSelmask] # Apply cut 3 A_photon_mask = ak.num(Photon) > 0 Electron = Electron[A_photon_mask] Photon = Photon[A_photon_mask] Jet = Jet[A_photon_mask] Muon = Muon[A_photon_mask] MET = MET[A_photon_mask] if not isData: pu = pu[A_photon_mask] events = events[A_photon_mask] gen_photons = gen_photons[A_photon_mask] Photon_template_mask = make_fake_obj_mask( Photon, ak.ArrayBuilder()).snapshot() Photon = Photon[Photon_template_mask] # Apply cut 3 A_photon_mask = ak.num(Photon) > 0 Electron = Electron[A_photon_mask] Photon = Photon[A_photon_mask] Jet = Jet[A_photon_mask] Muon = Muon[A_photon_mask] MET = MET[A_photon_mask] if not isData: pu = pu[A_photon_mask] events = events[A_photon_mask] gen_photons = gen_photons[A_photon_mask] # Stop processing if there is no event remain if len(Electron) == 0: return out cut3 = np.ones(len(Photon)) * 3 ## -- Additional photon selection: Photon gen-matching # Choose Photons that dR(genPhoton,Photon) <= 0.1 gen_match_photon_mask = ak.all(Photon.metric_table(gen_photons) <= 0.1, axis=-1) # Apply cut Photon = Photon[gen_match_photon_mask] gen_match_photon_evt_mask = ak.num(Photon) >= 1 Electron = Electron[gen_match_photon_evt_mask] Photon = Photon[gen_match_photon_evt_mask] Jet = Jet[gen_match_photon_evt_mask] MET = MET[gen_match_photon_evt_mask] gen_photons = gen_photons[gen_match_photon_evt_mask] if not isData: pu = pu[gen_match_photon_evt_mask] events = events[gen_match_photon_evt_mask] ##----------- Cut flow4: Select 2 OSSF electrons from Z @numba.njit def find_2lep(events_leptons, builder): for leptons in events_leptons: builder.begin_list() nlep = len(leptons) for i0 in range(nlep): for i1 in range(i0 + 1, nlep): if leptons[i0].charge + leptons[i1].charge != 0: continue if nlep == 2: builder.begin_tuple(2) builder.index(0).integer(i0) 
builder.index(1).integer(i1) builder.end_tuple() else: for i2 in range(nlep): if len({i0, i1, i2}) < 3: continue builder.begin_tuple(3) builder.index(0).integer(i0) builder.index(1).integer(i1) builder.index(2).integer(i2) builder.end_tuple() builder.end_list() return builder ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot() # OSSF cut ossf_mask = ak.num(ossf_idx) >= 1 ossf_idx = ossf_idx[ossf_mask] Electron = Electron[ossf_mask] Photon = Photon[ossf_mask] Jet = Jet[ossf_mask] MET = MET[ossf_mask] events = events[ossf_mask] if not isData: pu = pu[ossf_mask] Double_electron = [Electron[ossf_idx[idx]] for idx in "01"] from coffea.nanoevents.methods import vector ak.behavior.update(vector.behavior) Diele = ak.zip({ "lep1": Double_electron[0], "lep2": Double_electron[1], "p4": TLorentz_vector(Double_electron[0] + Double_electron[1]) }) bestZ_idx = ak.singletons( ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1)) Diele = Diele[bestZ_idx] # Stop processing if there is no event remain if len(Electron) == 0: return out cut4 = np.ones(len(Electron)) * 4 leading_ele = Diele.lep1 subleading_ele = Diele.lep2 def make_leading_pair(target, base): return target[ak.argmax(base.pt, axis=1, keepdims=True)] leading_pho = make_leading_pair(Photon, Photon) # -- Scale Factor for each electron # Trigger weight helper function def Trigger_Weight(eta1, pt1, eta2, pt2): per_ev_MC =\ get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg2_mc_Eff(eta2,pt2) +\ get_ele_trig_leg1_mc_Eff(eta2,pt2) * get_ele_trig_leg2_mc_Eff(eta1,pt1) -\ get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg1_mc_Eff(eta2,pt2) per_ev_data =\ get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg2_data_Eff(eta2,pt2) * get_ele_trig_leg2_SF(eta2,pt2) +\ get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) * get_ele_trig_leg2_data_Eff(eta1,pt1) * get_ele_trig_leg2_SF(eta1,pt1) -\ get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * 
get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) return per_ev_data / per_ev_MC if not isData: ## -------------< Egamma ID and Reco Scale factor > -----------------## get_pho_medium_id_sf = get_pho_medium_id_sf( ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt)) ele_reco_sf = get_ele_reco_above20_sf( ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt)) * get_ele_reco_above20_sf( ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt)) ele_medium_id_sf = get_ele_medium_id_sf( ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt)) * get_ele_medium_id_sf( ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt)) ## -------------< Double Electron Trigger Scale factor > -----------------## eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta) eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta) pt1 = ak.flatten(leading_ele.pt) pt2 = ak.flatten(subleading_ele.pt) # -- 2017,2016 are not applied yet if self._year == '2018': ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2) ##----------- Cut flow5: Event selection # Mee cut Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4 # Electron PT cuts Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20)) # MET cuts MET_mask = MET.pt > 20 # --------Mask -------# Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask Diele_sel = Diele[Event_sel_mask] leading_pho_sel = leading_pho[Event_sel_mask] Jet_sel = Jet[Event_sel_mask] MET_sel = MET[Event_sel_mask] # Photon EE and EB isEE_mask = leading_pho.isScEtaEE isEB_mask = leading_pho.isScEtaEB Pho_EE = leading_pho[isEE_mask & Event_sel_mask] Pho_EB = leading_pho[isEB_mask & Event_sel_mask] #Stop processing if there is no event remain if len(leading_pho_sel) == 0: return out cut5 = np.ones(len(Diele)) * 5 # -------------------- Flatten variables ---------------------------# # -- Ele1 --# Ele1_PT = 
ak.flatten(Diele_sel.lep1.pt) Ele1_Eta = ak.flatten(Diele_sel.lep1.eta) Ele1_Phi = ak.flatten(Diele_sel.lep1.phi) # -- Ele2 --# Ele2_PT = ak.flatten(Diele_sel.lep2.pt) Ele2_Eta = ak.flatten(Diele_sel.lep2.eta) Ele2_Phi = ak.flatten(Diele_sel.lep2.phi) # -- Pho -- # Pho_PT = ak.flatten(leading_pho_sel.pt) Pho_Eta = ak.flatten(leading_pho_sel.eta) Pho_Phi = ak.flatten(leading_pho_sel.phi) # -- Pho EB --# Pho_EB_PT = ak.flatten(Pho_EB.pt) Pho_EB_Eta = ak.flatten(Pho_EB.eta) Pho_EB_Phi = ak.flatten(Pho_EB.phi) Pho_EB_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg) Pho_EB_Sieie = ak.flatten(Pho_EE.sieie) # -- Pho EE --# Pho_EE_PT = ak.flatten(Pho_EE.pt) Pho_EE_Eta = ak.flatten(Pho_EE.eta) Pho_EE_Phi = ak.flatten(Pho_EE.phi) Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg) Pho_EE_Sieie = ak.flatten(Pho_EE.sieie) # --Kinematics --# Diele_mass = ak.flatten(Diele_sel.p4.mass) leading_ele, subleading_ele = ak.flatten( TLorentz_vector_cylinder(Diele_sel.lep1)), ak.flatten( TLorentz_vector_cylinder(Diele_sel.lep2)) dR_e1pho = ak.flatten( leading_ele.delta_r(leading_pho_sel)) # dR pho,ele1 dR_e2pho = ak.flatten( subleading_ele.delta_r(leading_pho_sel)) # dR pho,ele2 dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel)) MET_PT = ak.to_numpy(MET_sel.pt) # -------------------- Sieie bins---------------------------# def make_bins(pt, eta, sieie, bin_range_str): bin_dict = { 'PT_1_eta_1': (pt > 20) & (pt < 30) & (eta < 1), 'PT_1_eta_2': (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5), 'PT_1_eta_3': (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2), 'PT_1_eta_4': (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5), 'PT_2_eta_1': (pt > 30) & (pt < 40) & (eta < 1), 'PT_2_eta_2': (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5), 'PT_2_eta_3': (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2), 'PT_2_eta_4': (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5), 'PT_3_eta_1': (pt > 40) & (pt < 50) & (eta < 1), 'PT_3_eta_2': (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5), 'PT_3_eta_3': (pt > 40) & 
(pt < 50) & (eta > 1.5) & (eta < 2), 'PT_3_eta_4': (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5), 'PT_4_eta_1': (pt > 50) & (eta < 1), 'PT_4_eta_2': (pt > 50) & (eta > 1) & (eta < 1.5), 'PT_4_eta_3': (pt > 50) & (eta > 1.5) & (eta < 2), 'PT_4_eta_4': (pt > 50) & (eta > 2) & (eta < 2.5) } binmask = bin_dict[bin_range_str] return ak.to_numpy(sieie[binmask]), binmask bin_name_list = [ 'PT_1_eta_1', 'PT_1_eta_2', 'PT_1_eta_3', 'PT_1_eta_4', 'PT_2_eta_1', 'PT_2_eta_2', 'PT_2_eta_3', 'PT_2_eta_4', 'PT_3_eta_1', 'PT_3_eta_2', 'PT_3_eta_3', 'PT_3_eta_4', 'PT_4_eta_1', 'PT_4_eta_2', 'PT_4_eta_3', 'PT_4_eta_4' ] binned_sieie_hist = {} binmask_dict = {} for name in bin_name_list: binned_sieie_hist[name], _ = make_bins( ak.flatten(leading_pho_sel.pt), ak.flatten(abs(leading_pho_sel.eta)), ak.flatten(leading_pho_sel.sieie), name) _, binmask_dict[name] = make_bins(ak.flatten(leading_pho.pt), ak.flatten(abs(leading_pho.eta)), ak.flatten(leading_pho.sieie), name) print("Show me the last bin: ", binned_sieie_hist['PT_4_eta_4']) # --- Apply weight and hist weights = processor.Weights(len(cut4)) # --- skim cut-weight def skim_weight(arr): mask1 = ~ak.is_none(arr) subarr = arr[mask1] mask2 = subarr != 0 return ak.to_numpy(subarr[mask2]) cuts = Event_sel_mask cuts_pho_EE = ak.flatten(isEE_mask) cuts_pho_EB = ak.flatten(isEB_mask) print( "cut0: {0}, cut1: {1}, cut2: {2}, cut3: {3}, cut4: {4} ,cut5 {5} ". 
format(len(Initial_events), len(cut1), len(cut2), len(cut3), len(cut4), len(cut5))) # Weight and SF here if not isData: weights.add('pileup', pu) weights.add('ele_id', ele_medium_id_sf) weights.add('pho_id', get_pho_medium_id_sf) weights.add('ele_reco', ele_reco_sf) # 2016,2017 are not applied yet if self._year == "2018": weights.add('ele_trigger', ele_trig_weight) # ---------------------------- Fill hist --------------------------------------# # Initial events out["sumw"][dataset] += len(Initial_events) # Cut flow loop for cut in [cut0, cut1, cut2, cut3, cut4, cut5]: out["cutflow"].fill(dataset=dataset, cutflow=cut) # Primary vertex out['nPV'].fill( dataset=dataset, nPV=nPV, ) out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV_nw) # Fill hist # -- met -- # out["met"].fill(dataset=dataset, met=MET_PT, weight=skim_weight(weights.weight() * cuts)) # --mass -- # out["mass"].fill(dataset=dataset, mass=Diele_mass, weight=skim_weight(weights.weight() * cuts)) # -- Ele1 -- # out["ele1pt"].fill(dataset=dataset, ele1pt=Ele1_PT, weight=skim_weight(weights.weight() * cuts)) out["ele1eta"].fill(dataset=dataset, ele1eta=Ele1_Eta, weight=skim_weight(weights.weight() * cuts)) out["ele1phi"].fill(dataset=dataset, ele1phi=Ele1_Phi, weight=skim_weight(weights.weight() * cuts)) # --Ele2 --# out["ele2pt"].fill(dataset=dataset, ele2pt=Ele2_PT, weight=skim_weight(weights.weight() * cuts)) out["ele2eta"].fill(dataset=dataset, ele2eta=Ele2_Eta, weight=skim_weight(weights.weight() * cuts)) out["ele2phi"].fill(dataset=dataset, ele2phi=Ele2_Phi, weight=skim_weight(weights.weight() * cuts)) # -- Photon -- # out["phopt"].fill(dataset=dataset, phopt=Pho_PT, weight=skim_weight(weights.weight() * cuts)) out["phoeta"].fill(dataset=dataset, phoeta=Pho_Eta, weight=skim_weight(weights.weight() * cuts)) out["phophi"].fill(dataset=dataset, phophi=Pho_Phi, weight=skim_weight(weights.weight() * cuts)) # -- Binned sieie hist -- # if len(binned_sieie_hist['PT_1_eta_1'] > 0): 
out['PT_1_eta_1'].fill(dataset=dataset, PT_1_eta_1=binned_sieie_hist['PT_1_eta_1']) if len(binned_sieie_hist['PT_1_eta_2'] > 0): out['PT_1_eta_2'].fill(dataset=dataset, PT_1_eta_2=binned_sieie_hist['PT_1_eta_2']) if len(binned_sieie_hist['PT_1_eta_3'] > 0): out['PT_1_eta_3'].fill(dataset=dataset, PT_1_eta_3=binned_sieie_hist['PT_1_eta_3']) if len(binned_sieie_hist['PT_1_eta_4'] > 0): out['PT_1_eta_4'].fill(dataset=dataset, PT_1_eta_4=binned_sieie_hist['PT_1_eta_4']) if len(binned_sieie_hist['PT_2_eta_1'] > 0): out['PT_2_eta_1'].fill(dataset=dataset, PT_2_eta_1=binned_sieie_hist['PT_2_eta_1']) if len(binned_sieie_hist['PT_2_eta_2'] > 0): out['PT_2_eta_2'].fill(dataset=dataset, PT_2_eta_2=binned_sieie_hist['PT_2_eta_2']) if len(binned_sieie_hist['PT_2_eta_3'] > 0): out['PT_2_eta_3'].fill(dataset=dataset, PT_2_eta_3=binned_sieie_hist['PT_2_eta_3']) if len(binned_sieie_hist['PT_2_eta_4'] > 0): out['PT_2_eta_4'].fill(dataset=dataset, PT_2_eta_4=binned_sieie_hist['PT_2_eta_4']) if len(binned_sieie_hist['PT_3_eta_1'] > 0): out['PT_3_eta_1'].fill(dataset=dataset, PT_3_eta_1=binned_sieie_hist['PT_3_eta_1']) if len(binned_sieie_hist['PT_3_eta_2'] > 0): out['PT_3_eta_2'].fill(dataset=dataset, PT_3_eta_2=binned_sieie_hist['PT_3_eta_2']) if len(binned_sieie_hist['PT_3_eta_3'] > 0): out['PT_3_eta_3'].fill(dataset=dataset, PT_3_eta_3=binned_sieie_hist['PT_3_eta_3']) if len(binned_sieie_hist['PT_3_eta_4'] > 0): out['PT_3_eta_4'].fill(dataset=dataset, PT_3_eta_4=binned_sieie_hist['PT_3_eta_4']) if len(binned_sieie_hist['PT_4_eta_1'] > 0): out['PT_4_eta_1'].fill(dataset=dataset, PT_4_eta_1=binned_sieie_hist['PT_4_eta_1']) if len(binned_sieie_hist['PT_4_eta_2'] > 0): out['PT_4_eta_2'].fill(dataset=dataset, PT_4_eta_2=binned_sieie_hist['PT_4_eta_2']) if len(binned_sieie_hist['PT_4_eta_3'] > 0): out['PT_4_eta_3'].fill(dataset=dataset, PT_4_eta_3=binned_sieie_hist['PT_4_eta_3']) if len(binned_sieie_hist['PT_4_eta_4'] > 0): out['PT_4_eta_4'].fill(dataset=dataset, 
PT_4_eta_4=binned_sieie_hist['PT_4_eta_4']) return out