def process_histomass_single(self, index):
    """Build the invariant-mass histograms for one input file.

    Reads the event dataframe and the per-pt-bin candidate dataframes that
    belong to file number ``index``, applies the configured selections and
    writes the following histograms to ``self.l_histomass[index]``:

    * ``hEvForNorm``: bin 1 holds the value returned by
      ``getnormforselevt``, bin 2 the number of events with
      ``is_ev_rej == 0``;
    * ``hmass<suffix>``: invariant mass of the selected candidates, one
      histogram per pt bin;
    * ``hmass_sig<suffix>`` / ``hmass_refl<suffix>``: only when
      ``self.mcordata == "mc"``; same quantity for the candidates whose
      ``self.v_ismcsignal`` / ``self.v_ismcrefl`` column equals 1.

    Args:
        index: position of the file in the per-file lists (``l_histomass``,
            ``l_evtorig``, ``mptfiles_recoskmldec[bin_id]``).

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")

    # --- event-level normalisation -------------------------------------
    with openfile(self.l_evtorig[index], "rb") as evt_handle:
        dfevtorig = pickle.load(evt_handle)
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    if self.runlistrigger is not None:
        dfevtorig = selectdfrunlist(dfevtorig,
                                    self.run_param[self.runlistrigger], "run_number")

    hNorm = TH1F("hEvForNorm", "hEvForNorm", 2, 0.5, 2.5)
    hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
    hNorm.GetXaxis().SetBinLabel(2, "selected events")
    nselevt = 0
    norm = 0
    if not dfevtorig.empty:
        nselevt = len(dfevtorig.query("is_ev_rej==0"))
        norm = getnormforselevt(dfevtorig)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()

    # --- candidate-level histograms, one set per pt bin ------------------
    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        ptmin = self.lpt_finbinmin[ipt]
        ptmax = self.lpt_finbinmax[ipt]

        with openfile(self.mptfiles_recoskmldec[bin_id][index], "rb") as cand_handle:
            df = pickle.load(cand_handle)
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning, ptmin, ptmax)

        suffix = "%s%d_%d" % (self.v_var_binning, ptmin, ptmax)
        h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                         self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
        if self.runlistrigger is not None:
            df = selectdfrunlist(df, self.run_param[self.runlistrigger], "run_number")
        fill_hist(h_invmass, df.inv_mass)
        myfile.cd()
        h_invmass.Write()

        if self.mcordata == "mc":
            # The reflection flag is derived here from the bit variable.
            df[self.v_ismcrefl] = np.array(
                tag_bit_df(df, self.v_bitvar, self.b_mcrefl), dtype=int)
            df_sig = df[df[self.v_ismcsignal] == 1]
            df_refl = df[df[self.v_ismcrefl] == 1]
            h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                  self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            fill_hist(h_invmass_sig, df_sig.inv_mass)
            fill_hist(h_invmass_refl, df_refl.inv_mass)
            myfile.cd()
            h_invmass_sig.Write()
            h_invmass_refl.Write()

    # Release the output file explicitly instead of relying on garbage collection.
    myfile.Close()
    print("FINISHED")
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled pandas dataframes.

    For the file ``self.l_root[file_index]`` the following outputs are
    produced:

    * ``self.l_evtorig[file_index]``: event tree after the run-list
      selection and the ``self.s_cen_unp`` query;
    * ``self.l_evt[file_index]``: the above after the additional
      ``self.s_good_evt_unp`` query;
    * ``self.l_reco[file_index]``: reconstructed candidates after run-list
      selection, ``self.s_reco_unp`` query, merge with the selected events
      (on ``self.v_evtmatch``), ``selectfidacc`` and, if configured, the
      track-cut bit filter; the ``self.v_isstd`` flag and (MC only) the
      MC flags are added as integer columns;
    * ``self.l_gen[file_index]`` (only when ``self.mcordata == "mc"``):
      generated candidates merged with the unfiltered event frame,
      selected with ``self.s_gen_unp`` and tagged with the same flags.

    Args:
        file_index: position of the file in the per-file lists.

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    # (output column, bit mask) pairs shared by the reco and gen MC tagging
    mc_tags = (
        (self.v_ismcsignal, self.b_mcsig),
        (self.v_ismcprompt, self.b_mcsigprompt),
        (self.v_ismcfd, self.b_mcsigfd),
        (self.v_ismcbkg, self.b_mcbkg),
    )

    # Open the input once; every tree is read from the same handle.
    rootfile = uproot.open(self.l_root[file_index])

    # --- events ----------------------------------------------------------
    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    with openfile(self.l_evtorig[file_index], "wb") as out:
        pickle.dump(dfevtorig, out, protocol=4)

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    with openfile(self.l_evt[file_index], "wb") as out:
        pickle.dump(dfevt, out, protocol=4)

    # --- reconstructed candidates ----------------------------------------
    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = np.array(
        tag_bit_df(dfreco, self.v_bitvar, self.b_std), dtype=int)
    dfreco = dfreco.reset_index(drop=True)
    if self.mcordata == "mc":
        for column, bits in mc_tags:
            dfreco[column] = np.array(
                tag_bit_df(dfreco, self.v_bitvar, bits), dtype=int)
    with openfile(self.l_reco[file_index], "wb") as out:
        pickle.dump(dfreco, out, protocol=4)

    # --- generated candidates (MC only) ----------------------------------
    if self.mcordata == "mc":
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated candidates are matched to the event frame *before* the
        # good-event query (dfevtorig, not dfevt).
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = np.array(
            tag_bit_df(dfgen, self.v_bitvar, self.b_std), dtype=int)
        for column, bits in mc_tags:
            dfgen[column] = np.array(
                tag_bit_df(dfgen, self.v_bitvar, bits), dtype=int)
        dfgen = dfgen.reset_index(drop=True)
        with openfile(self.l_gen[file_index], "wb") as out:
            pickle.dump(dfgen, out, protocol=4)
def process_histomass_single(self, index):
    """Build mass, normalisation and validation histograms for one file.

    Output goes to ``self.l_histomass[index]`` and contains:

    * ``histonorm``: event counts after each selection step (all events,
      trigger, run selection, event selection) followed by one bin per
      second-variable interval;
    * the three histograms returned by ``gethistonormforselevt_mult``
      (plus their weighted versions for data when
      ``self.usetriggcorrfunc`` is set);
    * for every (pt bin, second-variable bin): a directory
      ``bin1_<ipt>_bin2_<ibin2>`` holding the meta information, and the
      histograms ``hmass<suffix>`` and ``h_invmass_weight<suffix>``; for MC
      also ``hmass_sig<suffix>`` and ``hmass_refl<suffix>``;
    * when ``self.event_cand_validation`` is True: multiplicity histograms
      for all selected events and for events with a candidate in a
      +-0.15 window around ``self.mass``, plus the objects produced by the
      ``fill_validation_*`` helpers.

    Args:
        index: position of the file in the per-file lists.

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")

    # --- event selection chain, counting events after each step ----------
    with openfile(self.l_evtorig[index], "rb") as evt_handle:
        dfevtorig = pickle.load(evt_handle)
    neventsorig = len(dfevtorig)
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    neventsaftertrigger = len(dfevtorig)
    if self.runlistrigger is not None:
        dfevtorig = selectdfrunlist(dfevtorig,
                                    self.run_param[self.runlistrigger], "run_number")
    neventsafterrunsel = len(dfevtorig)
    # The event selection is optional, exactly as for the candidates below.
    if self.s_evtsel is not None:
        dfevtevtsel = dfevtorig.query(self.s_evtsel)
    else:
        dfevtevtsel = dfevtorig
    neventsafterevtsel = len(dfevtevtsel)

    var2_bins = list(zip(self.lvar2_binmin, self.lvar2_binmax))

    # validation plot for event selection
    # 10 bins as before, more only if the second-variable bins need them.
    nbins_norm = max(10, 4 + len(var2_bins))
    histonorm = TH1F("histonorm", "histonorm", nbins_norm, 0, nbins_norm)
    counters = (
        (neventsorig, "tot events"),
        (neventsaftertrigger, "tot events after trigger"),
        (neventsafterrunsel, "tot events after run sel"),
        (neventsafterevtsel, "tot events after evt sel"),
    )
    for ibin, (count, label) in enumerate(counters, start=1):
        histonorm.SetBinContent(ibin, count)
        histonorm.GetXaxis().SetBinLabel(ibin, label)
    for ibin2, (var2min, var2max) in enumerate(var2_bins):
        binneddf = seldf_singlevar_inclusive(dfevtevtsel, self.v_var2_binning_gen,
                                             var2min, var2max)
        histonorm.SetBinContent(5 + ibin2, len(binneddf))
        histonorm.GetXaxis().SetBinLabel(
            5 + ibin2, "tot events after mult sel %d - %d" % (var2min, var2max))
    histonorm.Write()

    # --- multiplicity dependent normalisation histograms -----------------
    labeltrigger = "hbit%svs%s" % (self.triggerbit, self.v_var2_binning_gen)
    apply_trigger_weights = self.usetriggcorrfunc is not None and self.mcordata == "data"

    myfile.cd()
    hsel, hnovtxmult, hvtxoutmult = self.gethistonormforselevt_mult(
        dfevtorig, dfevtevtsel, labeltrigger, self.v_var2_binning_gen)
    if apply_trigger_weights:
        hselweight, hnovtxmultweight, hvtxoutmultweight = \
            self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, labeltrigger,
                                            self.v_var2_binning_gen,
                                            self.usetriggcorrfunc)
        hselweight.Write()
        hnovtxmultweight.Write()
        hvtxoutmultweight.Write()
    hsel.Write()
    hnovtxmult.Write()
    hvtxoutmult.Write()

    # --- candidate histograms -------------------------------------------
    list_df_recodtrig = []
    for ipt in range(self.p_nptfinbins):  # pylint: disable=too-many-nested-blocks
        bin_id = self.bin_matching[ipt]
        ptmin = self.lpt_finbinmin[ipt]
        ptmax = self.lpt_finbinmax[ipt]

        with openfile(self.mptfiles_recoskmldec[bin_id][index], "rb") as cand_handle:
            df = pickle.load(cand_handle)
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        if self.runlistrigger is not None:
            df = selectdfrunlist(df, self.run_param[self.runlistrigger], "run_number")
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        # Kept before the pt-bin selection: used for the validation below.
        list_df_recodtrig.append(df)

        df = seldf_singlevar(df, self.v_var_binning, ptmin, ptmax)
        if self.do_custom_analysis_cuts:
            df = self.apply_cuts_ptbin(df, ipt)

        for ibin2, (var2min, var2max) in enumerate(var2_bins):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, ptmin, ptmax,
                      self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, var2min, var2max)
            curr_dir = myfile.mkdir(f"bin1_{ipt}_bin2_{ibin2}")
            meta_info = create_meta_info(self.v_var_binning, ptmin, ptmax,
                                         self.v_var2_binning, var2min, var2max,
                                         self.lpt_probcutfin[bin_id])
            write_meta_info(curr_dir, meta_info)

            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning,
                                               var2min, var2max)
            fill_hist(h_invmass, df_bin.inv_mass)
            if apply_trigger_weights:
                weights = self.make_weights(df_bin[self.v_var2_binning_gen],
                                            self.weightfunc, self.weighthist,
                                            self.usetriggcorrfunc)
                # Candidates are filled with the inverse of the weight.
                weightsinv = [1. / weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
            myfile.cd()
            h_invmass.Write()
            # Written also when it was not filled (stays empty in that case).
            h_invmass_weight.Write()

            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(
                    tag_bit_df(df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    # --- optional event/candidate validation -----------------------------
    if self.event_cand_validation is True:
        df_recodtrig = pd.concat(list_df_recodtrig)
        df_recodtrig = df_recodtrig.query("inv_mass>%f and inv_mass<%f" % \
                                          (self.mass - 0.15, self.mass + 0.15))
        dfevtwithd = pd.merge(dfevtevtsel, df_recodtrig, on=self.v_evtmatch)

        label = "h%s" % self.v_var2_binning_gen
        histomult = TH1F(label, label, self.nbinshisto,
                         self.minvaluehisto, self.maxvaluehisto)
        fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
        histomult.Write()

        labelwithd = "h%s_withd" % self.v_var2_binning_gen
        histomultwithd = TH1F(labelwithd, labelwithd, self.nbinshisto,
                              self.minvaluehisto, self.maxvaluehisto)
        # "_x" is the suffix pandas gives to the left-hand (event) column
        # when both merged frames carry the same column name.
        fill_hist(histomultwithd, dfevtwithd["%s_x" % self.v_var2_binning_gen])
        histomultwithd.Write()

        # Validation histograms
        fill_validation_vertex(dfevtorig, dfevtevtsel, df_recodtrig).write()
        fill_validation_multiplicity(dfevtorig, dfevtevtsel, df_recodtrig).write()
        fill_validation_candidates(df_recodtrig).write()
        if self.mcordata == "mc":
            fill_validation_candidates(
                df_recodtrig[df_recodtrig[self.v_ismcsignal] == 1], "MC").write()

    # Release the output file explicitly instead of relying on garbage collection.
    myfile.Close()
def process_valevents(self, file_index):
    """Fill the event-level validation histograms for one file.

    The event dataframe ``self.l_evtorig[file_index]`` is read once and the
    following objects are written to ``self.l_evtvalroot[file_index]``:

    * ``hv0mvsperc`` and ``hntrklsperc``: scatter plots of ``v0m_corr`` and
      ``n_tracklets_corr`` versus ``perc_v0m`` for events with
      ``is_ev_rej == 0``;
    * for each variable in (``v0m_corr``, ``n_tracklets_corr``,
      ``perc_v0m``): ``hbitINT7vs<var>`` and, for each trigger in
      (``INT7``, ``HighMultV0``, ``HighMultSPD``),
      ``hbit<trigger>ANDINT7vs<var>``, ``hbit<trigger>vs<var>`` and the
      three normalisation histograms ``sel_``, ``novtx_`` and ``vtxout_``
      filled from the unfiltered event frame;
    * ``hEvForNorm``: bin 1 holds the value returned by
      ``getnormforselevt``, bin 2 the number of events with
      ``is_ev_rej == 0``.

    Args:
        file_index: position of the file in the per-file lists.

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    # One read from disk; every frame below is derived from it.
    with openfile(self.l_evtorig[file_index], "rb") as evt_handle:
        dfevtnorm = pickle.load(evt_handle)
    dfevt = dfevtnorm.query("is_ev_rej==0")

    myrunlisttrigmb = self.runlistrigger["INT7"]
    dfevtselmb = selectdfrunlist(dfevtnorm.query("is_ev_rej==0"),
                                 self.run_param[myrunlisttrigmb], "run_number")

    triggerlist = ["INT7", "HighMultV0", "HighMultSPD"]
    # (variable, number of bins, axis minimum, axis maximum)
    var_settings = (
        ("v0m_corr", 100, 0, 1500),
        ("n_tracklets_corr", 200, 0, 200),
        ("perc_v0m", 200, 0, .5),
    )

    fileevtroot = TFile.Open(self.l_evtvalroot[file_index], "recreate")

    hv0mvsperc = scatterplot(dfevt, "perc_v0m", "v0m_corr", 50000, 0, 100, 200, 0., 2000.)
    hv0mvsperc.SetName("hv0mvsperc")
    hv0mvsperc.Write()
    hntrklsperc = scatterplot(dfevt, "perc_v0m", "n_tracklets_corr",
                              50000, 0, 100, 200, 0., 2000.)
    hntrklsperc.SetName("hntrklsperc")
    hntrklsperc.Write()

    for var, nbins, xmin, xmax in var_settings:
        label = "hbitINT7vs%s" % (var)
        histoMB = TH1F(label, label, nbins, xmin, xmax)
        fill_hist(histoMB, dfevtselmb.query("trigger_hasbit_INT7==1")[var])
        histoMB.Sumw2()
        histoMB.Write()

        for trigger in triggerlist:
            triggerbit = "trigger_hasbit_%s==1" % trigger
            labeltriggerANDMB = "hbit%sANDINT7vs%s" % (trigger, var)
            labeltrigger = "hbit%svs%s" % (trigger, var)
            histotrigANDMB = TH1F(labeltriggerANDMB, labeltriggerANDMB, nbins, xmin, xmax)
            histotrig = TH1F(labeltrigger, labeltrigger, nbins, xmin, xmax)

            myrunlisttrig = self.runlistrigger[trigger]
            ev = len(dfevt)
            dfevtsel = selectdfrunlist(dfevt, self.run_param[myrunlisttrig], "run_number")
            if len(dfevtsel) < ev:
                print("Reduced number of events in trigger", trigger)
                print(ev, len(dfevtsel))
            fill_hist(histotrigANDMB,
                      dfevtsel.query(triggerbit + " and trigger_hasbit_INT7==1")[var])
            fill_hist(histotrig, dfevtsel.query(triggerbit)[var])
            histotrigANDMB.Sumw2()
            histotrig.Sumw2()
            histotrigANDMB.Write()
            histotrig.Write()

            hSelMult = TH1F('sel_' + labeltrigger, 'sel_' + labeltrigger, nbins, xmin, xmax)
            hNoVtxMult = TH1F('novtx_' + labeltrigger, 'novtx_' + labeltrigger,
                              nbins, xmin, xmax)
            hVtxOutMult = TH1F('vtxout_' + labeltrigger, 'vtxout_' + labeltrigger,
                               nbins, xmin, xmax)

            # multiplicity dependent normalisation
            dftrg = dfevtnorm.query(triggerbit)
            dfsel = dftrg.query('is_ev_rej == 0')
            df_to_keep = filter_bit_df(dftrg, 'is_ev_rej', [[], [0, 5, 6, 10, 11]])
            # events with reco vtx after previous selection
            tag_vtx = tag_bit_df(df_to_keep, 'is_ev_rej', [[], [1, 2, 7, 12]])
            df_no_vtx = df_to_keep[~tag_vtx.values]
            # events with reco zvtx > 10 cm after previous selection
            df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej',
                                             [[3], [1, 2, 7, 12]])

            fill_hist(hSelMult, dfsel[var])
            fill_hist(hNoVtxMult, df_no_vtx[var])
            fill_hist(hVtxOutMult, df_bit_zvtx_gr10[var])
            hSelMult.Write()
            hNoVtxMult.Write()
            hVtxOutMult.Write()

    hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
    hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
    hNorm.GetXaxis().SetBinLabel(2, "selected events")
    nselevt = 0
    norm = 0
    if not dfevtnorm.empty:
        nselevt = len(dfevtnorm.query("is_ev_rej==0"))
        norm = getnormforselevt(dfevtnorm)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()
    fileevtroot.Close()
def process_response(self):
    """Produce the response objects for jets flagged ``ismcfd == 1``.

    The merged reconstructed and generated MC dataframes of all skimming
    pt bins are selected and concatenated. The method returns early,
    without writing anything, as soon as a reconstructed dataframe has no
    ``pt_jet`` column. Otherwise these objects are added to
    ``self.n_fileeff`` (opened in "update" mode):

    * ``his_resp_jet_fd``: generated versus reconstructed jet pt;
    * ``his_ptc_ptjet_fd``: generated candidate pt versus jet pt;
    * ``his_ptc_ptjet_z_fd``: the same with ``z`` as third axis;
    * ``his_njets_gen``: number of generated jets passing the selections,
      counted before the ``ismcfd`` requirement;
    * ``resp_z``: a ``RooUnfoldResponse`` in (z, jet pt).
    """
    reco_frames = []
    gen_frames = []
    for iptskim in range(len(self.lpt_anbinmin)):
        reco = pickle.load(openfile(self.lpt_recodecmerged[iptskim], "rb"))
        if "pt_jet" not in reco.columns:
            print("Jet variables not found in the dataframe. Skipping process_response.")
            return
        if self.s_evtsel is not None:
            reco = reco.query(self.s_evtsel)
        if self.s_trigger is not None:
            reco = reco.query(self.s_trigger)
        reco = selectdfrunlist(
            reco, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        if self.doml is True:
            reco = reco.query(self.l_selml[iptskim])
        else:
            print("Doing std analysis")
        reco_frames.append(reco)

        gen = pickle.load(openfile(self.lpt_gendecmerged[iptskim], "rb"))
        gen = selectdfrunlist(
            gen, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        gen_frames.append(gen.query(self.s_presel_gen_eff))

    df_rec = pd.concat(reco_frames)
    df_gen = pd.concat(gen_frames)

    # total number of generated & selected jets for normalisation
    his_njets = TH1F("his_njets_gen", "Number of MC jets", 1, 0, 1)
    his_njets.SetBinContent(1, len(df_gen.index))

    # keep only the jets flagged ismcfd == 1 (reconstructed and generated)
    df_rec = df_rec[df_rec.ismcfd == 1]
    df_gen = df_gen[df_gen.ismcfd == 1]

    out_file = TFile.Open(self.n_fileeff, "update")

    # Bin edges: lower edges of all bins plus the upper edge of the last one
    n_bins_ptc = len(self.lpt_finbinmin)
    bins_ptc = array.array('d', [*self.lpt_finbinmin, self.lpt_finbinmax[n_bins_ptc - 1]])
    n_bins_ptjet = len(self.lvar2_binmin)
    bins_ptjet = array.array('d', [*self.lvar2_binmin, self.lvar2_binmax[n_bins_ptjet - 1]])
    bins_z = array.array('d', [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1])
    n_bins_z = len(bins_z) - 1

    # Generated vs. reconstructed jet pt
    his_resp_jet_fd = TH2F(
        "his_resp_jet_fd",
        "Response matrix of #it{p}_{T}^{jet, ch} of non-prompt jets;"
        "#it{p}_{T}^{jet, ch, gen.} (GeV/#it{c});"
        "#it{p}_{T}^{jet, ch, rec.} (GeV/#it{c})",
        100, 0, 100, 100, 0, 100)
    fill_hist(his_resp_jet_fd, df_rec.loc[:, ["pt_gen_jet", "pt_jet"]])

    # Simulated pt_cand vs. pt_jet
    his_ptc_ptjet_fd = TH2F(
        "his_ptc_ptjet_fd",
        "Simulated #it{p}_{T}^{cand.} vs. #it{p}_{T}^{jet} of non-prompt jets;"
        "#it{p}_{T}^{cand., gen.} (GeV/#it{c});"
        "#it{p}_{T}^{jet, ch, gen.} (GeV/#it{c})",
        n_bins_ptc, bins_ptc, 100, 0, 100)
    fill_hist(his_ptc_ptjet_fd, df_gen.loc[:, ["pt_cand", "pt_jet"]])

    # z at generator and reconstruction level for the reconstructed jets,
    # and z of the simulated jets
    df_rec["z_gen"] = z_gen_calc(df_rec.pt_gen_jet, df_rec.phi_gen_jet, df_rec.eta_gen_jet,
                                 df_rec.pt_gen_cand, df_rec.delta_phi_gen_jet,
                                 df_rec.delta_eta_gen_jet)
    df_rec["z"] = z_calc(df_rec.pt_jet, df_rec.phi_jet, df_rec.eta_jet,
                         df_rec.pt_cand, df_rec.phi_cand, df_rec.eta_cand)
    df_gen["z"] = z_calc(df_gen.pt_jet, df_gen.phi_jet, df_gen.eta_jet,
                         df_gen.pt_cand, df_gen.phi_cand, df_gen.eta_cand)

    # Simulated pt_cand vs. pt_jet vs. z
    his_ptc_ptjet_z_fd = TH3F(
        "his_ptc_ptjet_z_fd",
        "Simulated #it{p}_{T}^{cand.} vs. #it{p}_{T}^{jet} vs. #it{z} of non-prompt jets;"
        "#it{p}_{T}^{cand., gen.} (GeV/#it{c});"
        "#it{p}_{T}^{jet, ch, gen.} (GeV/#it{c});"
        "#it{z}",
        n_bins_ptc, bins_ptc, n_bins_ptjet, bins_ptjet, n_bins_z, bins_z)
    fill_hist(his_ptc_ptjet_z_fd, df_gen.loc[:, ["pt_cand", "pt_jet", "z"]])

    # Response matrix for feed-down smearing: x axis = z, y axis = pt_jet
    his_resp_rec = TH2F("his_resp_rec", "his_resp_rec",
                        n_bins_z, bins_z, n_bins_ptjet, bins_ptjet)
    his_resp_gen = TH2F("his_resp_gen", "his_resp_gen",
                        n_bins_z, bins_z, n_bins_ptjet, bins_ptjet)
    resp_z = RooUnfoldResponse(his_resp_rec, his_resp_gen)
    for z_rec, ptjet_rec, z_gen, ptjet_gen in zip(df_rec.z, df_rec.pt_jet,
                                                  df_rec.z_gen, df_rec.pt_gen_jet):
        resp_z.Fill(z_rec, ptjet_rec, z_gen, ptjet_gen)

    out_file.cd()
    for histo in (his_resp_jet_fd, his_ptc_ptjet_fd, his_ptc_ptjet_z_fd, his_njets):
        histo.Write()
    resp_z.Write("resp_z")
    out_file.Close()
def process_histomass_single(self, index):
    """Build mass and multiplicity histograms for one input file.

    Output goes to ``self.l_histomass[index]`` and contains:

    * ``hEvForNorm_mult<i>``: one per second-variable bin; bin 1 holds the
      value returned by ``getnormforselevt``, bin 2 the number of events
      with ``is_ev_rej == 0``;
    * for every (pt bin, second-variable bin): ``hmass<suffix>``,
      ``h_invmass_weight<suffix>``, ``hmultpt<ipt>mult<ibin2>`` and
      ``h_v0m_ntrackletsD<ibin2><ipt>``; ``hzvsmass<suffix>`` when the
      candidates carry a ``pt_jet`` column; for MC also
      ``hmass_sig<suffix>`` and ``hmass_refl<suffix>``.

    ``h_invmass_weight`` is only filled for data taken with a trigger whose
    name does not contain "INT7", and only if the function
    ``funcnorm_<triggerbit>_<var2 gen>`` is found in
    ``<self.d_val>/correctionsweights.root``; otherwise it is written
    empty. The weight file is opened at most once per call.

    Args:
        index: position of the file in the per-file lists.

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")
    good_runs = self.run_param[self.runlistrigger[self.triggerbit]]
    var2_bins = list(zip(self.lvar2_binmin, self.lvar2_binmax))

    # --- event-level normalisation per second-variable bin ---------------
    with openfile(self.l_evtorig[index], "rb") as evt_handle:
        dfevtorig = pickle.load(evt_handle)
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    dfevtorig = selectdfrunlist(dfevtorig, good_runs, "run_number")

    for ibin2, (var2min, var2max) in enumerate(var2_bins):
        mybindfevtorig = seldf_singlevar(dfevtorig, self.v_var2_binning_gen,
                                         var2min, var2max)
        hNorm = TH1F("hEvForNorm_mult%d" % ibin2, "hEvForNorm_mult%d" % ibin2, 2, 0.5, 2.5)
        hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
        hNorm.GetXaxis().SetBinLabel(2, "selected events")
        nselevt = 0
        norm = 0
        if not mybindfevtorig.empty:
            nselevt = len(mybindfevtorig.query("is_ev_rej==0"))
            norm = getnormforselevt(mybindfevtorig)
        hNorm.SetBinContent(1, norm)
        hNorm.SetBinContent(2, nselevt)
        hNorm.Write()

    # --- candidate histograms -------------------------------------------
    use_trigger_weights = "INT7" not in self.triggerbit and self.mcordata == "data"
    fileweight = None       # opened lazily, the first time it is needed
    funcweighttrig = None

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        ptmin = self.lpt_finbinmin[ipt]
        ptmax = self.lpt_finbinmax[ipt]

        with openfile(self.mptfiles_recoskmldec[bin_id][index], "rb") as cand_handle:
            df = pickle.load(cand_handle)
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning, ptmin, ptmax)

        for ibin2, (var2min, var2max) in enumerate(var2_bins):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, ptmin, ptmax,
                      self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, var2min, var2max)
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(df, self.v_var2_binning, var2min, var2max)
            df_bin = selectdfrunlist(df_bin, good_runs, "run_number")
            fill_hist(h_invmass, df_bin.inv_mass)

            if use_trigger_weights:
                if fileweight is None:
                    fileweight_name = "%s/correctionsweights.root" % self.d_val
                    fileweight = TFile.Open(fileweight_name, "read")
                    namefunction = "funcnorm_%s_%s" % (self.triggerbit,
                                                       self.v_var2_binning_gen)
                    funcweighttrig = fileweight.Get(namefunction)
                # Get() yields a null object when the function is missing.
                if funcweighttrig:
                    weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                    weightsinv = [1./weight for weight in weights]
                    fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)

            # Opening the weight file changes the current ROOT directory:
            # switch back to the output file before writing.
            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()

            histmult = TH1F("hmultpt%dmult%d" % (ipt, ibin2),
                            "hmultpt%dmult%d" % (ipt, ibin2), 1000, 0, 1000)
            fill_hist(histmult, df_bin.n_tracklets_corr)
            histmult.Write()

            h_v0m_ntrackletsD = TH2F("h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                     "h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                     200, 0, 200, 200, -0.5, 1999.5)
            v_v0m_ntrackletsD = np.vstack((df_bin.n_tracklets_corr,
                                           df_bin.v0m_corr)).T
            fill_hist(h_v0m_ntrackletsD, v_v0m_ntrackletsD)
            h_v0m_ntrackletsD.Write()

            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                    5000, 1.00, 6.00, 2000, -0.5, 1.5)
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()

            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(
                    tag_bit_df(df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    # Release both ROOT files explicitly.
    if fileweight is not None:
        fileweight.Close()
    myfile.Close()
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled pandas dataframes.

    For the file ``self.l_root[file_index]`` the following outputs are
    produced:

    * ``self.l_evtorig[file_index]``: event tree after the run-list
      selection and the ``self.s_cen_unp`` query;
    * ``self.l_evt[file_index]``: the above after the additional
      ``self.s_good_evt_unp`` query;
    * ``self.l_reco[file_index]``: reconstructed candidates after run-list
      selection, ``self.s_reco_unp`` query, merge with the selected events
      (on ``self.v_evtmatch``), ``selectfidacc`` and, if configured, the
      track-cut bit filter. The column ``n_tracklets_corr_sub`` is added:
      ``n_tracklets_corr`` minus the number of prongs of the candidate
      whose ``spdhits_prong<i>`` value equals 3. The ``self.v_isstd`` flag
      and (MC only) the MC flags are added as integer columns;
    * ``self.l_gen[file_index]`` (only when ``self.mcordata == "mc"``):
      generated candidates merged with the unfiltered event frame,
      selected with ``self.s_gen_unp`` and tagged with the same flags.

    Args:
        file_index: position of the file in the per-file lists.

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    # (output column, bit mask) pairs shared by the reco and gen MC tagging
    mc_tags = (
        (self.v_ismcsignal, self.b_mcsig),
        (self.v_ismcprompt, self.b_mcsigprompt),
        (self.v_ismcfd, self.b_mcsigfd),
        (self.v_ismcbkg, self.b_mcbkg),
    )

    # Open the input once; every tree is read from the same handle.
    rootfile = uproot.open(self.l_root[file_index])

    # --- events ----------------------------------------------------------
    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    with openfile(self.l_evtorig[file_index], "wb") as out:
        pickle.dump(dfevtorig, out, protocol=4)

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    with openfile(self.l_evt[file_index], "wb") as out:
        pickle.dump(dfevt, out, protocol=4)

    # --- reconstructed candidates ----------------------------------------
    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Count, per candidate, the prongs with spdhits == 3 and subtract that
    # number from the event multiplicity. With zero prongs nothing is
    # subtracted and the column equals n_tracklets_corr.
    n_prongs_to_subtract = np.zeros(len(dfreco), dtype=int)
    for iprong in range(self.nprongs):
        spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
        n_prongs_to_subtract += (spdhits_thisprong == 3).astype(int)
    dfreco["n_tracklets_corr_sub"] = \
        np.subtract(dfreco["n_tracklets_corr"].values, n_prongs_to_subtract)

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = np.array(
        tag_bit_df(dfreco, self.v_bitvar, self.b_std), dtype=int)
    dfreco = dfreco.reset_index(drop=True)
    if self.mcordata == "mc":
        for column, bits in mc_tags:
            dfreco[column] = np.array(
                tag_bit_df(dfreco, self.v_bitvar, bits), dtype=int)
    with openfile(self.l_reco[file_index], "wb") as out:
        pickle.dump(dfreco, out, protocol=4)

    # --- generated candidates (MC only) ----------------------------------
    if self.mcordata == "mc":
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated candidates are matched to the event frame *before* the
        # good-event query (dfevtorig, not dfevt).
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = np.array(
            tag_bit_df(dfgen, self.v_bitvar, self.b_std), dtype=int)
        for column, bits in mc_tags:
            dfgen[column] = np.array(
                tag_bit_df(dfgen, self.v_bitvar, bits), dtype=int)
        dfgen = dfgen.reset_index(drop=True)
        with openfile(self.l_gen[file_index], "wb") as out:
            pickle.dump(dfgen, out, protocol=4)
def cutvariation_efficiencies(self, min_cv_cut, max_cv_cut):
    """Count generated and selected MC candidates for a scan of the ML cut.

    For every pt bin the cut on ``y_test_prob<modelname>`` is scanned over
    ``2 * self.p_ncutvar + 1`` values: ``p_ncutvar`` equidistant steps from
    ``min_cv_cut[ipt]`` up to the central value
    ``self.lpt_probcutfin[bin_id]``, the central value itself, and
    ``p_ncutvar`` equidistant steps up to ``max_cv_cut[ipt]``. Each cut is
    applied on top of the previous one (the reconstructed frame is
    narrowed cumulatively).

    For every (cut index, second-variable bin) four histograms versus pt
    are written to ``self.n_fileeff_cutvar``: ``h_gen_pr``, ``h_sel_pr``,
    ``h_gen_fd`` and ``h_sel_fd``, holding the number of generated and
    selected candidates with ``ismcprompt == 1`` / ``ismcfd == 1`` and the
    square root of that number as error.

    Args:
        min_cv_cut: per-pt-bin lowest cut value of the scan.
        max_cv_cut: per-pt-bin highest cut value of the scan.
    """
    myfile = TFile.Open(self.n_fileeff_cutvar, "recreate")

    h_gen_pr, h_sel_pr, h_gen_fd, h_sel_fd = [], [], [], []
    n_var2_bins = len(self.lvar2_binmin)
    idx = 0

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        central_cut = self.lpt_probcutfin[bin_id]

        df = pickle.load(openfile(self.lpt_recodecmerged_mc[bin_id], "rb"))
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger_mc is not None:
            df = df.query(self.s_trigger_mc)
        print("Using run selection for eff histo", self.runlistrigger[self.triggerbit],
              "for period", self.period)
        df = selectdfrunlist(
            df, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        df_gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
        df_gen = df_gen.query(self.s_presel_gen_eff)
        df_gen = selectdfrunlist(
            df_gen, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        df_gen = seldf_singlevar(df_gen, self.v_var_binning,
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        stepsmin = (central_cut - min_cv_cut[ipt]) / self.p_ncutvar
        stepsmax = (max_cv_cut[ipt] - central_cut) / self.p_ncutvar

        # The histogram index restarts for every pt bin: the same set of
        # histograms receives one bin content per pt bin.
        idx = 0
        for icv in range(2 * self.p_ncutvar + 1):
            if icv < self.p_ncutvar:
                selml_cvval = min_cv_cut[ipt] + icv * stepsmin
            elif icv == self.p_ncutvar:
                selml_cvval = central_cut
            else:
                selml_cvval = central_cut + (icv - self.p_ncutvar) * stepsmax
            selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)
            print("Cutting on: ", selml_cv)
            df = df.query(selml_cv)

            for ibin2 in range(n_var2_bins):
                var2min = self.lvar2_binmin[ibin2]
                var2max = self.lvar2_binmax[ibin2]
                stringbin2 = "_%d_%s_%.2f_%.2f" % (icv, self.v_var2_binning,
                                                   var2min, var2max)

                # Histograms are booked once, while processing the first pt bin.
                if ipt == 0:
                    n_bins = len(self.lpt_finbinmin)
                    edges = self.lpt_finbinmin.copy()
                    edges.append(self.lpt_finbinmax[n_bins-1])
                    # NOTE(review): ``array`` is called directly here, which
                    # needs ``from array import array``; confirm this matches
                    # the module-level import.
                    analysis_bin_lims = array('f', edges)
                    h_gen_pr.append(TH1F("h_gen_pr" + stringbin2,
                                         "Prompt Generated in acceptance |y|<0.5",
                                         n_bins, analysis_bin_lims))
                    h_sel_pr.append(TH1F("h_sel_pr" + stringbin2,
                                         "Prompt Reco and sel in acc |#eta|<0.8 and sel",
                                         n_bins, analysis_bin_lims))
                    h_gen_fd.append(TH1F("h_gen_fd" + stringbin2,
                                         "FD Generated in acceptance |y|<0.5",
                                         n_bins, analysis_bin_lims))
                    h_sel_fd.append(TH1F("h_sel_fd" + stringbin2,
                                         "FD Reco and sel in acc |#eta|<0.8 and sel",
                                         n_bins, analysis_bin_lims))

                df_bin = seldf_singlevar(df, self.v_var2_binning, var2min, var2max)
                df_gen_bin = seldf_singlevar(df_gen, self.v_var2_binning, var2min, var2max)

                n_gen_pr = len(df_gen_bin[df_gen_bin.ismcprompt == 1])
                n_sel_pr = len(df_bin[df_bin.ismcprompt == 1])
                n_gen_fd = len(df_gen_bin[df_gen_bin.ismcfd == 1])
                n_sel_fd = len(df_bin[df_bin.ismcfd == 1])

                for histo, count in ((h_gen_pr[idx], n_gen_pr),
                                     (h_sel_pr[idx], n_sel_pr),
                                     (h_gen_fd[idx], n_gen_fd),
                                     (h_sel_fd[idx], n_sel_fd)):
                    histo.SetBinContent(ipt + 1, count)
                    histo.SetBinError(ipt + 1, math.sqrt(count))
                idx += 1

    myfile.cd()
    for i in range(idx):
        for histos in (h_gen_pr, h_sel_pr, h_gen_fd, h_sel_fd):
            histos[i].Write()
def process_efficiency_single(self, index):
    """Count generated, pre-selected and selected MC candidates of one file.

    For every second-variable bin (``self.lvar2_binmin_reco`` /
    ``self.lvar2_binmax_reco``) six histograms versus pt are written to
    ``self.l_histoeff[index]``: ``h_gen_*``, ``h_presel_*`` and ``h_sel_*``
    for candidates with ``ismcprompt == 1`` (``pr``) and ``ismcfd == 1``
    (``fd``). Bin contents are plain candidate counts, bin errors their
    square roots. The "sel" counts apply ``self.l_selml[bin_id]`` on top of
    the pre-selection only when ``self.doml`` is True; otherwise they equal
    the "presel" counts.

    The jet selections ``self.s_jetsel_reco`` and ``self.s_jetsel_gen`` are
    both optional (skipped when ``None``).

    Args:
        index: position of the file in the per-file lists.

    Note:
        The object returned by ``openfile`` is used as a context manager,
        i.e. it is assumed to behave like a regular file object.
    """
    # (name prefix, title), in the order the histograms are booked and written
    histo_specs = (
        ("h_gen_pr", "Prompt Generated in acceptance |y|<0.5"),
        ("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel"),
        ("h_gen_fd", "FD Generated in acceptance |y|<0.5"),
        ("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel"),
    )

    out_file = TFile.Open(self.l_histoeff[index], "recreate")
    good_runs = self.run_param[self.runlistrigger[self.triggerbit]]

    for ibin2 in range(self.p_nbin2_reco):
        var2min = self.lvar2_binmin_reco[ibin2]
        var2max = self.lvar2_binmax_reco[ibin2]
        stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, var2min, var2max)

        # Bin edges: lower edges of all pt bins plus the last upper edge.
        n_bins = self.p_nptfinbins
        analysis_bin_lims = array.array(
            'f', [*self.lpt_finbinmin, self.lpt_finbinmax[n_bins-1]])
        histos = {name: TH1F(name + stringbin2, title, n_bins, analysis_bin_lims)
                  for name, title in histo_specs}

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            ptmin = self.lpt_finbinmin[ipt]
            ptmax = self.lpt_finbinmax[ipt]

            with openfile(self.mptfiles_recoskmldec[bin_id][index], "rb") as reco_handle:
                df_mc_reco = pickle.load(reco_handle)
            if self.s_evtsel is not None:
                df_mc_reco = df_mc_reco.query(self.s_evtsel)
            if self.s_jetsel_reco is not None:
                df_mc_reco = df_mc_reco.query(self.s_jetsel_reco)
            if self.s_trigger is not None:
                df_mc_reco = df_mc_reco.query(self.s_trigger)
            df_mc_reco = selectdfrunlist(df_mc_reco, good_runs, "run_number")

            with openfile(self.mptfiles_gensk[bin_id][index], "rb") as gen_handle:
                df_mc_gen = pickle.load(gen_handle)
            # Same optional treatment as the reconstructed jet selection.
            if self.s_jetsel_gen is not None:
                df_mc_gen = df_mc_gen.query(self.s_jetsel_gen)
            df_mc_gen = selectdfrunlist(df_mc_gen, good_runs, "run_number")

            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, ptmin, ptmax)
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, ptmin, ptmax)
            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning, var2min, var2max)
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning, var2min, var2max)

            df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
            df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
            df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
            df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
            if self.doml is True:
                df_reco_sel_pr = df_reco_presel_pr.query(self.l_selml[bin_id])
                df_reco_sel_fd = df_reco_presel_fd.query(self.l_selml[bin_id])
            else:
                # Only the lengths are used below, so no copy is needed.
                df_reco_sel_pr = df_reco_presel_pr
                df_reco_sel_fd = df_reco_presel_fd

            counts = {
                "h_gen_pr": len(df_gen_sel_pr),
                "h_presel_pr": len(df_reco_presel_pr),
                "h_sel_pr": len(df_reco_sel_pr),
                "h_gen_fd": len(df_gen_sel_fd),
                "h_presel_fd": len(df_reco_presel_fd),
                "h_sel_fd": len(df_reco_sel_fd),
            }
            for name, val in counts.items():
                histos[name].SetBinContent(ipt + 1, val)
                histos[name].SetBinError(ipt + 1, math.sqrt(val))

        out_file.cd()
        for name, _ in histo_specs:
            histos[name].Write()

    # Release the output file explicitly instead of relying on garbage collection.
    out_file.Close()
def process_efficiency(self):
    """Fill and write the efficiency histograms from the merged MC samples.

    For every bin of the second binning variable (``lvar2_binmin`` /
    ``lvar2_binmax``) six TH1F histograms are booked on the pt axis and
    filled, one pt bin at a time, with the value and error returned by
    ``self.get_reweighted_count`` for the generated, pre-selected and
    ML-selected samples (``ismcprompt == 1`` and ``ismcfd == 1``
    separately). Output goes to ``self.n_fileeff``.
    """
    out_file = TFile.Open(self.n_fileeff, "recreate")

    # (name prefix, title) in booking order, which is also the write order.
    histo_defs = (
        ("h_gen_pr", "Prompt Generated in acceptance |y|<0.5"),
        ("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel"),
        ("h_gen_fd", "FD Generated in acceptance |y|<0.5"),
        ("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel"),
    )

    for ibin2 in range(len(self.lvar2_binmin)):
        var2_min = self.lvar2_binmin[ibin2]
        var2_max = self.lvar2_binmax[ibin2]
        stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, var2_min, var2_max)
        print(stringbin2)

        # pt axis edges: every lower edge followed by the last upper edge.
        n_bins = len(self.lpt_finbinmin)
        pt_edges = self.lpt_finbinmin.copy()
        pt_edges.append(self.lpt_finbinmax[n_bins - 1])
        analysis_bin_lims = array.array('f', pt_edges)

        histos = {}
        for name, title in histo_defs:
            histos[name] = TH1F(name + stringbin2, title, n_bins, analysis_bin_lims)

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            pt_min = self.lpt_finbinmin[ipt]
            pt_max = self.lpt_finbinmax[ipt]

            reco = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
            for selection in (self.s_evtsel, self.s_trigger):
                if selection is not None:
                    reco = reco.query(selection)
            print("Using run selection for eff histo",
                  self.runlistrigger[self.triggerbit], "for period", self.period)
            reco = selectdfrunlist(
                reco, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")

            gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
            gen = gen.query(self.s_presel_gen_eff)
            gen = selectdfrunlist(
                gen, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")

            # Restrict both samples to the current pt bin and var2 bin.
            reco = seldf_singlevar(reco, self.v_var_binning, pt_min, pt_max)
            gen = seldf_singlevar(gen, self.v_var_binning, pt_min, pt_max)
            reco = seldf_singlevar(reco, self.v_var2_binning, var2_min, var2_max)
            gen = seldf_singlevar(gen, self.v_var2_binning, var2_min, var2_max)

            gen_pr = gen[gen.ismcprompt == 1]
            presel_pr = reco[reco.ismcprompt == 1]
            sel_pr = presel_pr.query(self.l_selml[bin_id])
            gen_fd = gen[gen.ismcfd == 1]
            presel_fd = reco[reco.ismcfd == 1]
            sel_fd = presel_fd.query(self.l_selml[bin_id])

            counted = (
                ("h_gen_pr", gen_pr),
                ("h_presel_pr", presel_pr),
                ("h_sel_pr", sel_pr),
                ("h_gen_fd", gen_fd),
                ("h_presel_fd", presel_fd),
                ("h_sel_fd", sel_fd),
            )
            # ROOT bin numbering starts at 1, hence ipt + 1.
            for name, frame in counted:
                val, err = self.get_reweighted_count(frame)
                histos[name].SetBinContent(ipt + 1, val)
                histos[name].SetBinError(ipt + 1, err)

        out_file.cd()
        for name, _ in histo_defs:
            histos[name].Write()
def process_histomass(self):
    """Fill and write the invariant-mass histograms from the merged sample.

    For every final pt bin the merged reconstructed dataframe is loaded,
    the ML, event and trigger selections are applied, and for every bin of
    the second binning variable the following objects are written to
    ``self.n_filemass``:

    * ``hmass<suffix>``: invariant mass;
    * ``h_invmass_weight<suffix>``: invariant mass weighted by the inverse
      of the ``funcnorm_<trigger>_<var2>`` function. It is only filled for
      data with a trigger bit that does not contain "INT7", and only if
      the function could be read from ``<d_val>/correctionsweights.root``;
    * ``hzvsmass<suffix>``: z versus invariant mass, when a ``pt_jet``
      column is present;
    * ``hmass_sig<suffix>`` / ``hmass_refl<suffix>``: MC only.

    The weight file is opened once for the whole call and closed at the
    end, instead of being re-opened (and never closed) in every bin.
    """
    myfile = TFile.Open(self.n_filemass, "recreate")

    # The weight function depends only on the trigger bit and the second
    # binning variable, so it is looked up a single time.
    fileweight = None
    funcweighttrig = None
    if "INT7" not in self.triggerbit and self.mcordata == "data":
        fileweight_name = "%s/correctionsweights.root" % self.d_val
        fileweight = TFile.Open(fileweight_name, "read")
        if fileweight and not fileweight.IsZombie():
            namefunction = "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning)
            funcweighttrig = fileweight.Get(namefunction)
        else:
            # Without a readable weight file the weighted histograms stay
            # empty, exactly as when the function itself is missing.
            print("Could not open weight file", fileweight_name)
            fileweight = None
        # Opening another file changes ROOT's current directory; switch
        # back so the histograms booked below belong to the output file.
        myfile.cd()

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2],
                      self.lvar2_binmax[ibin2])
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])

            df_bin = seldf_singlevar(df, self.v_var2_binning,
                                     self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            print("Using run selection for mass histo",
                  self.runlistrigger[self.triggerbit], "for period", self.period)
            df_bin = selectdfrunlist(
                df_bin, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
            fill_hist(h_invmass, df_bin.inv_mass)

            # funcweighttrig is only set for non-INT7 data (see above).
            if funcweighttrig:
                weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                weightsinv = [1. / weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)

            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()

            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                    5000, 1.00, 6.00, 2000, -0.5, 1.5)
                # Two-column array (inv_mass, z), one row per candidate.
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()

            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                    df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    # Release the weight file only after its function is no longer needed.
    if fileweight is not None:
        fileweight.Close()
def process_histomass_single(self, index):
    """Write event-normalisation and invariant-mass histograms for one file.

    The event dataframe of file ``index`` is passed through the optional
    trigger query, run-list selection and event-selection query; the number
    of rows after each step is stored in the first four bins of
    ``histonorm``. The three histograms returned by
    ``gethistonormforselevt_varsel`` are written next, followed by one
    invariant-mass histogram per final pt bin (plus signal, background and
    reflection histograms for MC). Output goes to
    ``self.l_histomass[index]``.

    Args:
        index: position of the input file in the per-file path lists.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")

    # Event counters, recorded after each successive selection step.
    events = pickle.load(openfile(self.l_evtorig[index], "rb"))
    event_counts = [len(events)]
    if self.s_trigger is not None:
        events = events.query(self.s_trigger)
    event_counts.append(len(events))
    if self.runlistrigger is not None:
        events = selectdfrunlist(events, self.run_param[self.runlistrigger], "run_number")
    event_counts.append(len(events))
    selected_events = events if self.s_evtsel is None else events.query(self.s_evtsel)
    event_counts.append(len(selected_events))

    # Validation plot for the event selection.
    histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
    step_labels = ("tot events", "tot events after trigger",
                   "tot events after run sel", "tot events after evt sel")
    for ibin, (count, label) in enumerate(zip(event_counts, step_labels), start=1):
        histonorm.SetBinContent(ibin, count)
        histonorm.GetXaxis().SetBinLabel(ibin, label)
    histonorm.Write()

    myfile.cd()
    labeltrigger = "hbit%s" % (self.triggerbit)
    norm_histos = gethistonormforselevt_varsel(events, selected_events,
                                               labeltrigger, self.s_var_evt_sel)
    hsel, hnovtxmult, hvtxoutmult = norm_histos
    hsel.Write()
    hnovtxmult.Write()
    hvtxoutmult.Write()

    mass_lo = self.p_mass_fit_lim[0]
    mass_hi = self.p_mass_fit_lim[1]

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        cands = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        for selection in (self.s_evtsel, self.s_trigger):
            if selection is not None:
                cands = cands.query(selection)
        if self.runlistrigger is not None:
            cands = selectdfrunlist(cands, self.run_param[self.runlistrigger], "run_number")
        if self.doml is True:
            cands = cands.query(self.l_selml[bin_id])
        cands = seldf_singlevar(cands, self.v_var_binning,
                                self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        if self.do_custom_analysis_cuts:
            cands = self.apply_cuts_ptbin(cands, ipt)

        # Multi-class models carry two probability cuts per pt bin.
        if self.mltype == "MultiClassification":
            suffix = "%s%d_%d_%.2f%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt],
                      self.lpt_probcutfin[ipt][0], self.lpt_probcutfin[ipt][1])
        else:
            suffix = "%s%d_%d_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt],
                      self.lpt_probcutfin[ipt])

        h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins, mass_lo, mass_hi)
        fill_hist(h_invmass, cands[self.v_invmass])
        myfile.cd()
        h_invmass.Write()

        if self.mcordata != "mc":
            continue

        # MC only: split into signal, background and reflection components.
        cands[self.v_ismcrefl] = np.array(tag_bit_df(
            cands, self.v_bitvar, self.b_mcrefl), dtype=int)
        components = (
            ("hmass_sig", self.v_ismcsignal),
            ("hmass_bkg", self.v_ismcbkg),
            ("hmass_refl", self.v_ismcrefl),
        )
        subsets = [cands[cands[flag] == 1] for _, flag in components]
        mc_histos = [TH1F(prefix + suffix, "", self.p_num_bins, mass_lo, mass_hi)
                     for prefix, _ in components]
        for histo, subset in zip(mc_histos, subsets):
            fill_hist(histo, subset[self.v_invmass])
        myfile.cd()
        for histo in mc_histos:
            histo.Write()
def process_efficiency_single(self, index):
    """Fill and write the pt-differential efficiency histograms for one file.

    Six TH1F histograms are booked on the final pt axis: generated,
    pre-selected and selected counts, separately for rows with
    ``ismcprompt == 1`` and rows with ``ismcfd == 1``. Each pt bin receives
    the number of rows left after the selections, with the square root of
    that number as bin error. Output goes to ``self.l_histoeff[index]``.

    The leftover "stepN" debug tracing prints were removed; the final
    "FINALISED" completion message is kept.

    Args:
        index: position of the input file in the per-file path lists.
    """
    # TODO: TO UPDATE TO DHADRON_MULT VERSION
    out_file = TFile.Open(self.l_histoeff[index], "recreate")

    # pt axis edges: every lower edge followed by the last upper edge.
    n_bins = len(self.lpt_finbinmin)
    analysis_bin_lims_temp = self.lpt_finbinmin.copy()
    analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
    analysis_bin_lims = array.array('f', analysis_bin_lims_temp)

    # (name, title) in booking order, which is also the write order.
    histo_defs = (
        ("h_gen_pr", "Prompt Generated in acceptance |y|<0.5"),
        ("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel"),
        ("h_gen_fd", "FD Generated in acceptance |y|<0.5"),
        ("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel"),
    )
    histos = {}
    for name, title in histo_defs:
        histos[name] = TH1F(name, title, n_bins, analysis_bin_lims)

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]

        df_mc_reco = pickle.load(
            openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.s_evtsel is not None:
            df_mc_reco = df_mc_reco.query(self.s_evtsel)
        if self.s_trigger is not None:
            df_mc_reco = df_mc_reco.query(self.s_trigger)
        if self.runlistrigger is not None:
            df_mc_reco = selectdfrunlist(
                df_mc_reco, self.run_param[self.runlistrigger], "run_number")

        df_mc_gen = pickle.load(
            openfile(self.mptfiles_gensk[bin_id][index], "rb"))
        df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
        if self.runlistrigger is not None:
            df_mc_gen = selectdfrunlist(
                df_mc_gen, self.run_param[self.runlistrigger], "run_number")

        # Restrict both samples to the current pt bin.
        df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning,
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning,
                                    self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
        df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
        if self.doml is True:
            df_reco_sel_pr = df_reco_presel_pr.query(self.l_selml[bin_id])
        else:
            df_reco_sel_pr = df_reco_presel_pr.copy()

        df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
        df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
        if self.doml is True:
            df_reco_sel_fd = df_reco_presel_fd.query(self.l_selml[bin_id])
        else:
            df_reco_sel_fd = df_reco_presel_fd.copy()

        counted = (
            ("h_gen_pr", df_gen_sel_pr),
            ("h_presel_pr", df_reco_presel_pr),
            ("h_sel_pr", df_reco_sel_pr),
            ("h_gen_fd", df_gen_sel_fd),
            ("h_presel_fd", df_reco_presel_fd),
            ("h_sel_fd", df_reco_sel_fd),
        )
        # ROOT bin numbering starts at 1, hence ipt + 1.
        for name, frame in counted:
            val = len(frame)
            histos[name].SetBinContent(ipt + 1, val)
            histos[name].SetBinError(ipt + 1, math.sqrt(val))

    out_file.cd()
    for name, _ in histo_defs:
        histos[name].Write()
    print("FINALISED")
def process_efficiency_single(self, index):
    """Fill and write the efficiency histograms (with TOF split) for one file.

    For every bin of the second binning variable ten TH1F histograms are
    booked on the final pt axis. Besides the generated / pre-selected /
    selected histograms for ``ismcprompt == 1`` and ``ismcfd == 1`` rows,
    the prompt pre-selected and selected samples are additionally split by
    ``nsigTOF_Pr_0 < -998`` ("woTOF") and ``nsigTOF_Pr_0 > -998`` ("wTOF")
    when that column exists. Bin contents come from
    ``self.get_reweighted_count`` when ``self.corr_eff_mult[ibin2]`` is
    True, otherwise from the plain row count with a square-root error.
    Output goes to ``self.l_histoeff[index]``.

    Args:
        index: position of the input file in the per-file path lists.
    """
    out_file = TFile.Open(self.l_histoeff[index], "recreate")

    # (name prefix, title) in booking order, which is also the write order.
    histo_defs = (
        ("h_gen_pr", "Prompt Generated in acceptance |y|<0.5"),
        ("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel"),
        ("h_presel_pr_wotof", "Prompt Reco in acc woTOF |#eta|<0.8 and pre-sel"),
        ("h_presel_pr_wtof", "Prompt Reco in acc wTOF |#eta|<0.8 and pre-sel"),
        ("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel"),
        ("h_sel_pr_wotof", "Prompt Reco and sel woTOF in acc |#eta|<0.8"),
        ("h_sel_pr_wtof", "Prompt Reco and sel wTOF in acc |#eta|<0.8"),
        ("h_gen_fd", "FD Generated in acceptance |y|<0.5"),
        ("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel"),
    )

    def fill_bin(frame, histogram, var2_index, pt_index):
        """Store the (possibly reweighted) count of `frame` in one pt bin."""
        if self.corr_eff_mult[var2_index] is True:
            val, err = self.get_reweighted_count(frame, var2_index)
        else:
            val = len(frame)
            err = math.sqrt(val)
        # ROOT bin numbering starts at 1.
        histogram.SetBinContent(pt_index + 1, val)
        histogram.SetBinError(pt_index + 1, err)

    for ibin2 in range(len(self.lvar2_binmin)):
        var2_min = self.lvar2_binmin[ibin2]
        var2_max = self.lvar2_binmax[ibin2]
        stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen, var2_min, var2_max)

        # pt axis edges: every lower edge followed by the last upper edge.
        n_bins = len(self.lpt_finbinmin)
        pt_edges = self.lpt_finbinmin.copy()
        pt_edges.append(self.lpt_finbinmax[n_bins - 1])
        analysis_bin_lims = array.array('f', pt_edges)

        histos = {}
        for name, title in histo_defs:
            histos[name] = TH1F(name + stringbin2, title, n_bins, analysis_bin_lims)

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            pt_min = self.lpt_finbinmin[ipt]
            pt_max = self.lpt_finbinmax[ipt]

            reco = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            for selection in (self.s_evtsel, self.s_trigger):
                if selection is not None:
                    reco = reco.query(selection)
            if self.runlistrigger is not None:
                reco = selectdfrunlist(reco, self.run_param[self.runlistrigger], "run_number")

            gen = pickle.load(
                openfile(self.mptfiles_gensk[bin_id][index], "rb"))
            gen = gen.query(self.s_presel_gen_eff)
            if self.runlistrigger is not None:
                gen = selectdfrunlist(gen, self.run_param[self.runlistrigger], "run_number")

            reco = seldf_singlevar(reco, self.v_var_binning, pt_min, pt_max)
            gen = seldf_singlevar(gen, self.v_var_binning, pt_min, pt_max)

            # Whether or not to cut on the 2nd binning variable
            if self.mc_cut_on_binning2:
                reco = seldf_singlevar_inclusive(reco, self.v_var2_binning_gen,
                                                 var2_min, var2_max)
                gen = seldf_singlevar_inclusive(gen, self.v_var2_binning_gen,
                                                var2_min, var2_max)

            gen_pr = gen[gen.ismcprompt == 1]
            presel_pr = reco[reco.ismcprompt == 1]
            if self.doml is True:
                sel_pr = presel_pr.query(self.l_selml[bin_id])
            else:
                sel_pr = presel_pr.copy()

            gen_fd = gen[gen.ismcfd == 1]
            presel_fd = reco[reco.ismcfd == 1]
            if self.doml is True:
                sel_fd = presel_fd.query(self.l_selml[bin_id])
            else:
                sel_fd = presel_fd.copy()

            fill_bin(gen_pr, histos["h_gen_pr"], ibin2, ipt)
            if "nsigTOF_Pr_0" in presel_pr:
                fill_bin(presel_pr[presel_pr.nsigTOF_Pr_0 < -998],
                         histos["h_presel_pr_wotof"], ibin2, ipt)
                fill_bin(presel_pr[presel_pr.nsigTOF_Pr_0 > -998],
                         histos["h_presel_pr_wtof"], ibin2, ipt)
            fill_bin(presel_pr, histos["h_presel_pr"], ibin2, ipt)
            fill_bin(sel_pr, histos["h_sel_pr"], ibin2, ipt)
            if "nsigTOF_Pr_0" in sel_pr:
                fill_bin(sel_pr[sel_pr.nsigTOF_Pr_0 < -998],
                         histos["h_sel_pr_wotof"], ibin2, ipt)
                fill_bin(sel_pr[sel_pr.nsigTOF_Pr_0 > -998],
                         histos["h_sel_pr_wtof"], ibin2, ipt)
            fill_bin(gen_fd, histos["h_gen_fd"], ibin2, ipt)
            fill_bin(presel_fd, histos["h_presel_fd"], ibin2, ipt)
            fill_bin(sel_fd, histos["h_sel_fd"], ibin2, ipt)

        out_file.cd()
        for name, _ in histo_defs:
            histos[name].Write()
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled dataframes.

    The event tree is read, restricted with ``self.runlist`` and
    ``self.s_cen_unp`` and pickled to ``self.l_evtorig[file_index]``; the
    subset passing ``self.s_good_evt_unp`` is pickled to
    ``self.l_evt[file_index]``. The reconstructed tree is selected, merged
    with the good events on ``self.v_evtmatch``, tagged with bit-derived
    flag columns and pickled to ``self.l_reco[file_index]``. For MC the
    generated tree is treated likewise (merged with the unfiltered events)
    and pickled to ``self.l_gen[file_index]``.

    The ROOT file is opened once and shared by all tree look-ups instead of
    being re-opened for every tree.

    Args:
        file_index: position of the input file in the per-file path lists.
    """
    def tag(frame, column, bitmask):
        """Add an integer flag column derived from the candidate bit map."""
        frame[column] = np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    rootfile = uproot.open(self.l_root[file_index])

    # Event tree -> dataframe
    treeevtorig = rootfile[self.n_treeevt]
    dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
    # Only save events within the given run period & required centrality
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    # Reset dataframe index and save to "original" pickle file
    dfevtorig = dfevtorig.reset_index(drop=True)
    dfevtorig.to_pickle(self.l_evtorig[file_index])

    # Select "good" events and save to a second pickle file
    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    dfevt.to_pickle(self.l_evt[file_index])

    # Reconstructed tree -> dataframe
    treereco = rootfile[self.n_treereco]
    # NOTE(review): this only prints and then continues, as in the original
    # code; confirm what a falsy tree object means for the uproot version
    # in use before turning it into an early return.
    if not treereco:
        print("Couldn't find tree %s in file %s" %
              (self.n_treereco, self.l_root[file_index]))
    dfreco = treereco.pandas.df(branches=self.v_all)
    # Only save events within the given run period & required cuts
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    # The fiducial-acceptance cut is skipped when the case name contains 'Jet'.
    if 'Jet' not in self.case:
        isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
        dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    tag(dfreco, self.v_isstd, self.b_std)
    dfreco = dfreco.reset_index(drop=True)
    if self.mcordata == "mc":
        tag(dfreco, self.v_ismcsignal, self.b_mcsig)
        tag(dfreco, self.v_ismcprompt, self.b_mcsigprompt)
        tag(dfreco, self.v_ismcfd, self.b_mcsigfd)
        tag(dfreco, self.v_ismcbkg, self.b_mcbkg)
    # Save reconstructed data to another pickle file
    dfreco.to_pickle(self.l_reco[file_index])

    if self.mcordata == "mc":
        # Generated tree -> dataframe, merged with the unfiltered events.
        treegen = rootfile[self.n_treegen]
        dfgen = treegen.pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        tag(dfgen, self.v_isstd, self.b_std)
        tag(dfgen, self.v_ismcsignal, self.b_mcsig)
        tag(dfgen, self.v_ismcprompt, self.b_mcsigprompt)
        tag(dfgen, self.v_ismcfd, self.b_mcsigfd)
        tag(dfgen, self.v_ismcbkg, self.b_mcbkg)
        dfgen = dfgen.reset_index(drop=True)
        dfgen.to_pickle(self.l_gen[file_index])
def process_histomass_single(self, index):
    """Write the invariant-mass histograms of one file, binned in pt and var2.

    For every final pt bin the skimmed reconstructed dataframe of file
    ``index`` is loaded and passed through the optional ML, event, jet and
    trigger queries. An inclusive ``hmass_<ipt>`` histogram is written
    first. Then, for each bin of the second binning variable, the method
    writes ``hmass<suffix>``, the 2D ``hzvsmass<suffix>`` (invariant mass
    versus ``self.v_varshape_binning``) and, for MC, ``hmass_sig<suffix>``
    and ``hmass_refl<suffix>``. Output goes to ``self.l_histomass[index]``.

    The bin-edge arrays of the 2D histogram do not depend on the loop
    variables and are therefore built once, before the loops.

    Args:
        index: position of the input file in the per-file path lists.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")

    # 5000 bins in range 1.0-6.0 (5001 edges); loop-invariant.
    massarray = [1.0 + i * (5.0 / 5000.0) for i in range(5001)]
    massarray_reco = array.array('d', massarray)
    zarray_reco = array.array('d', self.varshaperanges_reco)

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_jetsel_reco is not None:
            df = df.query(self.s_jetsel_reco)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)

        # Inclusive mass spectrum, filled before the pt-bin restriction.
        h_invmass_all = TH1F("hmass_%d" % ipt, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
        fill_hist(h_invmass_all, df.inv_mass)
        myfile.cd()
        h_invmass_all.Write()

        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        for ibin2 in range(self.p_nbin2_reco):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin_reco[ibin2],
                      self.lvar2_binmax_reco[ibin2])
            df_bin = seldf_singlevar(df, self.v_var2_binning,
                                     self.lvar2_binmin_reco[ibin2],
                                     self.lvar2_binmax_reco[ibin2])
            df_bin = selectdfrunlist(
                df_bin, self.run_param[self.runlistrigger[self.triggerbit]], "run_number")

            # add the z column
            df_bin["z"] = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                 df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)

            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            fill_hist(h_invmass, df_bin.inv_mass)
            myfile.cd()
            h_invmass.Write()

            h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                5000, massarray_reco, self.p_nbinshape_reco, zarray_reco)
            h_zvsinvmass.Sumw2()
            fill2dhist(df_bin, h_zvsinvmass, "inv_mass", self.v_varshape_binning)
            h_zvsinvmass.Write()

            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                    df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled dataframes.

    The event tree is read, restricted with ``self.runlist`` and
    ``self.s_cen_unp`` and pickled to ``self.l_evtorig[file_index]``; the
    subset passing ``self.s_good_evt_unp`` is pickled to
    ``self.l_evt[file_index]``. A two-bin normalisation histogram
    ``hEvForNorm`` is written to ``self.l_evtorigroot[file_index]``. The
    reconstructed tree is selected, merged with the good events on
    ``self.v_evtmatch``, cut on fiducial acceptance, tagged with
    bit-derived flag columns and pickled to ``self.l_reco[file_index]``.
    For MC the generated tree is treated likewise (merged with the
    unfiltered events) and pickled to ``self.l_gen[file_index]``.

    The ROOT input is opened once for all trees, and every pickle output
    handle is closed explicitly right after the dump instead of being left
    to the garbage collector.

    Args:
        file_index: position of the input file in the per-file path lists.
    """
    def dump(frame, path):
        """Pickle `frame` to `path` and close the handle deterministically."""
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    def tag(frame, column, bitmask):
        """Add an integer flag column derived from the candidate bit map."""
        frame[column] = np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    rootfile = uproot.open(self.l_root[file_index])

    treeevtorig = rootfile[self.n_treeevt]
    dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    dump(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    dump(dfevt, self.l_evt[file_index])

    fileevtroot = TFile.Open(self.l_evtorigroot[file_index], "recreate")
    hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
    hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
    hNorm.GetXaxis().SetBinLabel(2, "selected events")
    nselevt = 0
    norm = 0
    # Guard against an empty dataframe: the original author observed that,
    # without this check, the function could silently return early and skip
    # everything that follows.
    if not dfevt.empty:
        nselevt = len(dfevt.query("is_ev_rej==0"))
        norm = getnormforselevt(dfevt)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()
    fileevtroot.Close()

    treereco = rootfile[self.n_treereco]
    dfreco = treereco.pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    tag(dfreco, self.v_isstd, self.b_std)
    dfreco = dfreco.reset_index(drop=True)
    if self.mcordata == "mc":
        tag(dfreco, self.v_ismcsignal, self.b_mcsig)
        tag(dfreco, self.v_ismcprompt, self.b_mcsigprompt)
        tag(dfreco, self.v_ismcfd, self.b_mcsigfd)
        tag(dfreco, self.v_ismcbkg, self.b_mcbkg)
    dump(dfreco, self.l_reco[file_index])

    if self.mcordata == "mc":
        treegen = rootfile[self.n_treegen]
        dfgen = treegen.pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        tag(dfgen, self.v_isstd, self.b_std)
        tag(dfgen, self.v_ismcsignal, self.b_mcsig)
        tag(dfgen, self.v_ismcprompt, self.b_mcsigprompt)
        tag(dfgen, self.v_ismcfd, self.b_mcsigfd)
        tag(dfgen, self.v_ismcbkg, self.b_mcbkg)
        dfgen = dfgen.reset_index(drop=True)
        dump(dfgen, self.l_gen[file_index])
def process_response(self):
    """
    Build the response histograms and response matrices from the merged MC.

    For every skimmed HF-candidate pt bin the generated and reconstructed
    MC dataframes are loaded and selected: generated entries by run list
    and ``s_jetsel_gen``; reconstructed entries by the optional event, jet
    and trigger selections and, when ``doml`` is True, by the ML selection
    of that pt bin. The per-bin dataframes are then concatenated, the ``z``
    columns (and ``z_gen`` for the reconstructed sample) are added, and the
    samples are split into prompt (``ismcprompt == 1``) and non-prompt
    (``ismcfd == 1``) parts.

    Histograms whose name carries ``_nonprompt`` are filled from the
    non-prompt sample (feed-down response); the others are filled from the
    prompt sample (unfolding). A closure test is prepared by splitting the
    prompt sample with ``train_test_split`` (test fraction
    ``closure_frac``). Everything is written to ``n_fileeff``, which is
    opened in "update" mode and closed at the end.
    """

    def normalise_2d(hist):
        # Scale a 2D histogram to unit integral; an empty histogram is left
        # untouched instead of dividing by zero.
        norm = hist.Integral(1, -1, 1, -1)
        if norm > 0:
            hist.Scale(1.0 / norm)

    def normalise_1d(hist):
        # Same as normalise_2d for a 1D histogram.
        norm = hist.Integral(1, -1)
        if norm:
            hist.Scale(1.0 / norm)

    def fill_weighted_response(response, df_matched):
        # Fill a response object row by row. When doprior is True each entry
        # is weighted by the inverse content of the matching
        # hzvsjetpt_prior_weights bin (weight 1 if that bin is empty).
        for row in df_matched.itertuples():
            shape_reco = getattr(row, self.v_varshape_binning)
            shape_gen = getattr(row, self.v_varshape_binning_gen)
            response_matrix_weight = 1.0
            if self.doprior is True:
                binx = hzvsjetpt_prior_weights.GetXaxis().FindBin(shape_gen)
                biny = hzvsjetpt_prior_weights.GetYaxis().FindBin(row.pt_gen_jet)
                weight = hzvsjetpt_prior_weights.GetBinContent(binx, biny)
                if weight > 0.0:
                    response_matrix_weight = 1.0 / weight
            response.Fill(shape_reco, row.pt_jet, shape_gen, row.pt_gen_jet,
                          response_matrix_weight)

    # Bin counts and bin-edge arrays for the shape variable (z) and for the
    # second binning variable (jet pt), at reco and gen level.
    nzbin_reco = self.p_nbinshape_reco
    zbinarray_reco = array.array('d', self.varshaperanges_reco)
    nzbin_gen = self.p_nbinshape_gen
    zbinarray_gen = array.array('d', self.varshaperanges_gen)
    njetptbin_reco = self.p_nbin2_reco
    jetptbinarray_reco = array.array('d', self.var2ranges_reco)
    njetptbin_gen = self.p_nbin2_gen
    jetptbinarray_gen = array.array('d', self.var2ranges_gen)

    # Candidate pt edges: all lower edges plus the last upper edge.
    candptbin = self.lpt_finbinmin.copy()
    candptbin.append(self.lpt_finbinmax[-1])
    candptbinarray = array.array('d', candptbin)

    out_file = TFile.Open(self.n_fileeff, "update")

    list_df_mc_reco = []
    list_df_mc_gen = []
    for iptskim, _ in enumerate(self.lpt_anbinmin):
        df_mc_gen = pickle.load(openfile(self.lpt_gendecmerged[iptskim], "rb"))
        df_mc_gen = selectdfrunlist(df_mc_gen,
                                    self.run_param[self.runlistrigger[self.triggerbit]],
                                    "run_number")
        df_mc_gen = df_mc_gen.query(self.s_jetsel_gen)
        list_df_mc_gen.append(df_mc_gen)

        df_mc_reco = pickle.load(openfile(self.lpt_recodecmerged[iptskim], "rb"))
        if self.s_evtsel is not None:
            df_mc_reco = df_mc_reco.query(self.s_evtsel)
        if self.s_jetsel_reco is not None:
            df_mc_reco = df_mc_reco.query(self.s_jetsel_reco)
        if self.s_trigger is not None:
            df_mc_reco = df_mc_reco.query(self.s_trigger)
        if self.doml is True:
            df_mc_reco = df_mc_reco.query(self.l_selml[iptskim])
        list_df_mc_reco.append(df_mc_reco)

    # Merge the dataframes of the different HF pt bins into a single one for
    # gen and a single one for reco.
    df_gen = pd.concat(list_df_mc_gen)
    df_mc_reco = pd.concat(list_df_mc_reco)

    # Add the z columns.
    df_gen["z"] = z_calc(df_gen.pt_jet, df_gen.phi_jet, df_gen.eta_jet,
                         df_gen.pt_cand, df_gen.phi_cand, df_gen.eta_cand)
    df_mc_reco["z"] = z_calc(df_mc_reco.pt_jet, df_mc_reco.phi_jet, df_mc_reco.eta_jet,
                             df_mc_reco.pt_cand, df_mc_reco.phi_cand, df_mc_reco.eta_cand)
    df_mc_reco["z_gen"] = z_gen_calc(df_mc_reco.pt_gen_jet, df_mc_reco.phi_gen_jet,
                                     df_mc_reco.eta_gen_jet, df_mc_reco.pt_gen_cand,
                                     df_mc_reco.delta_phi_gen_jet,
                                     df_mc_reco.delta_eta_gen_jet)

    # Split into prompt and non-prompt (feed-down) samples.
    df_gen_nonprompt = df_gen[df_gen.ismcfd == 1]
    df_gen_prompt = df_gen[df_gen.ismcprompt == 1]
    df_mc_reco_merged_nonprompt = df_mc_reco[df_mc_reco.ismcfd == 1]
    df_mc_reco_merged_prompt = df_mc_reco[df_mc_reco.ismcprompt == 1]

    # Generated-level distributions of z, pt_jet (and pt_cand for the 3D
    # one). These were used in the first version of the feed-down
    # subtraction and are currently obsolete.
    hzvsjetpt_gen_unmatched = TH2F("hzvsjetpt_gen_unmatched", "hzvsjetpt_gen_unmatched",
                                   nzbin_gen, zbinarray_gen,
                                   njetptbin_gen, jetptbinarray_gen)
    df_zvsjetpt_gen_unmatched = df_gen_prompt.loc[:, [self.v_varshape_binning, "pt_jet"]]
    fill_hist(hzvsjetpt_gen_unmatched, df_zvsjetpt_gen_unmatched)
    hzvsjetpt_gen_unmatched.Write()
    titlehist = "hzvsjetptvscandpt_gen_nonprompt"
    hzvsjetptvscandpt_gen_nonprompt = makefill3dhist(df_gen_nonprompt, titlehist,
                                                     zbinarray_gen, jetptbinarray_gen,
                                                     candptbinarray,
                                                     self.v_varshape_binning,
                                                     "pt_jet", "pt_cand")
    hzvsjetptvscandpt_gen_nonprompt.Write()

    # hz_gen_nocuts is the distribution of generated z in bins of gen jet pt
    # before the reco z and reco jet pt selection; hz_gen_cuts additionally
    # requires z reco and jet pt reco inside the reco range. They are used
    # for the overall efficiency correction, to estimate the fraction of
    # candidates inside the reco range but outside the gen range and vice
    # versa.
    for ibin2 in range(self.p_nbin2_gen):
        suffix = "%s_%.2f_%.2f" % \
            (self.v_var2_binning, self.lvar2_binmin_gen[ibin2],
             self.lvar2_binmax_gen[ibin2])

        hz_gen_nocuts = TH1F("hz_gen_nocuts_nonprompt" + suffix,
                             "hz_gen_nocuts_nonprompt" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_nocuts.Sumw2()
        hz_gen_cuts = TH1F("hz_gen_cuts_nonprompt" + suffix,
                           "hz_gen_cuts_nonprompt" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_cuts.Sumw2()

        df_tmp = seldf_singlevar(df_mc_reco_merged_nonprompt, "pt_gen_jet",
                                 self.lvar2_binmin_gen[ibin2],
                                 self.lvar2_binmax_gen[ibin2])
        df_tmp = seldf_singlevar(df_tmp, self.v_varshape_binning_gen,
                                 self.lvarshape_binmin_gen[0],
                                 self.lvarshape_binmax_gen[-1])
        fill_hist(hz_gen_nocuts, df_tmp[self.v_varshape_binning_gen])
        df_tmp = seldf_singlevar(df_tmp, "pt_jet",
                                 self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
        df_tmp = seldf_singlevar(df_tmp, self.v_varshape_binning,
                                 self.lvarshape_binmin_reco[0],
                                 self.lvarshape_binmax_reco[-1])
        fill_hist(hz_gen_cuts, df_tmp[self.v_varshape_binning_gen])
        hz_gen_cuts.Write()
        hz_gen_nocuts.Write()

        # Addendum for unfolding: same histograms for the prompt sample.
        hz_gen_nocuts_pr = TH1F("hz_gen_nocuts" + suffix,
                                "hz_gen_nocuts" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_nocuts_pr.Sumw2()
        hz_gen_cuts_pr = TH1F("hz_gen_cuts" + suffix,
                              "hz_gen_cuts" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_cuts_pr.Sumw2()

        df_tmp_pr = seldf_singlevar(df_mc_reco_merged_prompt, "pt_gen_jet",
                                    self.lvar2_binmin_gen[ibin2],
                                    self.lvar2_binmax_gen[ibin2])
        df_tmp_pr = seldf_singlevar(df_tmp_pr, self.v_varshape_binning_gen,
                                    self.lvarshape_binmin_gen[0],
                                    self.lvarshape_binmax_gen[-1])
        fill_hist(hz_gen_nocuts_pr, df_tmp_pr[self.v_varshape_binning_gen])
        df_tmp_pr = seldf_singlevar(df_tmp_pr, "pt_jet",
                                    self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
        df_tmp_pr = seldf_singlevar(df_tmp_pr, self.v_varshape_binning,
                                    self.lvarshape_binmin_reco[0],
                                    self.lvarshape_binmax_reco[-1])
        fill_hist(hz_gen_cuts_pr, df_tmp_pr[self.v_varshape_binning_gen])
        hz_gen_cuts_pr.Write()
        hz_gen_nocuts_pr.Write()
        # End addendum for unfolding

    # create_df_closure returns three dataframes; judging by how they are
    # used below they are the gen-selected, reco-selected and gen+reco
    # selected candidates (defined elsewhere -- confirm there).
    df_tmp_selgen, df_tmp_selreco, df_tmp_selrecogen = \
        self.create_df_closure(df_mc_reco_merged_nonprompt)
    df_tmp_selgen_pr, df_tmp_selreco_pr, df_tmp_selrecogen_pr = \
        self.create_df_closure(df_mc_reco_merged_prompt)

    # Histograms for the response of feed-down.
    hzvsjetpt_reco_nocuts = \
        build2dhisto("hzvsjetpt_reco_nocuts_nonprompt", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_reco_cuts = \
        build2dhisto("hzvsjetpt_reco_cuts_nonprompt", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_gen_nocuts = \
        build2dhisto("hzvsjetpt_gen_nocuts_nonprompt", zbinarray_gen, jetptbinarray_gen)
    hzvsjetpt_gen_cuts = \
        build2dhisto("hzvsjetpt_gen_cuts_nonprompt", zbinarray_gen, jetptbinarray_gen)

    # Empty clones only provide the reco/gen binning of the response matrix.
    hzvsjetpt_reco = hzvsjetpt_reco_nocuts.Clone("hzvsjetpt_reco_nonprompt")
    hzvsjetpt_gen = hzvsjetpt_gen_nocuts.Clone("hzvsjetpt_genv")
    response_matrix = RooUnfoldResponse(hzvsjetpt_reco, hzvsjetpt_gen)

    fill2dhist(df_tmp_selreco, hzvsjetpt_reco_nocuts, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen, hzvsjetpt_gen_nocuts,
               self.v_varshape_binning_gen, "pt_gen_jet")
    fill2dhist(df_tmp_selrecogen, hzvsjetpt_reco_cuts, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selrecogen, hzvsjetpt_gen_cuts,
               self.v_varshape_binning_gen, "pt_gen_jet")

    hzvsjetpt_reco_nocuts.Write()
    hzvsjetpt_gen_nocuts.Write()
    hzvsjetpt_reco_cuts.Write()
    hzvsjetpt_gen_cuts.Write()

    # Histograms for unfolding (prompt sample).
    hzvsjetpt_reco_nocuts_pr = \
        build2dhisto("hzvsjetpt_reco_nocuts", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_reco_cuts_pr = \
        build2dhisto("hzvsjetpt_reco_cuts", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_gen_nocuts_pr = \
        build2dhisto("hzvsjetpt_gen_nocuts", zbinarray_gen, jetptbinarray_gen)
    hzvsjetpt_gen_cuts_pr = \
        build2dhisto("hzvsjetpt_gen_cuts", zbinarray_gen, jetptbinarray_gen)

    fill2dhist(df_tmp_selreco_pr, hzvsjetpt_reco_nocuts_pr,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen_pr, hzvsjetpt_gen_nocuts_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_reco_cuts_pr,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_gen_cuts_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    hzvsjetpt_reco_nocuts_pr.Write()
    hzvsjetpt_gen_nocuts_pr.Write()
    hzvsjetpt_reco_cuts_pr.Write()
    hzvsjetpt_gen_cuts_pr.Write()

    hzvsjetpt_reco_closure_pr = \
        build2dhisto("hzvsjetpt_reco_closure", zbinarray_reco, jetptbinarray_reco)
    # NOTE(review): this gen-level closure histogram is booked with the RECO
    # binning although it is filled with gen-level variables below. Kept as
    # is because consumers of "input_closure_gen" may rely on it -- confirm
    # whether zbinarray_gen / jetptbinarray_gen was intended.
    hzvsjetpt_gen_closure_pr = \
        build2dhisto("hzvsjetpt_gen_closure", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_reco_pr = \
        build2dhisto("hzvsjetpt_reco", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_gen_pr = \
        build2dhisto("hzvsjetpt_gen", zbinarray_gen, jetptbinarray_gen)
    # Both responses are constructed from the still-empty binning histograms.
    response_matrix_pr = RooUnfoldResponse(hzvsjetpt_reco_pr, hzvsjetpt_gen_pr)
    response_matrix_closure_pr = RooUnfoldResponse(hzvsjetpt_reco_pr, hzvsjetpt_gen_pr)

    fill2dhist(df_tmp_selreco_pr, hzvsjetpt_reco_pr, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen_pr, hzvsjetpt_gen_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    hzvsjetpt_reco_pr.Write()
    hzvsjetpt_gen_pr.Write()

    hjetpt_gen_nocuts_pr = TH1F("hjetpt_gen_nocuts",
                                "hjetpt_gen_nocuts", njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_cuts_pr = TH1F("hjetpt_gen_cuts",
                              "hjetpt_gen_cuts", njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_nocuts_closure = TH1F("hjetpt_gen_nocuts_closure",
                                     "hjetpt_gen_nocuts_closure",
                                     njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_cuts_closure = TH1F("hjetpt_gen_cuts_closure",
                                   "hjetpt_gen_cuts_closure",
                                   njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_nocuts_pr.Sumw2()
    hjetpt_gen_cuts_pr.Sumw2()
    hjetpt_gen_nocuts_closure.Sumw2()
    # Previously Sumw2 was called twice on the "nocuts" closure histogram and
    # never on the "cuts" one.
    hjetpt_gen_cuts_closure.Sumw2()

    fill_hist(hjetpt_gen_nocuts_pr, df_tmp_selgen_pr["pt_gen_jet"])
    fill_hist(hjetpt_gen_cuts_pr, df_tmp_selrecogen_pr["pt_gen_jet"])
    hjetpt_gen_nocuts_pr.Write()
    hjetpt_gen_cuts_pr.Write()
    # End of histograms for unfolding

    # Finely binned (100 sub-bins per analysis bin) gen-vs-reco maps over the
    # full range, non-prompt sample.
    hjetpt_genvsreco_full = \
        TH2F("hjetpt_genvsreco_full_nonprompt", "hjetpt_genvsreco_full_nonprompt",
             njetptbin_gen * 100, self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1],
             njetptbin_reco * 100, self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
    hz_genvsreco_full = \
        TH2F("hz_genvsreco_full_nonprompt", "hz_genvsreco_full_nonprompt",
             nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1],
             nzbin_reco * 100, self.lvarshape_binmin_reco[0],
             self.lvarshape_binmax_reco[-1])

    fill2dhist(df_tmp_selrecogen, hjetpt_genvsreco_full, "pt_gen_jet", "pt_jet")
    normalise_2d(hjetpt_genvsreco_full)
    hjetpt_genvsreco_full.Write()
    fill2dhist(df_tmp_selrecogen, hz_genvsreco_full,
               self.v_varshape_binning_gen, self.v_varshape_binning)
    normalise_2d(hz_genvsreco_full)
    hz_genvsreco_full.Write()

    # Unweighted response matrix for the non-prompt sample.
    for row in df_tmp_selrecogen.itertuples():
        response_matrix.Fill(getattr(row, self.v_varshape_binning), row.pt_jet,
                             getattr(row, self.v_varshape_binning_gen), row.pt_gen_jet)
    response_matrix.Write("response_matrix_nonprompt")

    # Histograms for unfolding: same fine maps for the prompt sample.
    hjetpt_genvsreco_full_pr = \
        TH2F("hjetpt_genvsreco_full", "hjetpt_genvsreco_full",
             njetptbin_gen * 100, self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1],
             njetptbin_reco * 100, self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
    hz_genvsreco_full_pr = \
        TH2F("hz_genvsreco_full", "hz_genvsreco_full",
             nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1],
             nzbin_reco * 100, self.lvarshape_binmin_reco[0],
             self.lvarshape_binmax_reco[-1])
    fill2dhist(df_tmp_selrecogen_pr, hjetpt_genvsreco_full_pr, "pt_gen_jet", "pt_jet")
    normalise_2d(hjetpt_genvsreco_full_pr)
    hjetpt_genvsreco_full_pr.Write()
    fill2dhist(df_tmp_selrecogen_pr, hz_genvsreco_full_pr,
               self.v_varshape_binning_gen, self.v_varshape_binning)
    normalise_2d(hz_genvsreco_full_pr)
    hz_genvsreco_full_pr.Write()

    # Gen-level (z, jet pt) population of the matched prompt candidates; its
    # bin contents provide the prior weights used by fill_weighted_response.
    hzvsjetpt_prior_weights = build2dhisto("hzvsjetpt_prior_weights",
                                           zbinarray_gen, jetptbinarray_gen)
    fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_prior_weights,
               self.v_varshape_binning_gen, "pt_gen_jet")
    # End of histograms for unfolding

    # z gen-vs-reco maps in bins of reco jet pt.
    for ibin2 in range(self.p_nbin2_reco):
        suffix = "%s_%.2f_%.2f" % (self.v_var2_binning,
                                   self.lvar2_binmin_reco[ibin2],
                                   self.lvar2_binmax_reco[ibin2])

        df_tmp_selrecogen_jetbin = seldf_singlevar(df_tmp_selrecogen, "pt_jet",
                                                   self.lvar2_binmin_reco[ibin2],
                                                   self.lvar2_binmax_reco[ibin2])
        hz_genvsreco = TH2F("hz_genvsreco_nonprompt" + suffix,
                            "hz_genvsreco_nonprompt" + suffix,
                            nzbin_gen * 100, self.lvarshape_binmin_gen[0],
                            self.lvarshape_binmax_gen[-1],
                            nzbin_reco * 100, self.lvarshape_binmin_reco[0],
                            self.lvarshape_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_jetbin, hz_genvsreco,
                   self.v_varshape_binning_gen, self.v_varshape_binning)
        normalise_2d(hz_genvsreco)
        hz_genvsreco.Write()

        df_tmp_selrecogen_pr_jetbin = seldf_singlevar(df_tmp_selrecogen_pr, "pt_jet",
                                                      self.lvar2_binmin_reco[ibin2],
                                                      self.lvar2_binmax_reco[ibin2])
        hz_genvsreco_pr = TH2F("hz_genvsreco" + suffix, "hz_genvsreco" + suffix,
                               nzbin_gen * 100, self.lvarshape_binmin_gen[0],
                               self.lvarshape_binmax_gen[-1],
                               nzbin_reco * 100, self.lvarshape_binmin_reco[0],
                               self.lvarshape_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_pr_jetbin, hz_genvsreco_pr,
                   self.v_varshape_binning_gen, self.v_varshape_binning)
        normalise_2d(hz_genvsreco_pr)
        hz_genvsreco_pr.Write()

    # Jet pt gen-vs-reco maps in bins of reco z.
    for shape_min, shape_max in zip(self.lvarshape_binmin_reco,
                                    self.lvarshape_binmax_reco):
        suffix = "%s_%.2f_%.2f" % (self.v_varshape_binning, shape_min, shape_max)

        df_tmp_selrecogen_zbin = seldf_singlevar(df_tmp_selrecogen,
                                                 self.v_varshape_binning,
                                                 shape_min, shape_max)
        hjetpt_genvsreco = TH2F("hjetpt_genvsreco_nonprompt" + suffix,
                                "hjetpt_genvsreco_nonprompt" + suffix,
                                njetptbin_gen * 100, self.lvar2_binmin_gen[0],
                                self.lvar2_binmax_gen[-1],
                                njetptbin_reco * 100, self.lvar2_binmin_reco[0],
                                self.lvar2_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_zbin, hjetpt_genvsreco, "pt_gen_jet", "pt_jet")
        normalise_2d(hjetpt_genvsreco)
        hjetpt_genvsreco.Write()

        df_tmp_selrecogen_pr_zbin = seldf_singlevar(df_tmp_selrecogen_pr,
                                                    self.v_varshape_binning,
                                                    shape_min, shape_max)
        hjetpt_genvsreco_pr = TH2F("hjetpt_genvsreco" + suffix,
                                   "hjetpt_genvsreco" + suffix,
                                   njetptbin_gen * 100, self.lvar2_binmin_gen[0],
                                   self.lvar2_binmax_gen[-1],
                                   njetptbin_reco * 100, self.lvar2_binmin_reco[0],
                                   self.lvar2_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_pr_zbin, hjetpt_genvsreco_pr, "pt_gen_jet", "pt_jet")
        normalise_2d(hjetpt_genvsreco_pr)
        hjetpt_genvsreco_pr.Write()

    # Relative z difference (reco - gen) / gen in bins of gen z.
    for shape_min, shape_max in zip(self.lvarshape_binmin_gen,
                                    self.lvarshape_binmax_gen):
        suffix = "%s_%.2f_%.2f" % (self.v_varshape_binning, shape_min, shape_max)

        dtmp_nonprompt_zgen = seldf_singlevar(df_mc_reco_merged_nonprompt,
                                              self.v_varshape_binning_gen,
                                              shape_min, shape_max)
        hz_fracdiff = TH1F("hz_fracdiff_nonprompt" + suffix,
                           "hz_fracdiff_nonprompt" + suffix, 100, -2, 2)
        fill_hist(hz_fracdiff,
                  (dtmp_nonprompt_zgen[self.v_varshape_binning] -
                   dtmp_nonprompt_zgen[self.v_varshape_binning_gen]) /
                  dtmp_nonprompt_zgen[self.v_varshape_binning_gen])
        normalise_1d(hz_fracdiff)
        hz_fracdiff.Write()

        dtmp_prompt_zgen = seldf_singlevar(df_mc_reco_merged_prompt,
                                           self.v_varshape_binning_gen,
                                           shape_min, shape_max)
        hz_fracdiff_pr = TH1F("hz_fracdiff_prompt" + suffix,
                              "hz_fracdiff_prompt" + suffix, 100, -2, 2)
        fill_hist(hz_fracdiff_pr,
                  (dtmp_prompt_zgen[self.v_varshape_binning] -
                   dtmp_prompt_zgen[self.v_varshape_binning_gen]) /
                  dtmp_prompt_zgen[self.v_varshape_binning_gen])
        normalise_1d(hz_fracdiff_pr)
        hz_fracdiff_pr.Write()

    # Relative jet pt difference (reco - gen) / gen in bins of gen jet pt.
    for ibin2 in range(self.p_nbin2_gen):
        suffix = "%s_%.2f_%.2f" % (self.v_var2_binning,
                                   self.lvar2_binmin_gen[ibin2],
                                   self.lvar2_binmax_gen[ibin2])

        dtmp_nonprompt_jetptgen = seldf_singlevar(df_mc_reco_merged_nonprompt,
                                                  "pt_gen_jet",
                                                  self.lvar2_binmin_gen[ibin2],
                                                  self.lvar2_binmax_gen[ibin2])
        hjetpt_fracdiff = TH1F("hjetpt_fracdiff_nonprompt" + suffix,
                               "hjetpt_fracdiff_nonprompt" + suffix, 100, -2, 2)
        fill_hist(hjetpt_fracdiff,
                  (dtmp_nonprompt_jetptgen["pt_jet"] -
                   dtmp_nonprompt_jetptgen["pt_gen_jet"]) /
                  dtmp_nonprompt_jetptgen["pt_gen_jet"])
        normalise_1d(hjetpt_fracdiff)
        hjetpt_fracdiff.Write()

        dtmp_prompt_jetptgen = seldf_singlevar(df_mc_reco_merged_prompt,
                                               "pt_gen_jet",
                                               self.lvar2_binmin_gen[ibin2],
                                               self.lvar2_binmax_gen[ibin2])
        hjetpt_fracdiff_pr = TH1F("hjetpt_fracdiff_prompt" + suffix,
                                  "hjetpt_fracdiff_prompt" + suffix, 100, -2, 2)
        fill_hist(hjetpt_fracdiff_pr,
                  (dtmp_prompt_jetptgen["pt_jet"] -
                   dtmp_prompt_jetptgen["pt_gen_jet"]) /
                  dtmp_prompt_jetptgen["pt_gen_jet"])
        normalise_1d(hjetpt_fracdiff_pr)
        hjetpt_fracdiff_pr.Write()

    # Closure test: the prompt sample is split; the "test" part provides the
    # closure inputs, the "train" part fills the closure response matrix.
    df_mc_reco_merged_prompt_train, df_mc_reco_merged_prompt_test = \
        train_test_split(df_mc_reco_merged_prompt, test_size=self.closure_frac)
    df_tmp_selgen_pr_test, df_tmp_selreco_pr_test, df_tmp_selrecogen_pr_test = \
        self.create_df_closure(df_mc_reco_merged_prompt_test)
    _, _, df_tmp_selrecogen_pr_train = \
        self.create_df_closure(df_mc_reco_merged_prompt_train)

    fill2dhist(df_tmp_selreco_pr_test, hzvsjetpt_reco_closure_pr,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen_pr_test, hzvsjetpt_gen_closure_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    hzvsjetpt_reco_closure_pr.Write("input_closure_reco")
    hzvsjetpt_gen_closure_pr.Write("input_closure_gen")

    for ibin2 in range(self.p_nbin2_gen):
        suffix = "%s_%.2f_%.2f" % \
            (self.v_var2_binning, self.lvar2_binmin_gen[ibin2],
             self.lvar2_binmax_gen[ibin2])
        hz_gen_nocuts_closure = TH1F("hz_gen_nocuts_closure" + suffix,
                                     "hz_gen_nocuts_closure" + suffix,
                                     nzbin_gen, zbinarray_gen)
        hz_gen_nocuts_closure.Sumw2()
        hz_gen_cuts_closure = TH1F("hz_gen_cuts_closure" + suffix,
                                   "hz_gen_cuts_closure" + suffix,
                                   nzbin_gen, zbinarray_gen)
        hz_gen_cuts_closure.Sumw2()
        df_tmp_selgen_pr_test_bin = seldf_singlevar(df_tmp_selgen_pr_test,
                                                    "pt_gen_jet",
                                                    self.lvar2_binmin_gen[ibin2],
                                                    self.lvar2_binmax_gen[ibin2])
        df_tmp_selrecogen_pr_test_bin = seldf_singlevar(df_tmp_selrecogen_pr_test,
                                                        "pt_gen_jet",
                                                        self.lvar2_binmin_gen[ibin2],
                                                        self.lvar2_binmax_gen[ibin2])
        fill_hist(hz_gen_nocuts_closure,
                  df_tmp_selgen_pr_test_bin[self.v_varshape_binning_gen])
        fill_hist(hz_gen_cuts_closure,
                  df_tmp_selrecogen_pr_test_bin[self.v_varshape_binning_gen])
        hz_gen_cuts_closure.Write()
        hz_gen_nocuts_closure.Write()

    fill_hist(hjetpt_gen_nocuts_closure, df_tmp_selgen_pr_test["pt_gen_jet"])
    fill_hist(hjetpt_gen_cuts_closure, df_tmp_selrecogen_pr_test["pt_gen_jet"])
    hjetpt_gen_nocuts_closure.Write()
    hjetpt_gen_cuts_closure.Write()

    hzvsjetpt_reco_nocuts_closure = TH2F("hzvsjetpt_reco_nocuts_closure",
                                         "hzvsjetpt_reco_nocuts_closure",
                                         nzbin_reco, zbinarray_reco,
                                         njetptbin_reco, jetptbinarray_reco)
    hzvsjetpt_reco_nocuts_closure.Sumw2()
    hzvsjetpt_reco_cuts_closure = TH2F("hzvsjetpt_reco_cuts_closure",
                                       "hzvsjetpt_reco_cuts_closure",
                                       nzbin_reco, zbinarray_reco,
                                       njetptbin_reco, jetptbinarray_reco)
    hzvsjetpt_reco_cuts_closure.Sumw2()

    fill2dhist(df_tmp_selreco_pr_test, hzvsjetpt_reco_nocuts_closure,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selrecogen_pr_test, hzvsjetpt_reco_cuts_closure,
               self.v_varshape_binning, "pt_jet")
    hzvsjetpt_reco_nocuts_closure.Write()
    hzvsjetpt_reco_cuts_closure.Write()

    # Prompt response matrices: the full matched sample and, for the closure
    # test, the "train" part only.
    fill_weighted_response(response_matrix_pr, df_tmp_selrecogen_pr)
    fill_weighted_response(response_matrix_closure_pr, df_tmp_selrecogen_pr_train)
    response_matrix_pr.Write("response_matrix")
    response_matrix_closure_pr.Write("response_matrix_closure")

    out_file.Close()
def cutvariation_masshistos(self, min_cv_cut, max_cv_cut):
    """
    Produce invariant-mass histograms for a scan of the ML probability cut.

    For every final pt bin the merged reconstructed data are loaded,
    the optional event and trigger selections, the pt-bin selection and the
    run-list selection are applied, and ``2 * p_ncutvar + 1`` cut values are
    tried: ``p_ncutvar`` equidistant values starting at ``min_cv_cut[ipt]``
    below the central cut ``lpt_probcutfin[bin_id]``, the central cut itself,
    and ``p_ncutvar`` equidistant values above it, ending at
    ``max_cv_cut[ipt]``. For each trial and each bin of the second binning
    variable an unweighted (``hmass...``) and a weighted
    (``h_invmass_weight...``) histogram are written to
    ``n_filemass_cutvar``. The weighted histogram is only filled when the
    trigger bit does not contain "INT7" and the weight function is found in
    ``<d_val>/correctionsweights.root``; otherwise it is written empty.

    Args:
        min_cv_cut: lowest cut value of the scan, indexed by final pt bin.
        max_cv_cut: highest cut value of the scan, indexed by final pt bin.
    """
    myfile = TFile.Open(self.n_filemass_cutvar, "recreate")

    # The weight function depends only on the trigger bit and the second
    # binning variable, so it is fetched once instead of once per histogram.
    fileweight = None
    funcweighttrig = None
    if "INT7" not in self.triggerbit:
        fileweight_name = "%s/correctionsweights.root" % self.d_val
        fileweight = TFile.Open(fileweight_name, "read")
        namefunction = "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning)
        funcweighttrig = fileweight.Get(namefunction)
        # Opening a file makes it the current ROOT directory; switch back so
        # the histograms created below are attached to the output file.
        myfile.cd()

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged_data[bin_id], "rb"))

        central_cut = self.lpt_probcutfin[bin_id]
        stepsmin = (central_cut - min_cv_cut[ipt]) / self.p_ncutvar
        stepsmax = (max_cv_cut[ipt] - central_cut) / self.p_ncutvar
        ntrials = 2 * self.p_ncutvar + 1

        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger_data is not None:
            df = df.query(self.s_trigger_data)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        print("Using run selection for mass histo", self.runlistrigger[self.triggerbit], \
              "for period", self.period)
        df = selectdfrunlist(df, self.run_param[self.runlistrigger[self.triggerbit]],
                             "run_number")

        for icv in range(ntrials):
            if icv < self.p_ncutvar:
                selml_cvval = min_cv_cut[ipt] + icv * stepsmin
            elif icv == self.p_ncutvar:
                selml_cvval = central_cut
            else:
                # 1, 2, ... steps above the central value
                selml_cvval = central_cut + (icv - self.p_ncutvar) * stepsmax
            selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)
            print("Cutting on: ", selml_cv)
            # Each trial cuts the unfiltered bin dataframe. Filtering
            # cumulatively gives the same result only while the cut values
            # increase from trial to trial.
            df_cut = df.query(selml_cv)

            for var2_min, var2_max in zip(self.lvar2_binmin, self.lvar2_binmax):
                suffix = "%s%d_%d_%d_%s%.2f_%.2f" % \
                    (self.v_var_binning, self.lpt_finbinmin[ipt],
                     self.lpt_finbinmax[ipt], icv,
                     self.v_var2_binning, var2_min, var2_max)
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                        self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df_cut, self.v_var2_binning, var2_min, var2_max)
                fill_hist(h_invmass, df_bin.inv_mass)
                if funcweighttrig:
                    # Candidates are weighted by the inverse of the function
                    # evaluated at their second-binning-variable value.
                    weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                    weightsinv = [1./weight for weight in weights]
                    fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()

    if fileweight:
        fileweight.Close()
    myfile.Close()