def prepare_data_mc_mcgen(self):
    """Load the data, MC-reco and MC-gen frames and apply the bin selection.

    If both "applied" reco pickles exist and ``self.step_done`` reports the
    step ``preparemlsamples_data_mc_mcgen`` as done, those pickles are loaded
    unchanged. Otherwise the raw reco pickles are loaded and passed through
    the event selection and the respective trigger selection.

    The generated-level frame is always loaded from ``self.f_gen_mc`` and
    passed through the event, MC-trigger and gen-preselection queries.

    ``self.arraydf`` holds the data and MC frames as they are *before* the
    final ``seldf_singlevar`` call on ``self.v_bin``.
    """
    self.logger.info("Prepare data reco as well as MC reco and gen")

    def _unpickle(path):
        # Single place for the "open + unpickle" pattern used below.
        return pickle.load(openfile(path, "rb"))

    use_applied = (os.path.exists(self.f_reco_applieddata)
                   and os.path.exists(self.f_reco_appliedmc)
                   and self.step_done("preparemlsamples_data_mc_mcgen"))

    if not use_applied:
        self.df_data = _unpickle(self.f_reco_data)
        self.df_mc = _unpickle(self.f_reco_mc)
        # Event selection first, then the sample-specific trigger selection.
        selection_pairs = (
            (self.p_evtsel, self.p_evtsel),
            (self.p_triggersel_data, self.p_triggersel_mc),
        )
        for sel_data, sel_mc in selection_pairs:
            self.df_data = selectdfquery(self.df_data, sel_data)
            self.df_mc = selectdfquery(self.df_mc, sel_mc)
    else:
        self.df_data = _unpickle(self.f_reco_applieddata)
        self.df_mc = _unpickle(self.f_reco_appliedmc)

    gen = _unpickle(self.f_gen_mc)
    for gen_sel in (self.p_evtsel, self.p_triggersel_mc):
        gen = selectdfquery(gen, gen_sel)
    self.df_mcgen = gen.query(self.p_presel_gen_eff)

    # Snapshot taken before the bin selection on purpose.
    self.arraydf = [self.df_data, self.df_mc]

    bin_window = (self.v_bin, self.p_binmin, self.p_binmax)
    self.df_mc = seldf_singlevar(self.df_mc, *bin_window)
    self.df_mcgen = seldf_singlevar(self.df_mcgen, *bin_window)
    self.df_data = seldf_singlevar(self.df_data, *bin_window)
def process_histomass(self):
    """Write invariant-mass histograms per (pt bin, second-variable bin).

    For every pt bin the merged reco pickle is loaded, filtered with the ML
    selection and the optional event/trigger queries, and passed through
    ``seldf_singlevar`` on the pt-bin limits. For every second-variable bin
    a ``hmass<suffix>`` TH1F is filled from ``inv_mass`` and written to the
    output file. When the frame has a ``pt_jet`` column, a ``hzvsmass<suffix>``
    TH2F of (inv_mass, z_calc output) is written as well.
    """
    out_file = TFile.Open(self.n_filemass, "recreate")

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        cands = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        cands = cands.query(self.l_selml[bin_id])
        for optional_sel in (self.s_evtsel, self.s_trigger):
            if optional_sel is not None:
                cands = cands.query(optional_sel)
        pt_lo = self.lpt_finbinmin[ipt]
        pt_hi = self.lpt_finbinmax[ipt]
        cands = seldf_singlevar(cands, self.v_var_binning, pt_lo, pt_hi)

        for ibin2 in range(len(self.lvar2_binmin)):
            var2_lo = self.lvar2_binmin[ibin2]
            var2_hi = self.lvar2_binmax[ibin2]
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
                self.v_var_binning, pt_lo, pt_hi,
                self.lpt_probcutfin[bin_id],
                self.v_var2_binning, var2_lo, var2_hi)

            mass_hist = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(cands, self.v_var2_binning, var2_lo, var2_hi)
            fill_hist(mass_hist, df_bin.inv_mass)
            out_file.cd()
            mass_hist.Write()

            # The z-vs-mass histogram only makes sense with jet columns.
            if "pt_jet" not in df_bin.columns:
                continue
            z_values = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                              df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
            z_vs_mass_hist = TH2F("hzvsmass" + suffix, "",
                                  5000, 1.00, 6.00, 2000, -0.5, 1.5)
            mass_z_pairs = np.vstack((df_bin.inv_mass, z_values)).T
            fill_hist(z_vs_mass_hist, mass_z_pairs)
            z_vs_mass_hist.Write()
def skim(self, file_index):
    """Skim one reco file (and, for MC, one gen file) into per-pt-bin pickles.

    Args:
        file_index: index into ``self.l_reco`` / ``self.l_gen`` and into the
            per-bin output lists ``self.mptfiles_recosk[ipt]`` /
            ``self.mptfiles_gensk[ipt]``.

    Behaviour on load failure: the failure is printed, as before. If the
    reco file cannot be loaded nothing is written and the method returns.
    If only the gen file cannot be loaded, the reco skims are still written
    and the gen skims are skipped. Previously both cases fell through to an
    unbound local variable.
    """
    def _load(path):
        # Close the handle deterministically instead of relying on GC.
        handle = openfile(path, "rb")
        try:
            return pickle.load(handle)
        finally:
            handle.close()

    def _dump(frame, path):
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    try:
        dfreco = _load(self.l_reco[file_index])
    except Exception as e:  # pylint: disable=broad-except
        print('failed to open file', self.l_reco[file_index], str(e))
        return

    # The gen frame does not depend on the pt bin: load it once, not per bin.
    dfgen = None
    if self.mcordata == "mc":
        try:
            dfgen = _load(self.l_gen[file_index])
        except Exception as e:  # pylint: disable=broad-except
            print('failed to open MC file', self.l_gen[file_index], str(e))

    for ipt in range(self.p_nptbins):
        dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                   self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
        dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
        dfrecosk = dfrecosk.reset_index(drop=True)
        _dump(dfrecosk, self.mptfiles_recosk[ipt][file_index])

        if dfgen is None:
            continue
        dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                  self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
        dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
        dfgensk = dfgensk.reset_index(drop=True)
        _dump(dfgensk, self.mptfiles_gensk[ipt][file_index])
def preparesample(self):
    """Build the signal/background ML sample and split it into train/test.

    Steps, in order:
      1. Load data reco, MC reco and MC gen pickles; apply the gen
         preselection query to the gen frame.
      2. Pick the signal and background source frames from
         ``[df_data, df_mc]`` via ``self.p_tagsig`` / ``self.p_tagbkg``
         (taken before the bin selection), then apply the bin selection and
         the signal/background ML queries.
      3. Zero the MC truth flags on the background frame.
      4. Cap ``self.p_nsig`` / ``self.p_nbkg`` at the available statistics
         (a warning is logged when fewer candidates than requested exist).
      5. Shuffle, truncate, label with ``self.v_sig`` (1 = signal,
         0 = background), concatenate and split with ``train_test_split``.
      6. Derive the signal/background sub-frames and the x/y train/test
         frames.

    All results are stored on ``self``; nothing is returned.
    """
    self.logger.info("Prepare Sample")
    self.df_data = pickle.load(openfile(self.f_reco_data, "rb"))
    self.df_mc = pickle.load(openfile(self.f_reco_mc, "rb"))
    self.df_mcgen = pickle.load(openfile(self.f_gen_mc, "rb"))
    self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)

    # References to the frames before the bin selection below.
    arraydf = [self.df_data, self.df_mc]
    self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin, self.p_binmax)

    self.df_sig, self.df_bkg = arraydf[self.p_tagsig], arraydf[self.p_tagbkg]
    self.df_sig = seldf_singlevar(self.df_sig, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_bkg = seldf_singlevar(self.df_bkg, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_sig = self.df_sig.query(self.s_selsigml)
    # Own the filtered frame before adding columns, so pandas does not treat
    # the assignments below as writes to a slice of another frame.
    self.df_bkg = self.df_bkg.query(self.s_selbkgml).copy()
    for truth_flag in ("ismcsignal", "ismcprompt", "ismcfd", "ismcbkg"):
        self.df_bkg[truth_flag] = 0

    if self.p_nsig > len(self.df_sig):
        self.logger.warning("There are not enough signal events")
    if self.p_nbkg > len(self.df_bkg):
        self.logger.warning("There are not enough background events")
    self.p_nsig = min(len(self.df_sig), self.p_nsig)
    self.p_nbkg = min(len(self.df_bkg), self.p_nbkg)
    self.logger.info("Used number of signal events is %d", self.p_nsig)
    self.logger.info("Used number of background events is %d", self.p_nbkg)

    self.df_sig = shuffle(self.df_sig, random_state=self.rnd_shuffle)
    self.df_bkg = shuffle(self.df_bkg, random_state=self.rnd_shuffle)
    # Same reasoning as above: label columns are written to owned copies.
    self.df_sig = self.df_sig[:self.p_nsig].copy()
    self.df_bkg = self.df_bkg[:self.p_nbkg].copy()
    self.df_sig[self.v_sig] = 1
    self.df_bkg[self.v_sig] = 0
    self.df_ml = pd.concat([self.df_sig, self.df_bkg])

    self.df_mltrain, self.df_mltest = train_test_split(
        self.df_ml, test_size=self.test_frac, random_state=self.rnd_splt)
    self.df_mltrain = self.df_mltrain.reset_index(drop=True)
    self.df_mltest = self.df_mltest.reset_index(drop=True)
    self.df_sigtrain, self.df_bkgtrain = split_df_sigbkg(self.df_mltrain, self.v_sig)
    self.df_sigtest, self.df_bkgtest = split_df_sigbkg(self.df_mltest, self.v_sig)

    self.logger.info("Total number of candidates: train %d and test %d",
                     len(self.df_mltrain), len(self.df_mltest))
    self.logger.info("Number of signal candidates: train %d and test %d",
                     len(self.df_sigtrain), len(self.df_sigtest))
    # Message now names the train count, matching the two lines above.
    self.logger.info("Number of bkg candidates: train %d and test %d",
                     len(self.df_bkgtrain), len(self.df_bkgtest))

    self.df_xtrain = self.df_mltrain[self.v_train]
    self.df_ytrain = self.df_mltrain[self.v_sig]
    self.df_xtest = self.df_mltest[self.v_train]
    self.df_ytest = self.df_mltest[self.v_sig]
def process_histomass_single(self, index):
    """Write the normalisation and invariant-mass histograms for one file.

    Args:
        index: position of the file in ``self.l_histomass``,
            ``self.l_evtorig`` and ``self.mptfiles_recoskmldec[bin_id]``.

    Output (written to the ROOT file opened from ``self.l_histomass[index]``):
      * ``hEvForNorm``: bin 1 holds the value returned by
        ``getnormforselevt``, bin 2 the number of events with
        ``is_ev_rej==0`` (both 0 when the event frame is empty);
      * ``hmass<suffix>`` per pt bin;
      * for MC, additionally ``hmass_sig<suffix>`` and ``hmass_refl<suffix>``.
    """
    def _book_mass_hist(name):
        return TH1F(name, "", self.p_num_bins,
                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])

    out_file = TFile.Open(self.l_histomass[index], "recreate")

    events = pickle.load(openfile(self.l_evtorig[index], "rb"))
    if self.s_trigger is not None:
        events = events.query(self.s_trigger)
    if self.runlistrigger is not None:
        events = selectdfrunlist(events, self.run_param[self.runlistrigger],
                                 "run_number")

    hNorm = TH1F("hEvForNorm", "hEvForNorm", 2, 0.5, 2.5)
    for label_bin, label in ((1, "normsalisation factor"), (2, "selected events")):
        hNorm.GetXaxis().SetBinLabel(label_bin, label)
    norm, nselevt = 0, 0
    if not events.empty:
        nselevt = len(events.query("is_ev_rej==0"))
        norm = getnormforselevt(events)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()
    # Result is not used further down; kept so the query is still executed.
    events = events.query("is_ev_rej==0")

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        cands = pickle.load(
            openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.doml is True:
            cands = cands.query(self.l_selml[bin_id])
        for optional_sel in (self.s_evtsel, self.s_trigger):
            if optional_sel is not None:
                cands = cands.query(optional_sel)
        cands = seldf_singlevar(cands, self.v_var_binning,
                                self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        suffix = "%s%d_%d" % (self.v_var_binning,
                              self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        mass_hist = _book_mass_hist("hmass" + suffix)
        if self.runlistrigger is not None:
            cands = selectdfrunlist(cands, self.run_param[self.runlistrigger],
                                    "run_number")
        fill_hist(mass_hist, cands.inv_mass)
        out_file.cd()
        mass_hist.Write()

        if self.mcordata != "mc":
            continue

        # MC only: tag reflections from the bit variable, then split.
        cands[self.v_ismcrefl] = np.array(
            tag_bit_df(cands, self.v_bitvar, self.b_mcrefl), dtype=int)
        signal_cands = cands[cands[self.v_ismcsignal] == 1]
        refl_cands = cands[cands[self.v_ismcrefl] == 1]
        sig_hist = _book_mass_hist("hmass_sig" + suffix)
        refl_hist = _book_mass_hist("hmass_refl" + suffix)
        fill_hist(sig_hist, signal_cands.inv_mass)
        fill_hist(refl_hist, refl_cands.inv_mass)
        out_file.cd()
        sig_hist.Write()
        refl_hist.Write()

    print("FINISHED")
def skim(self, file_index):
    """Skim one reco file (and, for MC, one gen file) into per-pt-bin pickles.

    Args:
        file_index: index into ``self.l_reco`` / ``self.l_gen`` and into the
            per-bin output lists ``self.mptfiles_recosk[ipt]`` /
            ``self.mptfiles_gensk[ipt]``.

    Each frame is passed through ``seldf_singlevar`` on the analysis pt-bin
    limits and through the per-bin skim query, re-indexed, and written with
    ``DataFrame.to_pickle``.
    """
    # Context managers close the input files as soon as they are read.
    with open(self.l_reco[file_index], "rb") as reco_file:
        dfreco = pickle.load(reco_file)

    # The gen frame is the same for every pt bin: read it once, not per bin.
    dfgen = None
    if self.mcordata == "mc":
        with open(self.l_gen[file_index], "rb") as gen_file:
            dfgen = pickle.load(gen_file)

    for ipt in range(self.p_nptbins):
        dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                   self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
        dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
        dfrecosk = dfrecosk.reset_index(drop=True)
        dfrecosk.to_pickle(self.mptfiles_recosk[ipt][file_index])

        if dfgen is None:
            continue
        dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                  self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
        dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
        dfgensk = dfgensk.reset_index(drop=True)
        dfgensk.to_pickle(self.mptfiles_gensk[ipt][file_index])
def process_histomass(self):
    """Write invariant-mass histograms per (pt bin, second-variable bin).

    Same flow for every pt bin: load the merged reco pickle, apply the ML
    selection and the optional event/trigger queries, pass the frame through
    ``seldf_singlevar`` on the pt limits, then loop over the second-variable
    bins. ``fill_hist`` is called with ``weights=None`` for the mass
    histogram. A z-vs-mass TH2F is added when ``pt_jet`` is a column.
    """
    root_out = TFile.Open(self.n_filemass, "recreate")
    n_var2_bins = len(self.lvar2_binmin)

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        frame = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        frame = frame.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            frame = frame.query(self.s_evtsel)
        if self.s_trigger is not None:
            frame = frame.query(self.s_trigger)
        frame = seldf_singlevar(frame, self.v_var_binning,
                                self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        for ibin2 in range(n_var2_bins):
            name_tail = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
                self.v_var_binning,
                self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt],
                self.lpt_probcutfin[bin_id],
                self.v_var2_binning,
                self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            h_mass = TH1F("hmass" + name_tail, "", self.p_num_bins,
                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            sub_frame = seldf_singlevar(frame, self.v_var2_binning,
                                        self.lvar2_binmin[ibin2],
                                        self.lvar2_binmax[ibin2])

            # Per-candidate weights are currently disabled; the original
            # weighting code is kept below for reference.
            #apply_weights = self.datap["analysis"][self.typean]["triggersel"]["weights"]
            #if apply_weights is not None:
            #    filenorm = TFile.Open("norm.root", "read")
            #    hnorm = filenorm.Get("hnorm_" + apply_weights[0] + "_" + apply_weights[1])
            #    weights = [hnorm.GetBinContent(hnorm.FindBin(_bin)) \
            #               for _bin in df_bin[apply_weights[0]]]
            fill_hist(h_mass, sub_frame.inv_mass, weights=None)
            root_out.cd()
            h_mass.Write()

            if "pt_jet" in sub_frame.columns:
                z_per_cand = z_calc(sub_frame.pt_jet, sub_frame.phi_jet,
                                    sub_frame.eta_jet, sub_frame.pt_cand,
                                    sub_frame.phi_cand, sub_frame.eta_cand)
                h_z_mass = TH2F("hzvsmass" + name_tail, "",
                                5000, 1.00, 6.00, 2000, -0.5, 1.5)
                fill_hist(h_z_mass, np.vstack((sub_frame.inv_mass, z_per_cand)).T)
                h_z_mass.Write()
def process_histomass(self):
    """Write one invariant-mass TH1F per (pt bin, second-variable bin).

    For each pt bin the merged reco pickle is loaded, filtered with the ML
    selection and passed through ``seldf_singlevar`` on the pt limits. For
    each second-variable bin the frame is passed through ``seldf_singlevar``
    again, and ``inv_mass`` is filled into ``hmass<suffix>``, which is then
    written to the file opened from ``self.n_filemass``.
    """
    out_file = TFile.Open(self.n_filemass, "recreate")

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        pt_lo = self.lpt_finbinmin[ipt]
        pt_hi = self.lpt_finbinmax[ipt]
        selected = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        selected = selected.query(self.l_selml[bin_id])
        selected = seldf_singlevar(selected, self.v_var_binning, pt_lo, pt_hi)

        for ibin2 in range(len(self.lvar2_binmin)):
            var2_lo = self.lvar2_binmin[ibin2]
            var2_hi = self.lvar2_binmax[ibin2]
            # Second-variable limits are formatted as integers here.
            suffix = "%s%d_%d_%.2f%s_%d_%d" % (
                self.v_var_binning, pt_lo, pt_hi,
                self.lpt_probcutfin[bin_id],
                self.v_var2_binning, var2_lo, var2_hi)
            mass_hist = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            in_bin = seldf_singlevar(selected, self.v_var2_binning, var2_lo, var2_hi)
            fill_hist(mass_hist, in_bin.inv_mass)
            out_file.cd()
            mass_hist.Write()
def create_df_closure(self, df_):
    """Return gen-, reco- and reco+gen-selected frames derived from ``df_``.

    Args:
        df_: input frame; it is copied before any selection is applied.

    Returns:
        Tuple ``(selgen, selreco, selrecogen)`` where
          * ``selgen`` went through the gen-level shape and ``pt_gen_jet``
            windows,
          * ``selreco`` went through the reco-level ``pt_jet`` and shape
            windows,
          * ``selrecogen`` is ``selgen`` with the reco-level windows applied
            on top.
        Each window spans the first bin minimum to the last bin maximum.
    """
    def _full_range(frame, column, bin_mins, bin_maxs):
        # Window from the lowest bin edge to the highest one.
        return seldf_singlevar(frame, column, bin_mins[0], bin_maxs[-1])

    def _reco_windows(frame):
        frame = _full_range(frame, "pt_jet",
                            self.lvar2_binmin_reco, self.lvar2_binmax_reco)
        return _full_range(frame, self.v_varshape_binning,
                           self.lvarshape_binmin_reco, self.lvarshape_binmax_reco)

    sel_gen = _full_range(df_.copy(), self.v_varshape_binning_gen,
                          self.lvarshape_binmin_gen, self.lvarshape_binmax_gen)
    sel_gen = _full_range(sel_gen, "pt_gen_jet",
                          self.lvar2_binmin_gen, self.lvar2_binmax_gen)

    sel_reco = _reco_windows(df_.copy())
    sel_reco_gen = _reco_windows(sel_gen.copy())

    return sel_gen, sel_reco, sel_reco_gen
def process_efficiency_single(self, index):
    """Write prompt/feed-down efficiency counting histograms for one file.

    Args:
        index: position of the file in ``self.l_histoeff`` and in the
            per-bin lists ``self.mptfiles_recoskmldec[bin_id]`` /
            ``self.mptfiles_gensk[bin_id]``.

    For every second-variable (reco) bin six TH1F histograms binned in pt
    are booked: generated, preselected and selected counts, each for prompt
    (``ismcprompt == 1``) and feed-down (``ismcfd == 1``). Bin content is
    the number of rows, bin error its square root.
    """
    histo_specs = (
        ("h_gen_pr", "Prompt Generated in acceptance |y|<0.5"),
        ("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel"),
        ("h_gen_fd", "FD Generated in acceptance |y|<0.5"),
        ("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel"),
        ("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel"),
    )

    def _ml_selected(frame, bin_id):
        # With ML enabled apply the model cut, otherwise just take a copy.
        if self.doml is True:
            return frame.query(self.l_selml[bin_id])
        return frame.copy()

    out_file = TFile.Open(self.l_histoeff[index], "recreate")

    for ibin2 in range(self.p_nbin2_reco):
        var2_lo = self.lvar2_binmin_reco[ibin2]
        var2_hi = self.lvar2_binmax_reco[ibin2]
        stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, var2_lo, var2_hi)

        n_bins = self.p_nptfinbins
        pt_edges = self.lpt_finbinmin.copy()
        pt_edges.append(self.lpt_finbinmax[n_bins-1])
        analysis_bin_lims = array.array('f', pt_edges)

        histos = {}
        for base_name, title in histo_specs:
            histos[base_name] = TH1F(base_name + stringbin2, title,
                                     n_bins, analysis_bin_lims)

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            run_list = self.run_param[self.runlistrigger[self.triggerbit]]

            df_mc_reco = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            for optional_sel in (self.s_evtsel, self.s_jetsel_reco, self.s_trigger):
                if optional_sel is not None:
                    df_mc_reco = df_mc_reco.query(optional_sel)
            df_mc_reco = selectdfrunlist(df_mc_reco, run_list, "run_number")

            df_mc_gen = pickle.load(
                openfile(self.mptfiles_gensk[bin_id][index], "rb"))
            df_mc_gen = df_mc_gen.query(self.s_jetsel_gen)
            df_mc_gen = selectdfrunlist(df_mc_gen, run_list, "run_number")

            pt_lo = self.lpt_finbinmin[ipt]
            pt_hi = self.lpt_finbinmax[ipt]
            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, pt_lo, pt_hi)
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, pt_lo, pt_hi)
            # Both levels use the reco limits of the second variable.
            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning,
                                         var2_lo, var2_hi)
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning,
                                        var2_lo, var2_hi)

            presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
            presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
            counted_frames = (
                ("h_gen_pr", df_mc_gen[df_mc_gen.ismcprompt == 1]),
                ("h_presel_pr", presel_pr),
                ("h_sel_pr", _ml_selected(presel_pr, bin_id)),
                ("h_gen_fd", df_mc_gen[df_mc_gen.ismcfd == 1]),
                ("h_presel_fd", presel_fd),
                ("h_sel_fd", _ml_selected(presel_fd, bin_id)),
            )
            # Histogram bins are 1-based, pt bins 0-based.
            for base_name, frame in counted_frames:
                val = len(frame)
                err = math.sqrt(val)
                histos[base_name].SetBinContent(ipt + 1, val)
                histos[base_name].SetBinError(ipt + 1, err)

        out_file.cd()
        for base_name, _ in histo_specs:
            histos[base_name].Write()
def process_histomass_single(self, index):
    """Write mass and z-vs-mass histograms for one file.

    Args:
        index: position of the file in ``self.l_histomass`` and in
            ``self.mptfiles_recoskmldec[bin_id]``.

    Per pt bin: an inclusive ``hmass_<ipt>`` histogram is written before the
    pt window is applied. Per (pt bin, second-variable reco bin): the frame
    is restricted to the run list, a ``z`` column is added from ``z_calc``,
    and ``hmass<suffix>`` plus a variable-binned ``hzvsmass<suffix>`` TH2F
    are written. For MC, ``hmass_sig<suffix>`` and ``hmass_refl<suffix>``
    are written as well.
    """
    def _book_mass_hist(name):
        return TH1F(name, "", self.p_num_bins,
                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])

    out_file = TFile.Open(self.l_histomass[index], "recreate")

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        cands = pickle.load(
            openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.doml is True:
            cands = cands.query(self.l_selml[bin_id])
        for optional_sel in (self.s_evtsel, self.s_jetsel_reco, self.s_trigger):
            if optional_sel is not None:
                cands = cands.query(optional_sel)

        inclusive_hist = _book_mass_hist("hmass_%d" % ipt)
        fill_hist(inclusive_hist, cands.inv_mass)
        out_file.cd()
        inclusive_hist.Write()

        pt_lo = self.lpt_finbinmin[ipt]
        pt_hi = self.lpt_finbinmax[ipt]
        cands = seldf_singlevar(cands, self.v_var_binning, pt_lo, pt_hi)

        for ibin2 in range(self.p_nbin2_reco):
            var2_lo = self.lvar2_binmin_reco[ibin2]
            var2_hi = self.lvar2_binmax_reco[ibin2]
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % (
                self.v_var_binning, pt_lo, pt_hi,
                self.lpt_probcutfin[bin_id],
                self.v_var2_binning, var2_lo, var2_hi)

            in_bin = seldf_singlevar(cands, self.v_var2_binning, var2_lo, var2_hi)
            in_bin = selectdfrunlist(
                in_bin, self.run_param[self.runlistrigger[self.triggerbit]],
                "run_number")
            # add the z column
            in_bin["z"] = z_calc(in_bin.pt_jet, in_bin.phi_jet, in_bin.eta_jet,
                                 in_bin.pt_cand, in_bin.phi_cand, in_bin.eta_cand)

            mass_hist = _book_mass_hist("hmass" + suffix)
            fill_hist(mass_hist, in_bin.inv_mass)
            out_file.cd()
            mass_hist.Write()

            # 5000 bins in range 1.0-6.0
            mass_edges = array.array(
                'd', [1.0 + i * (5.0 / 5000.0) for i in range(5001)])
            z_edges = array.array('d', self.varshaperanges_reco)
            z_vs_mass_hist = TH2F("hzvsmass" + suffix, "",
                                  5000, mass_edges,
                                  self.p_nbinshape_reco, z_edges)
            z_vs_mass_hist.Sumw2()
            fill2dhist(in_bin, z_vs_mass_hist, "inv_mass", self.v_varshape_binning)
            z_vs_mass_hist.Write()

            if self.mcordata != "mc":
                continue

            # MC only: tag reflections from the bit variable, then split.
            in_bin[self.v_ismcrefl] = np.array(
                tag_bit_df(in_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
            signal_cands = in_bin[in_bin[self.v_ismcsignal] == 1]
            refl_cands = in_bin[in_bin[self.v_ismcrefl] == 1]
            sig_hist = _book_mass_hist("hmass_sig" + suffix)
            refl_hist = _book_mass_hist("hmass_refl" + suffix)
            fill_hist(sig_hist, signal_cands.inv_mass)
            fill_hist(refl_hist, refl_cands.inv_mass)
            out_file.cd()
            sig_hist.Write()
            refl_hist.Write()
def process_efficiency_single(self, index):
    """Write efficiency counting histograms for one file.

    Args:
        index: position of the file in ``self.l_histoeff`` and in the
            per-bin lists ``self.mptfiles_recoskmldec[bin_id]`` /
            ``self.mptfiles_gensk[bin_id]``.

    For every second-variable bin ten pt-binned TH1F histograms are booked
    (generated / preselected / selected counts for prompt and feed-down,
    plus with-TOF / without-TOF variants for prompt) and written to the
    output file. Bin contents come either from ``get_reweighted_count`` or
    from the plain row count with a sqrt(N) error.
    """
    out_file = TFile.Open(self.l_histoeff[index], "recreate")
    # Histograms booked for the current second-variable bin, in write order.
    h_list = []
    for ibin2 in range(len(self.lvar2_binmin)):
        stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen,
                                        self.lvar2_binmin[ibin2],
                                        self.lvar2_binmax[ibin2])
        n_bins = len(self.lpt_finbinmin)
        # Bin edges: all lower pt edges plus the upper edge of the last bin.
        analysis_bin_lims_temp = self.lpt_finbinmin.copy()
        analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
        analysis_bin_lims = array.array('f', analysis_bin_lims_temp)

        # The default arguments bind the values of this loop iteration at
        # definition time.
        def make_histo(name, title,
                       name_extra=stringbin2,
                       bins=n_bins,
                       binning=analysis_bin_lims):
            """Book a TH1F and register it in h_list for writing."""
            histo = TH1F(name + name_extra, title, bins, binning)
            h_list.append(histo)
            return histo

        h_gen_pr = make_histo("h_gen_pr", "Prompt Generated in acceptance |y|<0.5")
        h_presel_pr = make_histo("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel")
        h_presel_pr_wotof = make_histo(
            "h_presel_pr_wotof", "Prompt Reco in acc woTOF |#eta|<0.8 and pre-sel")
        h_presel_pr_wtof = make_histo(
            "h_presel_pr_wtof", "Prompt Reco in acc wTOF |#eta|<0.8 and pre-sel")
        h_sel_pr = make_histo(
            "h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel")
        h_sel_pr_wotof = make_histo(
            "h_sel_pr_wotof", "Prompt Reco and sel woTOF in acc |#eta|<0.8")
        h_sel_pr_wtof = make_histo(
            "h_sel_pr_wtof", "Prompt Reco and sel wTOF in acc |#eta|<0.8")
        h_gen_fd = make_histo("h_gen_fd", "FD Generated in acceptance |y|<0.5")
        h_presel_fd = make_histo("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel")
        h_sel_fd = make_histo("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel")

        # 0-based index of the pt bin being filled; advanced once per ipt.
        bincounter = 0
        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df_mc_reco = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            if self.s_evtsel is not None:
                df_mc_reco = df_mc_reco.query(self.s_evtsel)
            if self.s_trigger is not None:
                df_mc_reco = df_mc_reco.query(self.s_trigger)
            if self.runlistrigger is not None:
                df_mc_reco = selectdfrunlist(df_mc_reco, \
                    self.run_param[self.runlistrigger], "run_number")
            df_mc_gen = pickle.load(
                openfile(self.mptfiles_gensk[bin_id][index], "rb"))
            df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
            if self.runlistrigger is not None:
                df_mc_gen = selectdfrunlist(df_mc_gen, \
                    self.run_param[self.runlistrigger], "run_number")
            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                         self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                        self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            # Whether or not to cut on the 2nd binning variable
            if self.mc_cut_on_binning2:
                df_mc_reco = seldf_singlevar_inclusive(df_mc_reco, self.v_var2_binning_gen, \
                    self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                df_mc_gen = seldf_singlevar_inclusive(df_mc_gen, self.v_var2_binning_gen, \
                    self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            # Prompt: generated, preselected reco, and ML-selected reco.
            df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
            df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
            df_reco_sel_pr = None
            if self.doml is True:
                df_reco_sel_pr = df_reco_presel_pr.query(
                    self.l_selml[bin_id])
            else:
                df_reco_sel_pr = df_reco_presel_pr.copy()
            # Feed-down: same three frames.
            df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
            df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
            df_reco_sel_fd = None
            if self.doml is True:
                df_reco_sel_fd = df_reco_presel_fd.query(
                    self.l_selml[bin_id])
            else:
                df_reco_sel_fd = df_reco_presel_fd.copy()

            # Defaults capture the current second-variable bin and pt-bin
            # counter at definition time.
            def set_content(df_to_use, histogram,
                            i_b=ibin2, b_c=bincounter):
                """Set content and error of histogram bin b_c + 1 from df_to_use."""
                if self.corr_eff_mult[i_b] is True:
                    val, err = self.get_reweighted_count(df_to_use, i_b)
                else:
                    val = len(df_to_use)
                    err = math.sqrt(val)
                histogram.SetBinContent(b_c + 1, val)
                histogram.SetBinError(b_c + 1, err)

            set_content(df_gen_sel_pr, h_gen_pr)
            # TOF split only when the column exists. Rows with a value of
            # exactly -998 enter neither histogram.
            # NOTE(review): values below -998 look like a "no TOF" sentinel - confirm.
            if "nsigTOF_Pr_0" in df_reco_presel_pr:
                set_content(
                    df_reco_presel_pr[
                        df_reco_presel_pr.nsigTOF_Pr_0 < -998],
                    h_presel_pr_wotof)
                set_content(
                    df_reco_presel_pr[
                        df_reco_presel_pr.nsigTOF_Pr_0 > -998],
                    h_presel_pr_wtof)
            set_content(df_reco_presel_pr, h_presel_pr)
            set_content(df_reco_sel_pr, h_sel_pr)
            if "nsigTOF_Pr_0" in df_reco_sel_pr:
                set_content(
                    df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 < -998],
                    h_sel_pr_wotof)
                set_content(
                    df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 > -998],
                    h_sel_pr_wtof)
            set_content(df_gen_sel_fd, h_gen_fd)
            set_content(df_reco_presel_fd, h_presel_fd)
            set_content(df_reco_sel_fd, h_sel_fd)
            bincounter = bincounter + 1
        out_file.cd()
        for h in h_list:
            h.Write()
        # Start the next second-variable bin with an empty list.
        h_list = []
def process_histomass_single(self, index):
    """Write normalisation, mass and validation histograms for one file.

    Args:
        index: position of the file in ``self.l_histomass``,
            ``self.l_evtorig`` and ``self.mptfiles_recoskmldec[bin_id]``.

    Written to the ROOT file opened from ``self.l_histomass[index]``:
      * ``histonorm`` with event counts after each selection step and per
        second-variable bin;
      * the histograms returned by ``gethistonormforselevt_mult`` (plus the
        weighted variants for data when ``usetriggcorrfunc`` is set);
      * per (pt bin, second-variable bin): a directory with meta info,
        ``hmass<suffix>`` and ``h_invmass_weight<suffix>``, and for MC the
        signal and reflection mass histograms;
      * when ``event_cand_validation`` is True, multiplicity and
        validation histograms.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")
    dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
    # Event counts are recorded after each selection step.
    neventsorig = len(dfevtorig)
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    neventsaftertrigger = len(dfevtorig)
    if self.runlistrigger is not None:
        dfevtorig = selectdfrunlist(dfevtorig, \
            self.run_param[self.runlistrigger], "run_number")
    neventsafterrunsel = len(dfevtorig)
    dfevtevtsel = dfevtorig.query(self.s_evtsel)

    # Validation plot for the event selection
    neventsafterevtsel = len(dfevtevtsel)
    histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
    histonorm.SetBinContent(1, neventsorig)
    histonorm.GetXaxis().SetBinLabel(1, "tot events")
    histonorm.SetBinContent(2, neventsaftertrigger)
    histonorm.GetXaxis().SetBinLabel(2, "tot events after trigger")
    histonorm.SetBinContent(3, neventsafterrunsel)
    histonorm.GetXaxis().SetBinLabel(3, "tot events after run sel")
    histonorm.SetBinContent(4, neventsafterevtsel)
    histonorm.GetXaxis().SetBinLabel(4, "tot events after evt sel")
    # Bins 5, 6, ... hold the counts per second-variable bin. The histogram
    # is booked with 10 bins, so at most 6 such bins fit inside its range.
    for ibin2 in range(len(self.lvar2_binmin)):
        binneddf = seldf_singlevar_inclusive(dfevtevtsel, self.v_var2_binning_gen, \
            self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
        histonorm.SetBinContent(5 + ibin2, len(binneddf))
        histonorm.GetXaxis().SetBinLabel(5 + ibin2, \
                    "tot events after mult sel %d - %d" % \
                    (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]))
    histonorm.Write()
    labeltrigger = "hbit%svs%s" % (self.triggerbit, self.v_var2_binning_gen)

    myfile.cd()
    hsel, hnovtxmult, hvtxoutmult = \
        self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
                                   labeltrigger, self.v_var2_binning_gen)

    # Weighted variants only for data with a trigger-correction function.
    if self.usetriggcorrfunc is not None and self.mcordata == "data":
        hselweight, hnovtxmultweight, hvtxoutmultweight = \
            self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
                labeltrigger, self.v_var2_binning_gen, self.usetriggcorrfunc)
        hselweight.Write()
        hnovtxmultweight.Write()
        hvtxoutmultweight.Write()

    hsel.Write()
    hnovtxmult.Write()
    hvtxoutmult.Write()

    # Selected candidates of every pt bin, collected before the pt window
    # is applied; used by the validation block at the end.
    list_df_recodtrig = []

    for ipt in range(self.p_nptfinbins): # pylint: disable=too-many-nested-blocks
        bin_id = self.bin_matching[ipt]
        df = pickle.load(
            openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        if self.runlistrigger is not None:
            df = selectdfrunlist(df, \
                self.run_param[self.runlistrigger], "run_number")
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        list_df_recodtrig.append(df)
        df = seldf_singlevar(df, self.v_var_binning, \
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        if self.do_custom_analysis_cuts:
            df = self.apply_cuts_ptbin(df, ipt)

        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2],
                      self.lvar2_binmax[ibin2])
            # One directory per (pt bin, second-variable bin); only the meta
            # info is written into it here.
            curr_dir = myfile.mkdir(f"bin1_{ipt}_bin2_{ibin2}")
            meta_info = create_meta_info(
                self.v_var_binning, self.lpt_finbinmin[ipt],
                self.lpt_finbinmax[ipt], self.v_var2_binning,
                self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2],
                self.lpt_probcutfin[bin_id])
            write_meta_info(curr_dir, meta_info)
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
                self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            fill_hist(h_invmass, df_bin.inv_mass)
            # The weighted histogram is filled with the inverse of the
            # weights; it is written (possibly empty) in every case.
            if self.usetriggcorrfunc is not None and self.mcordata == "data":
                weights = self.make_weights(
                    df_bin[self.v_var2_binning_gen], self.weightfunc,
                    self.weighthist, self.usetriggcorrfunc)
                weightsinv = [1. / weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()
            if self.mcordata == "mc":
                # Tag reflections from the bit variable, then split the frame.
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                    df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    if self.event_cand_validation is True:
        df_recodtrig = pd.concat(list_df_recodtrig)
        # Keep candidates within +-0.15 of self.mass.
        df_recodtrig = df_recodtrig.query("inv_mass>%f and inv_mass<%f" % \
                                          (self.mass - 0.15, self.mass + 0.15))
        dfevtwithd = pd.merge(dfevtevtsel, df_recodtrig, on=self.v_evtmatch)
        label = "h%s" % self.v_var2_binning_gen
        histomult = TH1F(label, label, self.nbinshisto,
                         self.minvaluehisto, self.maxvaluehisto)
        fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
        histomult.Write()
        labelwithd = "h%s_withd" % self.v_var2_binning_gen
        histomultwithd = TH1F(labelwithd, labelwithd, self.nbinshisto,
                              self.minvaluehisto, self.maxvaluehisto)
        # "_x" is the suffix pandas gives to the left-hand (event) column
        # when both merged frames carry the same column name.
        fill_hist(histomultwithd, dfevtwithd["%s_x" % self.v_var2_binning_gen])
        histomultwithd.Write()
        # Validation histograms
        fill_validation_vertex(dfevtorig, dfevtevtsel, df_recodtrig).write()
        fill_validation_multiplicity(dfevtorig, dfevtevtsel, df_recodtrig).write()
        fill_validation_candidates(df_recodtrig).write()
        if self.mcordata == "mc":
            fill_validation_candidates(
                df_recodtrig[df_recodtrig[self.v_ismcsignal] == 1], "MC").write()
def cutvariation_efficiencies(self, min_cv_cut, max_cv_cut):
    """Write generated/selected count histograms for a scan of the ML cut.

    Args:
        min_cv_cut: per-pt-bin lowest cut value of the scan (indexed by ipt).
        max_cv_cut: per-pt-bin highest cut value of the scan (indexed by ipt).

    For each pt bin, ``2 * self.p_ncutvar + 1`` cut values are tried:
    ``p_ncutvar`` values stepping up from ``min_cv_cut[ipt]``, the central
    value ``self.lpt_probcutfin[bin_id]``, and ``p_ncutvar`` values stepping
    up to ``max_cv_cut[ipt]``. For every (cut value, second-variable bin)
    pair four pt-binned TH1F histograms (prompt/FD, generated/selected) are
    filled and finally written to ``self.n_fileeff_cutvar``.
    """
    myfile = TFile.Open(self.n_fileeff_cutvar, "recreate")

    # One histogram per (cut value, second-variable bin), indexed by idx.
    h_gen_pr = []
    h_sel_pr = []
    h_gen_fd = []
    h_sel_fd = []

    idx = 0
    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged_mc[bin_id], "rb"))
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger_mc is not None:
            df = df.query(self.s_trigger_mc)
        print("Using run selection for eff histo", self.runlistrigger[self.triggerbit], \
              "for period", self.period)
        df = selectdfrunlist(df, \
            self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        df = seldf_singlevar(df, self.v_var_binning, self.lpt_finbinmin[ipt], \
                             self.lpt_finbinmax[ipt])

        df_gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
        df_gen = df_gen.query(self.s_presel_gen_eff)
        df_gen = selectdfrunlist(df_gen, \
            self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        df_gen = seldf_singlevar(df_gen, self.v_var_binning, self.lpt_finbinmin[ipt], \
                                 self.lpt_finbinmax[ipt])

        # Step sizes below and above the central cut value.
        stepsmin = (self.lpt_probcutfin[bin_id] - min_cv_cut[ipt]) / self.p_ncutvar
        stepsmax = (max_cv_cut[ipt] - self.lpt_probcutfin[bin_id]) / self.p_ncutvar
        ntrials = 2 * self.p_ncutvar + 1
        # Counts the steps taken above the central value.
        icvmax = 1

        # Restarted for every pt bin so that the same histogram index is
        # reused for the same (cut value, second-variable bin) pair.
        idx = 0
        for icv in range(ntrials):
            if icv < self.p_ncutvar:
                selml_cvval = min_cv_cut[ipt] + icv * stepsmin
            elif icv == self.p_ncutvar:
                selml_cvval = self.lpt_probcutfin[bin_id]
            else:
                selml_cvval = self.lpt_probcutfin[bin_id] + icvmax * stepsmax
                icvmax = icvmax + 1
            selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)

            print("Cutting on: ", selml_cv)
            # NOTE(review): df is overwritten, so each cut is applied on top
            # of the previous ones. This matches independent cuts only while
            # the cut values do not decrease from one icv to the next - confirm.
            df = df.query(selml_cv)

            for ibin2 in range(len(self.lvar2_binmin)):
                stringbin2 = "_%d_%s_%.2f_%.2f" % (icv, \
                                            self.v_var2_binning, \
                                            self.lvar2_binmin[ibin2], \
                                            self.lvar2_binmax[ibin2])

                # Histograms are booked once, while processing the first pt bin.
                if ipt == 0:
                    n_bins = len(self.lpt_finbinmin)
                    analysis_bin_lims_temp = self.lpt_finbinmin.copy()
                    analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins-1])
                    # NOTE(review): `array` is called directly here, so it must
                    # be bound to a constructor (not the module) - verify import.
                    analysis_bin_lims = array('f', analysis_bin_lims_temp)
                    h_gen_pr.append(TH1F("h_gen_pr" + stringbin2, "Prompt Generated in acceptance |y|<0.5", \
                                         n_bins, analysis_bin_lims))
                    h_sel_pr.append(TH1F("h_sel_pr" + stringbin2, "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
                                         n_bins, analysis_bin_lims))
                    h_gen_fd.append(TH1F("h_gen_fd" + stringbin2, "FD Generated in acceptance |y|<0.5", \
                                         n_bins, analysis_bin_lims))
                    h_sel_fd.append(TH1F("h_sel_fd" + stringbin2, "FD Reco and sel in acc |#eta|<0.8 and sel", \
                                         n_bins, analysis_bin_lims))

                df_bin = seldf_singlevar(df, self.v_var2_binning, self.lvar2_binmin[ibin2], \
                                         self.lvar2_binmax[ibin2])
                df_gen_bin = seldf_singlevar(df_gen, self.v_var2_binning, self.lvar2_binmin[ibin2], \
                                             self.lvar2_binmax[ibin2])

                df_sel_pr = df_bin[df_bin.ismcprompt == 1]
                df_gen_pr = df_gen_bin[df_gen_bin.ismcprompt == 1]
                df_sel_fd = df_bin[df_bin.ismcfd == 1]
                df_gen_fd = df_gen_bin[df_gen_bin.ismcfd == 1]

                # Content is the row count, error its square root; histogram
                # bins are 1-based while ipt is 0-based.
                h_gen_pr[idx].SetBinContent(ipt + 1, len(df_gen_pr))
                h_gen_pr[idx].SetBinError(ipt + 1, math.sqrt(len(df_gen_pr)))
                h_sel_pr[idx].SetBinContent(ipt + 1, len(df_sel_pr))
                h_sel_pr[idx].SetBinError(ipt + 1, math.sqrt(len(df_sel_pr)))
                h_gen_fd[idx].SetBinContent(ipt + 1, len(df_gen_fd))
                h_gen_fd[idx].SetBinError(ipt + 1, math.sqrt(len(df_gen_fd)))
                h_sel_fd[idx].SetBinContent(ipt + 1, len(df_sel_fd))
                h_sel_fd[idx].SetBinError(ipt + 1, math.sqrt(len(df_sel_fd)))
                idx = idx + 1

    myfile.cd()
    # idx holds the number of histograms per list after the last pt bin.
    for i in range(idx):
        h_gen_pr[i].Write()
        h_sel_pr[i].Write()
        h_gen_fd[i].Write()
        h_sel_fd[i].Write()
def preparesample(self):
    """Provide the train/test samples and the derived x/y frames.

    If both pickled samples for the current bin exist in ``self.dirmlout``
    and ``self.step_done("preparemlsamples")`` is true, they are loaded.
    Otherwise signal and background frames are taken from ``self.arraydf``
    (filled by ``prepare_data_mc_mcgen``), restricted to the bin, filtered
    with the ML signal/background selections, shuffled, truncated to
    ``p_nsig``/``p_nbkg``, labelled in column ``self.v_sig``, concatenated,
    split into train/test and pickled for later use.

    In both cases the signal/background sub-samples are extracted, the
    statistics are logged, optional masking is applied and the feature and
    label frames ``df_xtrain``, ``df_ytrain``, ``df_xtest``, ``df_ytest``
    are set.
    """
    self.logger.info("Prepare Sample")

    filename_train = \
        os.path.join(self.dirmlout, f"df_train_{self.p_binmin}_{self.p_binmax}.pkl")
    filename_test = \
        os.path.join(self.dirmlout, f"df_test_{self.p_binmin}_{self.p_binmax}.pkl")

    # Short-circuit on purpose: step_done is only asked when both files exist.
    reuse_cached = os.path.exists(filename_train) \
        and os.path.exists(filename_test) \
        and self.step_done("preparemlsamples")

    if reuse_cached:
        self.df_mltrain = pickle.load(openfile(filename_train, "rb"))
        self.df_mltest = pickle.load(openfile(filename_test, "rb"))
    else:
        self.prepare_data_mc_mcgen()

        # Signal sample: pick, restrict to the bin, apply the ML selection.
        sig = self.arraydf[self.p_tagsig]
        bkg = self.arraydf[self.p_tagbkg]
        sig = seldf_singlevar(sig, self.v_bin, self.p_binmin, self.p_binmax)
        bkg = seldf_singlevar(bkg, self.v_bin, self.p_binmin, self.p_binmax)
        self.df_sig = sig.query(self.s_selsigml)
        self.df_bkg = bkg.query(self.s_selbkgml)

        # Background candidates carry no MC truth: zero all the flags.
        for mc_flag in ("ismcsignal", "ismcprompt", "ismcfd", "ismcbkg"):
            self.df_bkg[mc_flag] = 0

        if self.p_equalise_sig_bkg:
            n_available = min(len(self.df_sig), len(self.df_bkg))
            self.p_nsig = min(n_available, self.p_nsig)
            self.p_nbkg = min(n_available, self.p_nbkg)

        self.df_ml = pd.DataFrame()
        self.df_sig = shuffle(self.df_sig, random_state=self.rnd_shuffle)
        self.df_bkg = shuffle(self.df_bkg, random_state=self.rnd_shuffle)
        self.df_sig = self.df_sig[:self.p_nsig]
        self.df_bkg = self.df_bkg[:self.p_nbkg]
        self.df_sig[self.v_sig] = 1
        self.df_bkg[self.v_sig] = 0
        self.df_ml = pd.concat([self.df_sig, self.df_bkg])

        train_part, test_part = train_test_split(self.df_ml, \
            test_size=self.test_frac, random_state=self.rnd_splt)
        self.df_mltrain = train_part.reset_index(drop=True)
        self.df_mltest = test_part.reset_index(drop=True)

        # Write for later usage
        pickle.dump(self.df_mltrain, openfile(filename_train, "wb"), protocol=4)
        pickle.dump(self.df_mltest, openfile(filename_test, "wb"), protocol=4)

    # Now continue with extracting signal and background stats and report
    self.df_sigtrain, self.df_bkgtrain = split_df_sigbkg(self.df_mltrain, self.v_sig)
    self.df_sigtest, self.df_bkgtest = split_df_sigbkg(self.df_mltest, self.v_sig)

    n_sig_total = len(self.df_sigtrain) + len(self.df_sigtest)
    n_bkg_total = len(self.df_bkgtrain) + len(self.df_bkgtest)

    self.logger.info("Total number of candidates: train %d and test %d",
                     len(self.df_mltrain), len(self.df_mltest))
    self.logger.info("Number of signal candidates: train %d and test %d",
                     len(self.df_sigtrain), len(self.df_sigtest))
    self.logger.info("Number of bkg candidates: %d and test %d",
                     len(self.df_bkgtrain), len(self.df_bkgtest))

    self.logger.info("Aim for number of signal events: %d", self.p_nsig)
    self.logger.info("Aim for number of background events: %d", self.p_nbkg)

    if self.p_nsig > n_sig_total:
        self.logger.warning("There are not enough signal events")
    if self.p_nbkg > n_bkg_total:
        self.logger.warning("There are not enough background events")

    if self.p_mask_values:
        self.logger.info("Maksing values for training and testing")
        for sample in (self.df_mltrain, self.df_mltest):
            mask_df(sample, self.p_mask_values)

    # Final preparation of signal and background samples for training and testing
    self.df_xtrain = self.df_mltrain[self.v_train]
    self.df_ytrain = self.df_mltrain[self.v_sig]

    self.df_xtest = self.df_mltest[self.v_train]
    self.df_ytest = self.df_mltest[self.v_sig]

    self.step_done("preparemlsamples")
def process_efficiency(self):
    """Fill and write the efficiency input histograms.

    For every bin of the second binning variable, six histograms are
    booked (prompt and feed-down; generated, preselected-reco and
    ML-selected-reco). For every analysis pt bin the reconstructed and
    generated frames are loaded, filtered (event selection, trigger,
    generator preselection, pt range, second-variable range) and the
    candidate counts are stored in bin ``ipt + 1`` with a sqrt(N) error.
    The histograms are written to ``self.n_fileeff``, which is recreated.
    """
    out_file = TFile.Open(self.n_fileeff, "recreate")

    # The pt bin edges do not depend on the second-variable bin.
    n_bins = len(self.lpt_finbinmin)
    analysis_bin_lims_temp = self.lpt_finbinmin.copy()
    analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
    analysis_bin_lims = array.array('f', analysis_bin_lims_temp)

    for ibin2 in range(len(self.lvar2_binmin)):
        stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
                                    self.lvar2_binmin[ibin2], \
                                    self.lvar2_binmax[ibin2])
        print(stringbin2)

        # Each histogram is booked exactly once: booking a second object
        # with the same name would replace the first one.
        h_gen_pr = TH1F("h_gen_pr" + stringbin2, "Prompt Generated in acceptance |y|<0.5", \
                        n_bins, analysis_bin_lims)
        h_presel_pr = TH1F("h_presel_pr" + stringbin2, "Prompt Reco in acc |#eta|<0.8 and sel", \
                           n_bins, analysis_bin_lims)
        h_sel_pr = TH1F("h_sel_pr" + stringbin2, "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
                        n_bins, analysis_bin_lims)
        h_gen_fd = TH1F("h_gen_fd" + stringbin2, "FD Generated in acceptance |y|<0.5", \
                        n_bins, analysis_bin_lims)
        h_presel_fd = TH1F("h_presel_fd" + stringbin2, "FD Reco in acc |#eta|<0.8 and sel", \
                           n_bins, analysis_bin_lims)
        h_sel_fd = TH1F("h_sel_fd" + stringbin2, "FD Reco and sel in acc |#eta|<0.8 and sel", \
                        n_bins, analysis_bin_lims)

        for ipt in range(self.p_nptfinbins):
            # ROOT bin numbering starts at 1.
            ibin = ipt + 1
            bin_id = self.bin_matching[ipt]

            df_mc_reco = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
            if self.s_evtsel is not None:
                df_mc_reco = df_mc_reco.query(self.s_evtsel)
            if self.s_trigger is not None:
                df_mc_reco = df_mc_reco.query(self.s_trigger)

            df_mc_gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
            df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)

            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning, \
                                 self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning, \
                                 self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])

            df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
            df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
            df_reco_sel_pr = df_reco_presel_pr.query(self.l_selml[bin_id])
            df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
            df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
            df_reco_sel_fd = df_reco_presel_fd.query(self.l_selml[bin_id])

            h_gen_pr.SetBinContent(ibin, len(df_gen_sel_pr))
            h_gen_pr.SetBinError(ibin, math.sqrt(len(df_gen_sel_pr)))
            h_presel_pr.SetBinContent(ibin, len(df_reco_presel_pr))
            h_presel_pr.SetBinError(ibin, math.sqrt(len(df_reco_presel_pr)))
            h_sel_pr.SetBinContent(ibin, len(df_reco_sel_pr))
            h_sel_pr.SetBinError(ibin, math.sqrt(len(df_reco_sel_pr)))

            h_gen_fd.SetBinContent(ibin, len(df_gen_sel_fd))
            h_gen_fd.SetBinError(ibin, math.sqrt(len(df_gen_sel_fd)))
            h_presel_fd.SetBinContent(ibin, len(df_reco_presel_fd))
            h_presel_fd.SetBinError(ibin, math.sqrt(len(df_reco_presel_fd)))
            h_sel_fd.SetBinContent(ibin, len(df_reco_sel_fd))
            h_sel_fd.SetBinError(ibin, math.sqrt(len(df_reco_sel_fd)))

        out_file.cd()
        h_gen_pr.Write()
        h_presel_pr.Write()
        h_sel_pr.Write()
        h_gen_fd.Write()
        h_presel_fd.Write()
        h_sel_fd.Write()
def cutvariation_masshistos(self, min_cv_cut, max_cv_cut):
    """Fill invariant-mass histograms for every ML-cut variation.

    For each analysis pt bin the data frame is loaded and filtered (event
    selection, data trigger, pt range, run list). It is then cut at
    ``2 * self.p_ncutvar + 1`` probability values going from
    ``min_cv_cut[ipt]`` through the central cut
    ``self.lpt_probcutfin[bin_id]`` towards ``max_cv_cut[ipt]``. For each
    (cut variation, second-variable bin) an unweighted mass histogram and
    a weighted one are written to ``self.n_filemass_cutvar``. The weighted
    histogram is only filled when the trigger bit does not contain "INT7"
    and the normalisation function is found in the weights file; it is
    written (possibly empty) in every case.

    Args:
        min_cv_cut: per-pt-bin lowest probability cut (indexed by ``ipt``).
        max_cv_cut: per-pt-bin highest probability cut (indexed by ``ipt``).
    """
    myfile = TFile.Open(self.n_filemass_cutvar, "recreate")

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged_data[bin_id], "rb"))

        stepsmin = (self.lpt_probcutfin[bin_id] - min_cv_cut[ipt]) / self.p_ncutvar
        stepsmax = (max_cv_cut[ipt] - self.lpt_probcutfin[bin_id]) / self.p_ncutvar
        ntrials = 2 * self.p_ncutvar + 1
        icvmax = 1

        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger_data is not None:
            df = df.query(self.s_trigger_data)
        df = seldf_singlevar(df, self.v_var_binning, \
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        print("Using run selection for mass histo", self.runlistrigger[self.triggerbit], \
              "for period", self.period)
        df = selectdfrunlist(df, self.run_param[self.runlistrigger[self.triggerbit]], \
                             "run_number")

        for icv in range(ntrials):
            if icv < self.p_ncutvar:
                selml_cvval = min_cv_cut[ipt] + icv * stepsmin
            elif icv == self.p_ncutvar:
                selml_cvval = self.lpt_probcutfin[bin_id]
            else:
                selml_cvval = self.lpt_probcutfin[bin_id] + icvmax * stepsmax
                icvmax = icvmax + 1
            selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)

            print("Cutting on: ", selml_cv)
            # Cut the un-cut frame each time: stacking the cuts on top of
            # each other is only right when the cut values never decrease.
            df_cv = df.query(selml_cv)

            for ibin2 in range(len(self.lvar2_binmin)):
                suffix = "%s%d_%d_%d_%s%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], icv,
                          self.v_var2_binning,
                          self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                        self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df_cv, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)

                if "INT7" not in self.triggerbit:
                    fileweight_name = "%s/correctionsweights.root" % self.d_val
                    fileweight = TFile.Open(fileweight_name, "read")
                    namefunction = "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning)
                    funcweighttrig = fileweight.Get(namefunction)
                    if funcweighttrig:
                        weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                        weightsinv = [1./weight for weight in weights]
                        fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                    # Release the weights file once it has been used
                    # instead of keeping one more handle per bin.
                    fileweight.Close()

                # Opening/closing the weights file changes the current
                # directory, so select the output file before writing.
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()
def process_histomass(self):
    """Fill and write the invariant-mass histograms.

    For each analysis pt bin the reconstructed frame is loaded and
    filtered (ML selection, event selection, trigger, pt range). For each
    bin of the second binning variable the following are written to
    ``self.n_filemass`` (recreated):

    * the unweighted mass histogram and a weighted one (the latter is
      only filled for data with a trigger bit not containing "INT7");
    * a z-versus-mass 2D histogram when the frame has a ``pt_jet`` column;
    * for MC, separate mass histograms for signal and reflections.
    """
    myfile = TFile.Open(self.n_filemass, "recreate")

    # These do not change inside the loops: look them up once.
    triggerbit = self.datap["analysis"][self.typean]["triggerbit"]
    use_trigger_weights = "INT7" not in triggerbit and self.mcordata == "data"

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning, \
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(df, self.v_var2_binning,
                                     self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            fill_hist(h_invmass, df_bin.inv_mass)

            if use_trigger_weights:
                fileweight_name = "%s/correctionsweights.root" % self.d_val
                fileweight = TFile.Open(fileweight_name, "read")
                namefunction = "funcnorm_%s" % self.triggerbit
                funcweighttrig = fileweight.Get(namefunction)
                weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                weightsinv = [1. / weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                # Release the weights file once it has been used instead
                # of keeping one more handle per bin.
                fileweight.Close()

            # Opening/closing the weights file changes the current
            # directory, so select the output file before writing.
            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()

            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00, 6.00, 2000, -0.5, 1.5)
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()

            if self.mcordata == "mc":
                # Work on an explicit copy so that adding the reflection
                # flag never writes into a slice of the parent frame.
                df_bin = df_bin.copy()
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(df_bin, self.v_bitvar,
                                                              self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()
def process_efficiency_single(self, index):
    """Fill the efficiency input histograms for one input file.

    ``index`` selects the output file in ``self.l_histoeff`` and the
    per-pt-bin input files in ``self.mptfiles_recoskmldec`` and
    ``self.mptfiles_gensk``. For each analysis pt bin the prompt and
    feed-down counts at generated, preselected-reco and selected-reco
    level are stored in bin ``ipt + 1`` with a sqrt(N) error. The ML
    selection is only applied when ``self.doml is True``.
    """
    #TO UPDATE TO DHADRON_MULT VERSION
    print("step1")
    out_file = TFile.Open(self.l_histoeff[index], "recreate")

    n_bins = len(self.lpt_finbinmin)
    pt_edges = self.lpt_finbinmin.copy()
    pt_edges.append(self.lpt_finbinmax[n_bins - 1])
    analysis_bin_lims = array.array('f', pt_edges)

    def book(name, title):
        # All histograms share the analysis pt binning.
        return TH1F(name, title, n_bins, analysis_bin_lims)

    def store_count(hist, ibin, frame):
        # Bin content is the number of rows, error is its square root.
        val = len(frame)
        err = math.sqrt(val)
        hist.SetBinContent(ibin, val)
        hist.SetBinError(ibin, err)

    def apply_ml(frame, bin_id):
        # Without ML the "selected" sample is a copy of the preselected one.
        if self.doml is True:
            return frame.query(self.l_selml[bin_id])
        return frame.copy()

    h_gen_pr = book("h_gen_pr", "Prompt Generated in acceptance |y|<0.5")
    h_presel_pr = book("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel")
    h_sel_pr = book("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel")
    h_gen_fd = book("h_gen_fd", "FD Generated in acceptance |y|<0.5")
    h_presel_fd = book("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel")
    h_sel_fd = book("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel")
    print("step2")

    for ipt in range(self.p_nptfinbins):
        print("step2a")
        ibin = ipt + 1
        bin_id = self.bin_matching[ipt]
        pt_lo = self.lpt_finbinmin[ipt]
        pt_hi = self.lpt_finbinmax[ipt]

        reco = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.s_evtsel is not None:
            reco = reco.query(self.s_evtsel)
        if self.s_trigger is not None:
            reco = reco.query(self.s_trigger)
        if self.runlistrigger is not None:
            reco = selectdfrunlist(reco, \
                self.run_param[self.runlistrigger], "run_number")

        gen = pickle.load(openfile(self.mptfiles_gensk[bin_id][index], "rb"))
        gen = gen.query(self.s_presel_gen_eff)
        print("step2b")
        if self.runlistrigger is not None:
            gen = selectdfrunlist(gen, \
                self.run_param[self.runlistrigger], "run_number")

        reco = seldf_singlevar(reco, self.v_var_binning, pt_lo, pt_hi)
        gen = seldf_singlevar(gen, self.v_var_binning, pt_lo, pt_hi)

        gen_pr = gen[gen.ismcprompt == 1]
        reco_presel_pr = reco[reco.ismcprompt == 1]
        reco_sel_pr = apply_ml(reco_presel_pr, bin_id)

        gen_fd = gen[gen.ismcfd == 1]
        reco_presel_fd = reco[reco.ismcfd == 1]
        print("step2d")
        reco_sel_fd = apply_ml(reco_presel_fd, bin_id)

        store_count(h_gen_pr, ibin, gen_pr)
        store_count(h_presel_pr, ibin, reco_presel_pr)
        store_count(h_sel_pr, ibin, reco_sel_pr)
        print("step2e")

        store_count(h_gen_fd, ibin, gen_fd)
        store_count(h_presel_fd, ibin, reco_presel_fd)
        store_count(h_sel_fd, ibin, reco_sel_fd)
        print("step2f")

    out_file.cd()
    for hist in (h_gen_pr, h_presel_pr, h_sel_pr, h_gen_fd, h_presel_fd, h_sel_fd):
        hist.Write()
    print("FINALISED")
def process_response(self):
    """Build the response matrices and the related control histograms.

    The generated and reconstructed MC frames skimmed in bins of HF
    candidate pt are loaded, selected and merged into one generated and
    one reconstructed frame:

    * generated frames are selected with the run list of the trigger and
      with ``self.s_jetsel_gen``;
    * reconstructed frames are selected with the event selection, the reco
      jet selection, the trigger selection and, when ``self.doml is True``,
      the ML selection of the corresponding pt bin.

    The shape variable ``z`` (reco and gen) is added to the frames, which
    are then split into prompt and non-prompt (feed-down) candidates. For
    both classes the method fills kinematic-efficiency histograms, gen
    versus reco correlation histograms, fractional-difference histograms
    and ``RooUnfoldResponse`` objects; for prompt candidates it also fills
    the inputs of a closure test based on a train/test split. Everything
    is written to ``self.n_fileeff``, opened in update mode and closed at
    the end.
    """
    nzbin_reco = self.p_nbinshape_reco
    zbinarray_reco = array.array('d', self.varshaperanges_reco)
    nzbin_gen = self.p_nbinshape_gen
    zbinarray_gen = array.array('d', self.varshaperanges_gen)

    njetptbin_reco = self.p_nbin2_reco
    jetptbinarray_reco = array.array('d', self.var2ranges_reco)
    njetptbin_gen = self.p_nbin2_gen
    jetptbinarray_gen = array.array('d', self.var2ranges_gen)

    candptbin = self.lpt_finbinmin.copy()
    candptbin.append(self.lpt_finbinmax[-1])
    candptbinarray = array.array('d', candptbin)

    out_file = TFile.Open(self.n_fileeff, "update")
    list_df_mc_reco = []
    list_df_mc_gen = []

    for iptskim, _ in enumerate(self.lpt_anbinmin):
        df_mc_gen = pickle.load(openfile(self.lpt_gendecmerged[iptskim], "rb"))
        df_mc_gen = selectdfrunlist(df_mc_gen, \
                self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        df_mc_gen = df_mc_gen.query(self.s_jetsel_gen)
        list_df_mc_gen.append(df_mc_gen)

        df_mc_reco = pickle.load(openfile(self.lpt_recodecmerged[iptskim], "rb"))
        if self.s_evtsel is not None:
            df_mc_reco = df_mc_reco.query(self.s_evtsel)
        if self.s_jetsel_reco is not None:
            df_mc_reco = df_mc_reco.query(self.s_jetsel_reco)
        if self.s_trigger is not None:
            df_mc_reco = df_mc_reco.query(self.s_trigger)
        if self.doml is True:
            df_mc_reco = df_mc_reco.query(self.l_selml[iptskim])
        list_df_mc_reco.append(df_mc_reco)

    # Merge the frames of the different HF pt bins into a single generated
    # and a single reconstructed frame.
    df_gen = pd.concat(list_df_mc_gen)
    df_mc_reco = pd.concat(list_df_mc_reco)

    # add the z columns
    df_gen["z"] = z_calc(df_gen.pt_jet, df_gen.phi_jet, df_gen.eta_jet,
                         df_gen.pt_cand, df_gen.phi_cand, df_gen.eta_cand)

    df_mc_reco["z"] = z_calc(df_mc_reco.pt_jet, df_mc_reco.phi_jet, df_mc_reco.eta_jet,
                             df_mc_reco.pt_cand, df_mc_reco.phi_cand, df_mc_reco.eta_cand)

    df_mc_reco["z_gen"] = z_gen_calc(df_mc_reco.pt_gen_jet, df_mc_reco.phi_gen_jet,
                                     df_mc_reco.eta_gen_jet, df_mc_reco.pt_gen_cand,
                                     df_mc_reco.delta_phi_gen_jet, df_mc_reco.delta_eta_gen_jet)

    # Split into prompt and non-prompt (feed-down) candidates.
    df_gen_nonprompt = df_gen[df_gen.ismcfd == 1]
    df_gen_prompt = df_gen[df_gen.ismcprompt == 1]
    df_mc_reco_merged_nonprompt = df_mc_reco[df_mc_reco.ismcfd == 1]
    df_mc_reco_merged_prompt = df_mc_reco[df_mc_reco.ismcprompt == 1]

    # The following plots are 3d plots all at generated level of z,
    # pt_jet and pt_cand. This was used in the first version of the feeddown
    # subtraction, currently is obsolete

    hzvsjetpt_gen_unmatched = TH2F("hzvsjetpt_gen_unmatched", "hzvsjetpt_gen_unmatched", \
        nzbin_gen, zbinarray_gen, njetptbin_gen, jetptbinarray_gen)
    df_zvsjetpt_gen_unmatched = df_gen_prompt.loc[:, [self.v_varshape_binning, "pt_jet"]]
    fill_hist(hzvsjetpt_gen_unmatched, df_zvsjetpt_gen_unmatched)
    hzvsjetpt_gen_unmatched.Write()
    titlehist = "hzvsjetptvscandpt_gen_nonprompt"
    hzvsjetptvscandpt_gen_nonprompt = makefill3dhist(df_gen_nonprompt, titlehist, \
        zbinarray_gen, jetptbinarray_gen, candptbinarray, self.v_varshape_binning, \
        "pt_jet", "pt_cand")
    hzvsjetptvscandpt_gen_nonprompt.Write()

    # hz_gen_nocuts is the distribution of generated z values in bins of
    # gen_jet pt before the reco z and jetpt selection. hz_gen_cuts
    # also includes cut on z reco and jet pt reco. These are used for overall
    # efficiency correction to estimate the fraction of candidates that are
    # in the reco range but outside the gen range and viceversa

    for ibin2 in range(self.p_nbin2_gen):
        suffix = "%s_%.2f_%.2f" % \
            (self.v_var2_binning, self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        hz_gen_nocuts = TH1F("hz_gen_nocuts_nonprompt" + suffix, \
            "hz_gen_nocuts_nonprompt" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_nocuts.Sumw2()
        hz_gen_cuts = TH1F("hz_gen_cuts_nonprompt" + suffix,
                           "hz_gen_cuts_nonprompt" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_cuts.Sumw2()

        df_tmp = seldf_singlevar(df_mc_reco_merged_nonprompt, "pt_gen_jet", \
                                 self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        df_tmp = seldf_singlevar(df_tmp, self.v_varshape_binning_gen, \
                                 self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1])
        fill_hist(hz_gen_nocuts, df_tmp[self.v_varshape_binning_gen])
        df_tmp = seldf_singlevar(df_tmp, "pt_jet",
                                 self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
        df_tmp = seldf_singlevar(df_tmp, self.v_varshape_binning,
                                 self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
        fill_hist(hz_gen_cuts, df_tmp[self.v_varshape_binning_gen])

        hz_gen_cuts.Write()
        hz_gen_nocuts.Write()

        # Addendum for unfolding
        hz_gen_nocuts_pr = TH1F("hz_gen_nocuts" + suffix, \
            "hz_gen_nocuts" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_nocuts_pr.Sumw2()
        hz_gen_cuts_pr = TH1F("hz_gen_cuts" + suffix,
                              "hz_gen_cuts" + suffix, nzbin_gen, zbinarray_gen)
        hz_gen_cuts_pr.Sumw2()
        df_tmp_pr = seldf_singlevar(df_mc_reco_merged_prompt, "pt_gen_jet", \
                                    self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        df_tmp_pr = seldf_singlevar(df_tmp_pr, self.v_varshape_binning_gen, \
                                    self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1])
        fill_hist(hz_gen_nocuts_pr, df_tmp_pr[self.v_varshape_binning_gen])
        df_tmp_pr = seldf_singlevar(df_tmp_pr, "pt_jet",
                                    self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
        df_tmp_pr = seldf_singlevar(df_tmp_pr, self.v_varshape_binning,
                                    self.lvarshape_binmin_reco[0],
                                    self.lvarshape_binmax_reco[-1])
        fill_hist(hz_gen_cuts_pr, df_tmp_pr[self.v_varshape_binning_gen])
        hz_gen_cuts_pr.Write()
        hz_gen_nocuts_pr.Write()
        # End addendum for unfolding

    df_tmp_selgen, df_tmp_selreco, df_tmp_selrecogen = \
            self.create_df_closure(df_mc_reco_merged_nonprompt)

    df_tmp_selgen_pr, df_tmp_selreco_pr, df_tmp_selrecogen_pr = \
            self.create_df_closure(df_mc_reco_merged_prompt)

    # histograms for response of feeddown
    hzvsjetpt_reco_nocuts = \
        build2dhisto("hzvsjetpt_reco_nocuts_nonprompt", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_reco_cuts = \
        build2dhisto("hzvsjetpt_reco_cuts_nonprompt", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_gen_nocuts = \
        build2dhisto("hzvsjetpt_gen_nocuts_nonprompt", zbinarray_gen, jetptbinarray_gen)
    hzvsjetpt_gen_cuts = \
        build2dhisto("hzvsjetpt_gen_cuts_nonprompt", zbinarray_gen, jetptbinarray_gen)

    # The clones are taken before filling, i.e. they only carry the binning.
    hzvsjetpt_reco = hzvsjetpt_reco_nocuts.Clone("hzvsjetpt_reco_nonprompt")
    hzvsjetpt_gen = hzvsjetpt_gen_nocuts.Clone("hzvsjetpt_genv")
    response_matrix = RooUnfoldResponse(hzvsjetpt_reco, hzvsjetpt_gen)

    fill2dhist(df_tmp_selreco, hzvsjetpt_reco_nocuts, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen, hzvsjetpt_gen_nocuts, self.v_varshape_binning_gen, "pt_gen_jet")
    fill2dhist(df_tmp_selrecogen, hzvsjetpt_reco_cuts, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selrecogen, hzvsjetpt_gen_cuts, self.v_varshape_binning_gen, "pt_gen_jet")

    hzvsjetpt_reco_nocuts.Write()
    hzvsjetpt_gen_nocuts.Write()
    hzvsjetpt_reco_cuts.Write()
    hzvsjetpt_gen_cuts.Write()

    # histograms for unfolding
    hzvsjetpt_reco_nocuts_pr = \
        build2dhisto("hzvsjetpt_reco_nocuts", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_reco_cuts_pr = \
        build2dhisto("hzvsjetpt_reco_cuts", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_gen_nocuts_pr = \
        build2dhisto("hzvsjetpt_gen_nocuts", zbinarray_gen, jetptbinarray_gen)
    hzvsjetpt_gen_cuts_pr = \
        build2dhisto("hzvsjetpt_gen_cuts", zbinarray_gen, jetptbinarray_gen)

    fill2dhist(df_tmp_selreco_pr, hzvsjetpt_reco_nocuts_pr, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen_pr, hzvsjetpt_gen_nocuts_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_reco_cuts_pr,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_gen_cuts_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    hzvsjetpt_reco_nocuts_pr.Write()
    hzvsjetpt_gen_nocuts_pr.Write()
    hzvsjetpt_reco_cuts_pr.Write()
    hzvsjetpt_gen_cuts_pr.Write()

    hzvsjetpt_reco_closure_pr = \
        build2dhisto("hzvsjetpt_reco_closure", zbinarray_reco, jetptbinarray_reco)
    # NOTE(review): this histogram is filled with generated-level variables
    # further down but is booked with the reco binning - confirm whether the
    # gen binning was intended.
    hzvsjetpt_gen_closure_pr = \
        build2dhisto("hzvsjetpt_gen_closure", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_reco_pr = \
        build2dhisto("hzvsjetpt_reco", zbinarray_reco, jetptbinarray_reco)
    hzvsjetpt_gen_pr = \
        build2dhisto("hzvsjetpt_gen", zbinarray_gen, jetptbinarray_gen)
    response_matrix_pr = RooUnfoldResponse(hzvsjetpt_reco_pr, hzvsjetpt_gen_pr)
    response_matrix_closure_pr = RooUnfoldResponse(hzvsjetpt_reco_pr, hzvsjetpt_gen_pr)

    fill2dhist(df_tmp_selreco_pr, hzvsjetpt_reco_pr, self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen_pr, hzvsjetpt_gen_pr, self.v_varshape_binning_gen, "pt_gen_jet")
    hzvsjetpt_reco_pr.Write()
    hzvsjetpt_gen_pr.Write()

    hjetpt_gen_nocuts_pr = TH1F("hjetpt_gen_nocuts", \
        "hjetpt_gen_nocuts", njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_cuts_pr = TH1F("hjetpt_gen_cuts", \
        "hjetpt_gen_cuts", njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_nocuts_closure = TH1F("hjetpt_gen_nocuts_closure", \
        "hjetpt_gen_nocuts_closure", njetptbin_gen, jetptbinarray_gen)
    hjetpt_gen_cuts_closure = TH1F("hjetpt_gen_cuts_closure", \
        "hjetpt_gen_cuts_closure", njetptbin_gen, jetptbinarray_gen)
    # Enable sum-of-weights storage on all four histograms (the "cuts"
    # closure histogram used to be skipped in favour of a repeated call
    # on the "nocuts" one).
    hjetpt_gen_nocuts_pr.Sumw2()
    hjetpt_gen_cuts_pr.Sumw2()
    hjetpt_gen_nocuts_closure.Sumw2()
    hjetpt_gen_cuts_closure.Sumw2()

    fill_hist(hjetpt_gen_nocuts_pr, df_tmp_selgen_pr["pt_gen_jet"])
    fill_hist(hjetpt_gen_cuts_pr, df_tmp_selrecogen_pr["pt_gen_jet"])
    hjetpt_gen_nocuts_pr.Write()
    hjetpt_gen_cuts_pr.Write()
    # end of histograms for unfolding

    hjetpt_genvsreco_full = \
        TH2F("hjetpt_genvsreco_full_nonprompt", "hjetpt_genvsreco_full_nonprompt", \
        njetptbin_gen * 100, self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1], \
        njetptbin_reco * 100, self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])

    hz_genvsreco_full = \
        TH2F("hz_genvsreco_full_nonprompt", "hz_genvsreco_full_nonprompt", \
             nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1],
             nzbin_reco * 100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])

    # The normalisations below are guarded like the per-bin ones further
    # down, so that an empty selection does not raise a division by zero.
    fill2dhist(df_tmp_selrecogen, hjetpt_genvsreco_full, "pt_gen_jet", "pt_jet")
    norm = hjetpt_genvsreco_full.Integral(1, -1, 1, -1)
    if norm > 0:
        hjetpt_genvsreco_full.Scale(1.0 / norm)
    hjetpt_genvsreco_full.Write()
    fill2dhist(df_tmp_selrecogen, hz_genvsreco_full,
               self.v_varshape_binning_gen, self.v_varshape_binning)
    norm = hz_genvsreco_full.Integral(1, -1, 1, -1)
    if norm > 0:
        hz_genvsreco_full.Scale(1.0 / norm)
    hz_genvsreco_full.Write()

    for row in df_tmp_selrecogen.itertuples():
        response_matrix.Fill(getattr(row, self.v_varshape_binning),
                             row.pt_jet, getattr(row, self.v_varshape_binning_gen),
                             row.pt_gen_jet)
    response_matrix.Write("response_matrix_nonprompt")

    # histograms for unfolding
    hjetpt_genvsreco_full_pr = \
        TH2F("hjetpt_genvsreco_full", "hjetpt_genvsreco_full", \
        njetptbin_gen * 100, self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1], \
        njetptbin_reco * 100, self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])

    hz_genvsreco_full_pr = \
        TH2F("hz_genvsreco_full", "hz_genvsreco_full", \
             nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1],
             nzbin_reco * 100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
    fill2dhist(df_tmp_selrecogen_pr, hjetpt_genvsreco_full_pr, "pt_gen_jet", "pt_jet")
    norm_pr = hjetpt_genvsreco_full_pr.Integral(1, -1, 1, -1)
    if norm_pr > 0:
        hjetpt_genvsreco_full_pr.Scale(1.0 / norm_pr)
    hjetpt_genvsreco_full_pr.Write()
    fill2dhist(df_tmp_selrecogen_pr, hz_genvsreco_full_pr,
               self.v_varshape_binning_gen, self.v_varshape_binning)
    norm_pr = hz_genvsreco_full_pr.Integral(1, -1, 1, -1)
    if norm_pr > 0:
        hz_genvsreco_full_pr.Scale(1.0 / norm_pr)
    hz_genvsreco_full_pr.Write()

    hzvsjetpt_prior_weights = build2dhisto("hzvsjetpt_prior_weights", \
        zbinarray_gen, jetptbinarray_gen)
    fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_prior_weights,
               self.v_varshape_binning_gen, "pt_gen_jet")
    # end of histograms for unfolding

    # gen-vs-reco z correlation in bins of reco jet pt
    for ibin2 in range(self.p_nbin2_reco):
        df_tmp_selrecogen_jetbin = seldf_singlevar(df_tmp_selrecogen, "pt_jet", \
            self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
        suffix = "%s_%.2f_%.2f" % (self.v_var2_binning, \
            self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
        hz_genvsreco = TH2F("hz_genvsreco_nonprompt" + suffix, \
            "hz_genvsreco_nonprompt" + suffix, \
            nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1], \
            nzbin_reco*100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_jetbin, hz_genvsreco,
                   self.v_varshape_binning_gen, self.v_varshape_binning)
        norm = hz_genvsreco.Integral(1, -1, 1, -1)
        if norm > 0:
            hz_genvsreco.Scale(1.0/norm)
        hz_genvsreco.Write()

        df_tmp_selrecogen_pr_jetbin = seldf_singlevar(df_tmp_selrecogen_pr, "pt_jet", \
            self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
        suffix = "%s_%.2f_%.2f" % (self.v_var2_binning, \
            self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
        hz_genvsreco_pr = TH2F("hz_genvsreco" + suffix, "hz_genvsreco" + suffix, \
            nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1], \
            nzbin_reco*100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_pr_jetbin, hz_genvsreco_pr,
                   self.v_varshape_binning_gen, self.v_varshape_binning)
        norm_pr = hz_genvsreco_pr.Integral(1, -1, 1, -1)
        if norm_pr > 0:
            hz_genvsreco_pr.Scale(1.0/norm_pr)
        hz_genvsreco_pr.Write()

    # gen-vs-reco jet pt correlation in bins of reco z
    for ibinshape in range(len(self.lvarshape_binmin_reco)):
        df_tmp_selrecogen_zbin = seldf_singlevar(df_tmp_selrecogen, \
            self.v_varshape_binning, \
            self.lvarshape_binmin_reco[ibinshape], self.lvarshape_binmax_reco[ibinshape])
        suffix = "%s_%.2f_%.2f" % \
            (self.v_varshape_binning, self.lvarshape_binmin_reco[ibinshape],
             self.lvarshape_binmax_reco[ibinshape])
        hjetpt_genvsreco = TH2F("hjetpt_genvsreco_nonprompt" + suffix, \
            "hjetpt_genvsreco_nonprompt" + suffix, njetptbin_gen * 100, \
            self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1], \
            njetptbin_reco * 100, self.lvar2_binmin_reco[0], \
            self.lvar2_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_zbin, hjetpt_genvsreco, "pt_gen_jet", "pt_jet")
        norm = hjetpt_genvsreco.Integral(1, -1, 1, -1)
        if norm > 0:
            hjetpt_genvsreco.Scale(1.0/norm)
        hjetpt_genvsreco.Write()

        df_tmp_selrecogen_pr_zbin = seldf_singlevar(df_tmp_selrecogen_pr, \
            self.v_varshape_binning, \
            self.lvarshape_binmin_reco[ibinshape], self.lvarshape_binmax_reco[ibinshape])
        suffix = "%s_%.2f_%.2f" % \
            (self.v_varshape_binning, self.lvarshape_binmin_reco[ibinshape],
             self.lvarshape_binmax_reco[ibinshape])
        hjetpt_genvsreco_pr = TH2F("hjetpt_genvsreco" + suffix, \
            "hjetpt_genvsreco" + suffix, njetptbin_gen * 100, self.lvar2_binmin_gen[0], \
            self.lvar2_binmax_gen[-1], njetptbin_reco * 100, self.lvar2_binmin_reco[0], \
            self.lvar2_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_pr_zbin, hjetpt_genvsreco_pr, "pt_gen_jet", "pt_jet")
        norm_pr = hjetpt_genvsreco_pr.Integral(1, -1, 1, -1)
        if norm_pr > 0:
            hjetpt_genvsreco_pr.Scale(1.0/norm_pr)
        hjetpt_genvsreco_pr.Write()

    # (reco - gen) / gen of z in bins of generated z
    for ibinshape in range(len(self.lvarshape_binmin_gen)):
        dtmp_nonprompt_zgen = seldf_singlevar(df_mc_reco_merged_nonprompt, \
            self.v_varshape_binning_gen, self.lvarshape_binmin_gen[ibinshape],
            self.lvarshape_binmax_gen[ibinshape])
        suffix = "%s_%.2f_%.2f" % \
            (self.v_varshape_binning, self.lvarshape_binmin_gen[ibinshape],
             self.lvarshape_binmax_gen[ibinshape])
        hz_fracdiff = TH1F("hz_fracdiff_nonprompt" + suffix,
                           "hz_fracdiff_nonprompt" + suffix, 100, -2, 2)
        fill_hist(hz_fracdiff, (dtmp_nonprompt_zgen[self.v_varshape_binning] - \
            dtmp_nonprompt_zgen[self.v_varshape_binning_gen]) / \
            dtmp_nonprompt_zgen[self.v_varshape_binning_gen])
        norm = hz_fracdiff.Integral(1, -1)
        if norm:
            hz_fracdiff.Scale(1.0 / norm)
        hz_fracdiff.Write()

        dtmp_prompt_zgen = seldf_singlevar(df_mc_reco_merged_prompt, \
            self.v_varshape_binning_gen, self.lvarshape_binmin_gen[ibinshape],
            self.lvarshape_binmax_gen[ibinshape])
        suffix = "%s_%.2f_%.2f" % \
            (self.v_varshape_binning, self.lvarshape_binmin_gen[ibinshape],
             self.lvarshape_binmax_gen[ibinshape])
        hz_fracdiff_pr = TH1F("hz_fracdiff_prompt" + suffix,
                              "hz_fracdiff_prompt" + suffix, 100, -2, 2)
        fill_hist(hz_fracdiff_pr, (dtmp_prompt_zgen[self.v_varshape_binning] - \
            dtmp_prompt_zgen[self.v_varshape_binning_gen]) / \
            dtmp_prompt_zgen[self.v_varshape_binning_gen])
        norm_pr = hz_fracdiff_pr.Integral(1, -1)
        if norm_pr:
            hz_fracdiff_pr.Scale(1.0 / norm_pr)
        hz_fracdiff_pr.Write()

    # (reco - gen) / gen of the jet pt in bins of generated jet pt
    for ibin2 in range(self.p_nbin2_gen):
        dtmp_nonprompt_jetptgen = seldf_singlevar(df_mc_reco_merged_nonprompt, \
            "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        suffix = "%s_%.2f_%.2f" % (self.v_var2_binning,
                                   self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        hjetpt_fracdiff = TH1F("hjetpt_fracdiff_nonprompt" + suffix,
                               "hjetpt_fracdiff_nonprompt" + suffix, 100, -2, 2)
        fill_hist(hjetpt_fracdiff, (dtmp_nonprompt_jetptgen["pt_jet"] - \
            dtmp_nonprompt_jetptgen["pt_gen_jet"]) / dtmp_nonprompt_jetptgen["pt_gen_jet"])
        norm = hjetpt_fracdiff.Integral(1, -1)
        if norm:
            hjetpt_fracdiff.Scale(1.0 / norm)
        hjetpt_fracdiff.Write()

        dtmp_prompt_jetptgen = seldf_singlevar(df_mc_reco_merged_prompt, \
            "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        suffix = "%s_%.2f_%.2f" % (self.v_var2_binning,
                                   self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        hjetpt_fracdiff_pr = TH1F("hjetpt_fracdiff_prompt" + suffix,
                                  "hjetpt_fracdiff_prompt" + suffix, 100, -2, 2)
        fill_hist(hjetpt_fracdiff_pr, (dtmp_prompt_jetptgen["pt_jet"] - \
            dtmp_prompt_jetptgen["pt_gen_jet"]) / dtmp_prompt_jetptgen["pt_gen_jet"])
        norm_pr = hjetpt_fracdiff_pr.Integral(1, -1)
        if norm_pr:
            hjetpt_fracdiff_pr.Scale(1.0 / norm_pr)
        hjetpt_fracdiff_pr.Write()

    # Closure test: split the prompt sample, use the "test" part as input
    # and the "train" part to build the closure response matrix.
    df_mc_reco_merged_prompt_train, df_mc_reco_merged_prompt_test = \
            train_test_split(df_mc_reco_merged_prompt, test_size=self.closure_frac)
    df_tmp_selgen_pr_test, df_tmp_selreco_pr_test, df_tmp_selrecogen_pr_test = \
            self.create_df_closure(df_mc_reco_merged_prompt_test)
    _, _, df_tmp_selrecogen_pr_train = \
            self.create_df_closure(df_mc_reco_merged_prompt_train)

    fill2dhist(df_tmp_selreco_pr_test, hzvsjetpt_reco_closure_pr,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selgen_pr_test, hzvsjetpt_gen_closure_pr,
               self.v_varshape_binning_gen, "pt_gen_jet")
    hzvsjetpt_reco_closure_pr.Write("input_closure_reco")
    hzvsjetpt_gen_closure_pr.Write("input_closure_gen")

    for ibin2 in range(self.p_nbin2_gen):
        suffix = "%s_%.2f_%.2f" % \
            (self.v_var2_binning, self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        hz_gen_nocuts_closure = TH1F("hz_gen_nocuts_closure" + suffix,
                                     "hz_gen_nocuts_closure" + suffix,
                                     nzbin_gen, zbinarray_gen)
        hz_gen_nocuts_closure.Sumw2()
        hz_gen_cuts_closure = TH1F("hz_gen_cuts_closure" + suffix,
                                   "hz_gen_cuts_closure" + suffix,
                                   nzbin_gen, zbinarray_gen)
        hz_gen_cuts_closure.Sumw2()
        df_tmp_selgen_pr_test_bin = seldf_singlevar(df_tmp_selgen_pr_test, \
            "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        df_tmp_selrecogen_pr_test_bin = seldf_singlevar(df_tmp_selrecogen_pr_test, \
            "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
        fill_hist(hz_gen_nocuts_closure,
                  df_tmp_selgen_pr_test_bin[self.v_varshape_binning_gen])
        fill_hist(hz_gen_cuts_closure,
                  df_tmp_selrecogen_pr_test_bin[self.v_varshape_binning_gen])
        hz_gen_cuts_closure.Write()
        hz_gen_nocuts_closure.Write()

    fill_hist(hjetpt_gen_nocuts_closure, df_tmp_selgen_pr_test["pt_gen_jet"])
    fill_hist(hjetpt_gen_cuts_closure, df_tmp_selrecogen_pr_test["pt_gen_jet"])
    hjetpt_gen_nocuts_closure.Write()
    hjetpt_gen_cuts_closure.Write()

    hzvsjetpt_reco_nocuts_closure = TH2F("hzvsjetpt_reco_nocuts_closure",
                                         "hzvsjetpt_reco_nocuts_closure",
                                         nzbin_reco, zbinarray_reco,
                                         njetptbin_reco, jetptbinarray_reco)
    hzvsjetpt_reco_nocuts_closure.Sumw2()
    hzvsjetpt_reco_cuts_closure = TH2F("hzvsjetpt_reco_cuts_closure",
                                       "hzvsjetpt_reco_cuts_closure",
                                       nzbin_reco, zbinarray_reco,
                                       njetptbin_reco, jetptbinarray_reco)
    hzvsjetpt_reco_cuts_closure.Sumw2()

    fill2dhist(df_tmp_selreco_pr_test, hzvsjetpt_reco_nocuts_closure,
               self.v_varshape_binning, "pt_jet")
    fill2dhist(df_tmp_selrecogen_pr_test, hzvsjetpt_reco_cuts_closure,
               self.v_varshape_binning, "pt_jet")
    hzvsjetpt_reco_nocuts_closure.Write()
    hzvsjetpt_reco_cuts_closure.Write()

    def fill_weighted_response(response, df_rows):
        """Fill response row by row, weighting by 1/prior when requested."""
        for row in df_rows.itertuples():
            response_matrix_weight = 1.0
            if self.doprior is True:
                binx = hzvsjetpt_prior_weights.GetXaxis().FindBin(
                    getattr(row, self.v_varshape_binning_gen))
                biny = hzvsjetpt_prior_weights.GetYaxis().FindBin(row.pt_gen_jet)
                weight = hzvsjetpt_prior_weights.GetBinContent(binx, biny)
                # Empty prior bins keep the unit weight.
                if weight > 0.0:
                    response_matrix_weight = 1.0/weight
            response.Fill(getattr(row, self.v_varshape_binning), row.pt_jet,\
                getattr(row, self.v_varshape_binning_gen), row.pt_gen_jet,
                          response_matrix_weight)

    fill_weighted_response(response_matrix_pr, df_tmp_selrecogen_pr)
    fill_weighted_response(response_matrix_closure_pr, df_tmp_selrecogen_pr_train)
    response_matrix_pr.Write("response_matrix")
    response_matrix_closure_pr.Write("response_matrix_closure")

    out_file.Close()
def process_histomass_single(self, index):
    """Write normalisation and invariant-mass histograms for input file ``index``.

    The ROOT file ``self.l_histomass[index]`` is recreated and filled with:

    * per second-variable bin: ``hEvForNorm_mult<i>``, whose bin 1 holds the
      value returned by ``getnormforselevt`` and whose bin 2 holds the number
      of events with ``is_ev_rej == 0``;
    * per (pt, second-variable) bin: the candidate invariant-mass spectrum
      (``hmass...``), a copy filled with inverse trigger weights
      (``h_invmass_weight...``, left empty when no weights apply), the
      ``n_tracklets_corr`` distribution and the ``n_tracklets_corr`` vs
      ``v0m_corr`` map;
    * when the candidates carry a ``pt_jet`` column: the output of ``z_calc``
      versus invariant mass;
    * when ``self.mcordata == "mc"``: the signal and reflection mass spectra.

    Args:
        index: position of the input file in the per-file lists
            (``l_histomass``, ``l_evtorig``, ``mptfiles_recoskmldec[bin]``).
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")

    # Event-level frame: trigger selection first, then the run list attached
    # to the current trigger bit.
    dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    runlist = self.run_param[self.runlistrigger[self.triggerbit]]
    dfevtorig = selectdfrunlist(dfevtorig, runlist, "run_number")

    var2_bins = list(zip(self.lvar2_binmin, self.lvar2_binmax))

    for ibin2, (var2_min, var2_max) in enumerate(var2_bins):
        mybindfevtorig = seldf_singlevar(dfevtorig, self.v_var2_binning_gen,
                                         var2_min, var2_max)
        hnorm_name = "hEvForNorm_mult%d" % ibin2
        hNorm = TH1F(hnorm_name, hnorm_name, 2, 0.5, 2.5)
        hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
        hNorm.GetXaxis().SetBinLabel(2, "selected events")
        # Both counters stay at zero when no event falls into this bin.
        nselevt = 0
        norm = 0
        if not mybindfevtorig.empty:
            nselevt = len(mybindfevtorig.query("is_ev_rej==0"))
            norm = getnormforselevt(mybindfevtorig)
        hNorm.SetBinContent(1, norm)
        hNorm.SetBinContent(2, nselevt)
        hNorm.Write()
        # Event-level multiplicity histograms (hmultevtmult<i>,
        # h_v0m_ntracklets<i>) are currently disabled.

    # Trigger weights are only applied to data taken with a trigger other
    # than INT7. The weights file and the function are the same for every
    # bin, so they are loaded once, on first use, instead of once per bin.
    use_trigger_weights = "INT7" not in self.triggerbit and self.mcordata == "data"
    fileweight = None
    funcweighttrig = None

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

        for ibin2, (var2_min, var2_max) in enumerate(var2_bins):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, var2_min, var2_max)
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(df, self.v_var2_binning, var2_min, var2_max)
            df_bin = selectdfrunlist(df_bin, runlist, "run_number")
            fill_hist(h_invmass, df_bin.inv_mass)

            if use_trigger_weights:
                if fileweight is None:
                    fileweight_name = "%s/correctionsweights.root" % self.d_val
                    fileweight = TFile.Open(fileweight_name, "read")
                    namefunction = "funcnorm_%s_%s" % (self.triggerbit,
                                                       self.v_var2_binning_gen)
                    funcweighttrig = fileweight.Get(namefunction)
                # A missing function leaves the weighted histogram empty.
                if funcweighttrig:
                    weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                    weightsinv = [1./weight for weight in weights]
                    fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)

            # Opening the weights file may have changed the current ROOT
            # directory; go back to the output file before writing.
            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()

            histmult = TH1F("hmultpt%dmult%d" % (ipt, ibin2),
                            "hmultpt%dmult%d" % (ipt, ibin2), 1000, 0, 1000)
            fill_hist(histmult, df_bin.n_tracklets_corr)
            histmult.Write()

            h_v0m_ntrackletsD = TH2F("h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                     "h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                     200, 0, 200, 200, -0.5, 1999.5)
            v_v0m_ntrackletsD = np.vstack((df_bin.n_tracklets_corr,
                                           df_bin.v0m_corr)).T
            fill_hist(h_v0m_ntrackletsD, v_v0m_ntrackletsD)
            h_v0m_ntrackletsD.Write()

            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                    5000, 1.00, 6.00, 2000, -0.5, 1.5)
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()

            if self.mcordata == "mc":
                # Tag reflections from the bit variable, then split the
                # candidates into signal and reflection samples.
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(df_bin, self.v_bitvar,
                                                              self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    # The weights file is only read from; release it once all bins are done.
    if fileweight is not None:
        fileweight.Close()
def define_cutvariation_limits(self):
    """Determine, per final pt bin, the loosest and tightest ML cut to vary.

    For every pt bin the MC reconstructed and generated prompt candidates are
    selected in the pt bin and in the second-variable bin ``self.use_var2_bin``.
    The efficiency is the number of selected reconstructed prompt candidates
    divided by the number of generated prompt candidates. Starting from
    ``self.p_cutvar_minrange`` (loose side) and from the central cut
    ``self.lpt_probcutfin`` (tight side), the probability cut is raised in
    ``2 * self.p_ncutvar`` equal steps until the efficiency relative to the
    central one drops below ``1 + self.p_maxperccutvar`` (loose side) or
    ``1 - self.p_maxperccutvar`` (tight side).

    A pt bin without generated prompt candidates is treated like a bin with
    zero central efficiency: a warning is printed and the scan stops at its
    first step instead of raising ``ZeroDivisionError``.

    Returns:
        tuple: ``(min_cv_cut, max_cv_cut)``, two lists with one probability
        cut per final pt bin.
    """

    def efficiency(df_sel, ngen):
        """Return len(df_sel)/ngen, or 0.0 when ngen is zero."""
        return len(df_sel) / ngen if ngen else 0.0

    min_cv_cut = []
    max_cv_cut = []
    ncutvar_temp = self.p_ncutvar * 2

    for ipt in range(self.p_nptfinbins):
        print("Systematics pt-bin: ", ipt)
        bin_id = self.bin_matching[ipt]

        df_mc_reco = pickle.load(openfile(self.lpt_recodecmerged_mc[bin_id], "rb"))
        if self.s_evtsel is not None:
            df_mc_reco = df_mc_reco.query(self.s_evtsel)
        if self.s_trigger_mc is not None:
            df_mc_reco = df_mc_reco.query(self.s_trigger_mc)

        df_mc_gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
        df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)

        # Restrict both samples to the pt bin and to the chosen second-variable bin.
        df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning,
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning,
                                    self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning,
                                     self.lvar2_binmin[self.use_var2_bin],
                                     self.lvar2_binmax[self.use_var2_bin])
        df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning,
                                    self.lvar2_binmin[self.use_var2_bin],
                                    self.lvar2_binmax[self.use_var2_bin])

        df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
        df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
        selml_cent = "y_test_prob%s>%s" % (self.p_modelname, self.lpt_probcutfin[bin_id])
        df_reco_sel_pr = df_reco_presel_pr.query(selml_cent)

        len_gen_pr = len(df_gen_sel_pr)
        if len_gen_pr == 0:
            print("Warning: no generated prompt candidates in pt-bin", ipt,
                  "- efficiencies are set to 0")
        eff_cent = efficiency(df_reco_sel_pr, len_gen_pr)
        print("Central efficiency pt-bin", ipt, ": ", eff_cent)

        # Loose side: raise the cut from the lower edge of the allowed range
        # towards the central value until the efficiency gain is small enough.
        stepsmin = \
          (self.lpt_probcutfin[bin_id] - self.p_cutvar_minrange[bin_id]) / ncutvar_temp
        min_cv_cut.append(self.lpt_probcutfin[bin_id])
        df_reco_cvmin_pr = df_reco_presel_pr
        for icv in range(ncutvar_temp):
            min_cv_cut[ipt] = self.p_cutvar_minrange[bin_id] + icv * stepsmin
            selml_min = "y_test_prob%s>%s" % (self.p_modelname, min_cv_cut[ipt])
            # Cuts only get tighter, so the previous selection can be reused.
            df_reco_cvmin_pr = df_reco_cvmin_pr.query(selml_min)
            eff_min = efficiency(df_reco_cvmin_pr, len_gen_pr)
            if eff_cent == 0:
                break
            if eff_min / eff_cent < 1 + self.p_maxperccutvar:
                break

        eff_min = efficiency(df_reco_cvmin_pr, len_gen_pr)
        print("Minimal efficiency pt-bin", ipt, ": ", eff_min)

        # Tight side: raise the cut from the central value towards the upper
        # edge of the allowed range until the efficiency loss is large enough.
        stepsmax = \
          (self.p_cutvar_maxrange[bin_id] - self.lpt_probcutfin[bin_id]) / ncutvar_temp
        max_cv_cut.append(self.lpt_probcutfin[bin_id])
        df_reco_cvmax_pr = df_reco_sel_pr
        for icv in range(ncutvar_temp):
            max_cv_cut[ipt] = self.lpt_probcutfin[bin_id] + icv * stepsmax
            selml_max = "y_test_prob%s>%s" % (self.p_modelname, max_cv_cut[ipt])
            df_reco_cvmax_pr = df_reco_cvmax_pr.query(selml_max)
            eff_max = efficiency(df_reco_cvmax_pr, len_gen_pr)
            if eff_cent == 0:
                break
            if eff_max / eff_cent < 1 - self.p_maxperccutvar:
                break

        eff_max = efficiency(df_reco_cvmax_pr, len_gen_pr)
        print("Maximal efficiency pt-bin", ipt, ": ", eff_max)

    return min_cv_cut, max_cv_cut
def process_histomass_single(self, index):
    """Produce the event cut-flow and invariant-mass histograms of one file.

    The ROOT file ``self.l_histomass[index]`` is recreated. It receives the
    ``histonorm`` cut-flow (events before any cut, after the trigger query,
    after the run-list selection and after the event-selection query), the
    three histograms returned by ``gethistonormforselevt_varsel`` and, for
    every final pt bin, the invariant-mass spectrum of the selected
    candidates. For MC (``self.mcordata == "mc"``) the signal, background and
    reflection spectra are stored as well.

    Args:
        index: position of the input file in the per-file lists
            (``l_histomass``, ``l_evtorig``, ``mptfiles_recoskmldec[bin]``).
    """

    def book_mass_hist(name):
        """Book an empty mass histogram with the configured fit binning."""
        return TH1F(name, "", self.p_num_bins,
                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])

    myfile = TFile.Open(self.l_histomass[index], "recreate")

    # Event counts are recorded after each successive event-level cut.
    dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
    neventsorig = len(dfevtorig)
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    neventsaftertrigger = len(dfevtorig)
    if self.runlistrigger is not None:
        dfevtorig = selectdfrunlist(dfevtorig,
                                    self.run_param[self.runlistrigger], "run_number")
    neventsafterrunsel = len(dfevtorig)
    dfevtevtsel = dfevtorig if self.s_evtsel is None else dfevtorig.query(self.s_evtsel)
    neventsafterevtsel = len(dfevtevtsel)

    # Validation plot for the event selection: one labelled bin per stage.
    cutflow = (
        ("tot events", neventsorig),
        ("tot events after trigger", neventsaftertrigger),
        ("tot events after run sel", neventsafterrunsel),
        ("tot events after evt sel", neventsafterevtsel),
    )
    histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
    for ibin, (stage_label, nevents) in enumerate(cutflow, start=1):
        histonorm.SetBinContent(ibin, nevents)
        histonorm.GetXaxis().SetBinLabel(ibin, stage_label)
    histonorm.Write()
    myfile.cd()

    labeltrigger = "hbit%s" % (self.triggerbit)
    norm_histos = gethistonormforselevt_varsel(dfevtorig, dfevtevtsel,
                                               labeltrigger, self.s_var_evt_sel)
    hsel, hnovtxmult, hvtxoutmult = norm_histos
    for norm_histo in (hsel, hnovtxmult, hvtxoutmult):
        norm_histo.Write()

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        candidates_path = self.mptfiles_recoskmldec[bin_id][index]
        df = pickle.load(openfile(candidates_path, "rb"))

        # Candidate selections are applied in a fixed order: event
        # selection, trigger, run list, ML cut, pt bin, custom cuts.
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        if self.runlistrigger is not None:
            df = selectdfrunlist(df, self.run_param[self.runlistrigger], "run_number")
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        pt_low = self.lpt_finbinmin[ipt]
        pt_high = self.lpt_finbinmax[ipt]
        df = seldf_singlevar(df, self.v_var_binning, pt_low, pt_high)
        if self.do_custom_analysis_cuts:
            df = self.apply_cuts_ptbin(df, ipt)

        # The histogram suffix encodes the pt bin and the probability cut(s).
        probcut = self.lpt_probcutfin[ipt]
        if self.mltype == "MultiClassification":
            suffix = "%s%d_%d_%.2f%.2f" % (self.v_var_binning, pt_low, pt_high,
                                           probcut[0], probcut[1])
        else:
            suffix = "%s%d_%d_%.2f" % (self.v_var_binning, pt_low, pt_high, probcut)

        h_invmass = book_mass_hist("hmass" + suffix)
        fill_hist(h_invmass, df[self.v_invmass])
        myfile.cd()
        h_invmass.Write()

        if self.mcordata != "mc":
            continue

        # MC only: tag reflections, then store one spectrum per category.
        df[self.v_ismcrefl] = np.array(tag_bit_df(df, self.v_bitvar, self.b_mcrefl),
                                       dtype=int)
        categories = [
            ("hmass_sig", df[df[self.v_ismcsignal] == 1]),
            ("hmass_bkg", df[df[self.v_ismcbkg] == 1]),
            ("hmass_refl", df[df[self.v_ismcrefl] == 1]),
        ]
        mc_histos = [book_mass_hist(prefix + suffix) for prefix, _ in categories]
        for mc_histo, (_, df_category) in zip(mc_histos, categories):
            fill_hist(mc_histo, df_category[self.v_invmass])
        myfile.cd()
        for mc_histo in mc_histos:
            mc_histo.Write()