def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, weightfunc=None):
    """
    Build the three histograms of `var` that enter the event normalisation.

    Args:
        df_evt: event dataframe carrying the 'is_ev_rej' bitmap column
        dfevtevtsel: dataframe of the selected events
        label: suffix of the histogram names; "_weight" is appended when a
               weight function is given
        var: column that is histogrammed
        weightfunc: optional function passed to `evaluate`; every entry is
                    filled with the inverse of the value it returns

    Return:
        (hSelMult, hNoVtxMult, hVtxOutMult): histograms of the selected
        events, of the kept events that are not tagged as having a vertex,
        and of the kept events passing the z-vertex bit selection.
    """
    weighted = weightfunc is not None
    if weighted:
        label = label + "_weight"

    def book(prefix):
        # all three histograms share the binning configured on the instance
        name = prefix + label
        return TH1F(name, name, self.nbinshisto, self.minvaluehisto, self.maxvaluehisto)

    hSelMult = book('sel_')
    hNoVtxMult = book('novtx_')
    hVtxOutMult = book('vtxout_')

    # NOTE(review): bit lists presumably are [bits required on, bits required off];
    # confirm against filter_bit_df / tag_bit_df.
    df_to_keep = filter_bit_df(df_evt, 'is_ev_rej', [[], [0, 5, 6, 10, 11]])
    # events with reco vtx after previous selection
    has_reco_vtx = tag_bit_df(df_to_keep, 'is_ev_rej', [[], [1, 2, 7, 12]])
    df_no_vtx = df_to_keep[~has_reco_vtx.values]
    # events with reco zvtx > 10 cm after previous selection
    df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej', [[3], [1, 2, 7, 12]])

    to_fill = (
        (hSelMult, dfevtevtsel),
        (hNoVtxMult, df_no_vtx),
        (hVtxOutMult, df_bit_zvtx_gr10),
    )
    for histo, frame in to_fill:
        values = frame[var]
        if weighted:
            inverse_weights = [1./weight for weight in evaluate(weightfunc, values)]
            fill_hist(histo, values, weights=inverse_weights)
        else:
            fill_hist(histo, values)

    return hSelMult, hNoVtxMult, hVtxOutMult
def gethistonormforselevt_varsel(df_evt, dfevtevtsel, label, varsel):
    """
    Store the event counts needed for the normalisation in one-bin histograms.

    Args:
        df_evt: event dataframe carrying the bitmap column `varsel`
        dfevtevtsel: dataframe of the selected events
        label: suffix of the histogram names
        varsel: name of the bitmap column used for the event categories

    Return:
        (hSelMult, hNoVtxMult, hVtxOutMult): single-bin histograms whose
        content is the number of rows of, respectively, the selected events,
        the kept events not tagged as having a vertex, and the kept events
        passing the z-vertex bit selection.
    """
    def book(prefix):
        # one bin centred on zero, used as a plain counter
        name = prefix + label
        return TH1F(name, name, 1, -0.5, 0.5)

    hSelMult = book('sel_')
    hNoVtxMult = book('novtx_')
    hVtxOutMult = book('vtxout_')

    df_to_keep = filter_bit_df(df_evt, varsel, [[], [0, 5, 6, 10, 11]])
    # events with reco vtx after previous selection
    has_reco_vtx = tag_bit_df(df_to_keep, varsel, [[], [1, 2, 7, 12]])
    df_no_vtx = df_to_keep[~has_reco_vtx.values]
    # events with reco zvtx > 10 cm after previous selection
    df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, varsel, [[3], [1, 2, 7, 12]])

    counters = (
        (hSelMult, dfevtevtsel),
        (hNoVtxMult, df_no_vtx),
        (hVtxOutMult, df_bit_zvtx_gr10),
    )
    for histo, frame in counters:
        histo.SetBinContent(1, len(frame))

    return hSelMult, hNoVtxMult, hVtxOutMult
def getnormforselevt(df_evt):
    """
    Compute the event normalisation from the 'is_ev_rej' bitmap.

    With n_sel the number of accepted events (is_ev_rej == 0), n_novtx the
    number of kept events without reconstructed vertex and n_zvtx_gr10 the
    number of kept events passing the z-vertex bit selection, the result is

        (n_sel + n_novtx) - n_novtx * n_zvtx_gr10 / (n_sel + n_zvtx_gr10)

    Args:
        df_evt: event dataframe carrying the 'is_ev_rej' column

    Return:
        the normalisation as a float; 0.0 when both n_sel and n_zvtx_gr10
        are zero (the expression above would otherwise divide by zero).
    """
    # accepted events
    n_ev_sel = len(df_evt.query('is_ev_rej==0').index)
    # NOTE(review): presumably drops events rejected because of trigger /
    # physics selection / centrality -- confirm the bit meaning.
    df_to_keep = filter_bit_df(df_evt, 'is_ev_rej', [[], [0, 5, 6, 10, 11]])
    # events with reco vtx after previous selection
    df_bit_recovtx = filter_bit_df(df_to_keep, 'is_ev_rej', [[], [1, 2, 7, 12]])
    # events with reco zvtx > 10 cm after previous selection
    df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej', [[3], [1, 2, 7, 12]])

    n_no_reco_vtx = len(df_to_keep.index) - len(df_bit_recovtx.index)
    n_zvtx_gr10 = len(df_bit_zvtx_gr10.index)

    denominator = n_ev_sel + n_zvtx_gr10
    if denominator == 0:
        # The formula equals n_sel * (1 + n_novtx / denominator), which is 0
        # for n_sel == 0 and any non-zero denominator; use the same value
        # here instead of raising ZeroDivisionError.
        return 0.
    return (n_ev_sel + n_no_reco_vtx) - n_no_reco_vtx * n_zvtx_gr10 / denominator
def unpack(self, file_index):
    """
    Convert one input ROOT file into pickled dataframes.

    Written, in this order: all events after run-list and centrality
    selection (l_evtorig), the "good" events (l_evt), the reconstructed
    candidates merged with the good events (l_reco) and, for MC only, the
    generated particles merged with all events (l_gen).

    Args:
        file_index: position of the file in the l_root / l_* path lists
    """
    def bit_flag(frame, bits):
        # integer column from the per-row tag returned by tag_bit_df
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    def dump(frame, path):
        pickle.dump(frame, openfile(path, "wb"), protocol=4)

    rootpath = self.l_root[file_index]
    is_mc = self.mcordata == "mc"

    # Event tree: run list and centrality selection
    events_all = uproot.open(rootpath)[self.n_treeevt].pandas.df(branches=self.v_evt)
    events_all = selectdfrunlist(events_all, self.runlist, "run_number")
    events_all = selectdfquery(events_all, self.s_cen_unp)
    events_all = events_all.reset_index(drop=True)
    dump(events_all, self.l_evtorig[file_index])

    # Good events only
    events_good = selectdfquery(events_all, self.s_good_evt_unp)
    events_good = events_good.reset_index(drop=True)
    dump(events_good, self.l_evt[file_index])

    # Reconstructed candidates
    reco = uproot.open(rootpath)[self.n_treereco].pandas.df(branches=self.v_all)
    reco = selectdfrunlist(reco, self.runlist, "run_number")
    reco = selectdfquery(reco, self.s_reco_unp)
    reco = pd.merge(reco, events_good, on=self.v_evtmatch)
    in_fiducial = selectfidacc(reco.pt_cand.values, reco.y_cand.values)
    reco = reco[np.array(in_fiducial, dtype=bool)]
    if self.b_trackcuts is not None:
        reco = filter_bit_df(reco, self.v_bitvar, self.b_trackcuts)
    reco[self.v_isstd] = bit_flag(reco, self.b_std)
    reco = reco.reset_index(drop=True)
    if is_mc:
        mc_columns = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
        for column, bits in mc_columns:
            reco[column] = bit_flag(reco, bits)
    dump(reco, self.l_reco[file_index])

    if not is_mc:
        return

    # Generated particles (MC only), matched to all events
    gen = uproot.open(rootpath)[self.n_treegen].pandas.df(branches=self.v_gen)
    gen = selectdfrunlist(gen, self.runlist, "run_number")
    gen = pd.merge(gen, events_all, on=self.v_evtmatch)
    gen = selectdfquery(gen, self.s_gen_unp)
    gen[self.v_isstd] = bit_flag(gen, self.b_std)
    for column, bits in mc_columns:
        gen[column] = bit_flag(gen, bits)
    gen = gen.reset_index(drop=True)
    dump(gen, self.l_gen[file_index])
def filter_df_cand(dataframe, main_dict, sel_opt):
    '''Filter a dataframe according to the type of candidate.

    Works with both the bitmap and the old (query string) selection method.
    In 'database_ml_parameters.yml' exactly one of old_sel and bitmap_sel
    must have 'use: True'.

    Implemented selection options:
        - 'mc_signal' -> select MC signal
        - 'mc_signal_prompt' -> select only prompt MC signal
        - 'mc_signal_FD' -> select only feed-down MC signal
        - 'mc_bkg' -> select MC background
        - 'presel_track_pid' -> select candidates satisfying PID and track pre-selections
        - 'sel_std_analysis' -> select candidates fulfilling the std analysis selections

    Args:
        dataframe: pandas dataframe to filter
        main_dict: dictionary of parameters loaded from 'database_ml_parameters.yml'
        sel_opt: selection option (string)

    Return:
        df_selected: filtered pandas dataframe
    '''
    logger = get_logger()

    bitmap_dict = main_dict['bitmap_sel']
    old_dict = main_dict['old_sel']
    use_bitmap = bitmap_dict['use']
    use_old = old_dict['use']

    # selection option -> key holding the on/off bit lists in the bitmap config
    bitmap_keys = {
        'mc_signal': 'mcsignal_on_off',
        'mc_signal_prompt': 'mcsignal_prompt_on_off',
        'mc_signal_FD': 'mcsignal_feed_on_off',
        'mc_bkg': 'mcbkg_on_off',
        'presel_track_pid': 'preseltrack_pid_on_off',
        'sel_std_analysis': 'std_analysis_on_off',
    }

    if use_bitmap == use_old:
        logger.critical(
            "One and only one of the selection method have to be used, i.e. with "
            "'use' flag set to True")

    if use_bitmap:
        logger.debug("Using bitmap selection")
        if sel_opt in bitmap_keys:
            sel_bits = bitmap_dict[bitmap_keys[sel_opt]]
        else:
            logger.critical("Wrong selection option!")

        logger.debug("Candidates before selection: %d", len(dataframe))
        df_selected = filter_bit_df(dataframe, bitmap_dict['var_sel'], sel_bits)
        logger.debug("Candidates after %s selection: %d", sel_opt, len(df_selected))

    if use_old:
        logger.debug("Using old selection")
        # in the old configuration the query strings are stored directly
        # under the selection option name
        if sel_opt in bitmap_keys:
            sel_string = old_dict[sel_opt]
        else:
            logger.critical("Wrong selection option!")

        logger.debug("Candidates before selection: %d", len(dataframe))
        df_selected = dataframe.query(sel_string)
        logger.debug("Candidates after %s selection: %d", sel_opt, len(df_selected))

    return df_selected
def fill_validation_multiplicity(dfevt, dfevtevtsel, df_reco):
    """
    Create the event-level validation histograms as a function of the
    multiplicity.

    Note that `df_reco` is modified in place: the column
    "n_tracklets_corr-n_tracklets_corr_sub" is added to it.

    Args:
        dfevt: event dataframe (needs is_ev_rej_INT7 and is_ev_rej)
        dfevtevtsel: dataframe of the selected events
        df_reco: candidate dataframe

    Return:
        the filled ValidationCollection
    """
    _ = len(df_reco)

    # Binning definition
    bins_ntrklt = buildbinning(200, -0.5, 199.5)
    bins_ntrklt_diff = buildbinning(10, -0.5, 9.5)
    bins_v0m = buildbinning(1500, -0.5, 1499.5)
    bins_zvtx = buildbinning(100, -15.0, 15)
    # the percentile axis is assembled from three consecutive ranges
    bins_v0m_perc = buildbinning(100, 0, 1)
    bins_v0m_perc += buildbinning(89, 1.1, 10)
    bins_v0m_perc += buildbinning(89, 11, 100)

    # Events with is_ev_rej_INT7 == 0
    val = ValidationCollection(dfevt[dfevt.is_ev_rej_INT7 == 0])

    for v0m_var in ("v0m", "v0m_eq", "v0m_corr", "v0m_eq_corr"):
        val.make_and_fill(bins_ntrklt, "n_tracklets", bins_v0m, v0m_var)
        val.make_and_fill(bins_v0m, v0m_var, bins_v0m_perc, "perc_v0m")

    for ntrklt_var in ("n_tracklets", "n_tracklets_corr", "n_tracklets_corr_shm"):
        val.make_and_fill(bins_ntrklt, ntrklt_var, bins_v0m_perc, "perc_v0m")
        val.make_and_fill(bins_v0m_perc, "perc_v0m", bins_ntrklt, ntrklt_var)

    # Selected events
    val.reset_input(dfevtevtsel, "")
    selected_event_plots = (
        (bins_ntrklt, "n_tracklets", bins_ntrklt, "n_tracklets_corr"),
        (bins_zvtx, "z_vtx_reco", bins_ntrklt, "n_tracklets_corr"),
        (bins_zvtx, "z_vtx_reco", bins_ntrklt, "n_tracklets"),
        (bins_ntrklt, "n_tracklets_corr"),
        (bins_ntrklt, "n_tracklets_corr_shm"),
    )
    for plot_args in selected_event_plots:
        val.make_and_fill(*plot_args)

    # Events selected on bit 4 of is_ev_rej, labelled "pileup"
    val.reset_input(filter_bit_df(dfevt, "is_ev_rej", [[4], []]), "pileup")
    val.make_and_fill(bins_ntrklt, "n_tracklets_corr")

    # Candidate level: effect of the tracklet subtraction
    diff_column = "n_tracklets_corr-n_tracklets_corr_sub"
    df_reco[diff_column] = df_reco["n_tracklets_corr"] - df_reco["n_tracklets_corr_sub"]
    candidate_inputs = [
        (df_reco, ""),
        (df_reco[df_reco.is_ev_rej_INT7 == 0], "MB"),
        (df_reco.query("is_ev_sel_shm == 1"), "HMSPD"),
    ]
    for frame, tag in candidate_inputs:
        val.reset_input(frame, tag)
        val.make_and_fill(bins_ntrklt, "n_tracklets_corr", bins_ntrklt_diff, diff_column)
        val.make_and_fill(bins_ntrklt, "n_tracklets_corr_sub", bins_ntrklt, "n_tracklets_corr")
        val.make_and_fill(bins_ntrklt, "n_tracklets_corr", bins_ntrklt, "n_tracklets_corr_sub")

    return val
def process_valevents(self, file_index):
    """
    Produce the event-validation ROOT file for one input file.

    The pickled "original" event dataframe is read once and used both
    unfiltered (for the normalisation) and restricted to is_ev_rej == 0
    (for the trigger histograms). Written to l_evtvalroot[file_index]:
    two scatter plots against perc_v0m, the per-variable trigger
    histograms, the per-trigger normalisation histograms (selected /
    no vertex / vertex out) and the "hEvForNorm" histogram.

    Args:
        file_index: position of the file in the l_evtorig / l_evtvalroot lists
    """
    # The same pickle used to be deserialized three times; one load is
    # enough because query() and the run-list selection return new frames.
    dfevtnorm = pickle.load(openfile(self.l_evtorig[file_index], "rb"))
    dfevt = dfevtnorm.query("is_ev_rej==0")
    myrunlisttrigmb = self.runlistrigger["INT7"]
    dfevtselmb = selectdfrunlist(dfevt, self.run_param[myrunlisttrigmb], "run_number")

    triggerlist = ["INT7", "HighMultV0", "HighMultSPD"]
    varlist = ["v0m_corr", "n_tracklets_corr", "perc_v0m"]
    nbinsvar = [100, 200, 200]
    minrvar = [0, 0, 0]
    maxrvar = [1500, 200, .5]

    fileevtroot = TFile.Open(self.l_evtvalroot[file_index], "recreate")

    hv0mvsperc = scatterplot(dfevt, "perc_v0m", "v0m_corr", 50000, 0, 100, 200, 0., 2000.)
    hv0mvsperc.SetName("hv0mvsperc")
    hv0mvsperc.Write()
    hntrklsperc = scatterplot(dfevt, "perc_v0m", "n_tracklets_corr",
                              50000, 0, 100, 200, 0., 2000.)
    hntrklsperc.SetName("hntrklsperc")
    hntrklsperc.Write()

    for var, nbins, xmin, xmax in zip(varlist, nbinsvar, minrvar, maxrvar):
        label = "hbitINT7vs%s" % (var)
        histoMB = TH1F(label, label, nbins, xmin, xmax)
        fill_hist(histoMB, dfevtselmb.query("trigger_hasbit_INT7==1")[var])
        histoMB.Sumw2()
        histoMB.Write()

        for trigger in triggerlist:
            triggerbit = "trigger_hasbit_%s==1" % trigger
            labeltriggerANDMB = "hbit%sANDINT7vs%s" % (trigger, var)
            labeltrigger = "hbit%svs%s" % (trigger, var)
            histotrigANDMB = TH1F(labeltriggerANDMB, labeltriggerANDMB, nbins, xmin, xmax)
            histotrig = TH1F(labeltrigger, labeltrigger, nbins, xmin, xmax)

            # restrict to the run list configured for this trigger
            myrunlisttrig = self.runlistrigger[trigger]
            ev = len(dfevt)
            dfevtsel = selectdfrunlist(dfevt, self.run_param[myrunlisttrig], "run_number")
            if len(dfevtsel) < ev:
                print("Reduced number of events in trigger", trigger)
                print(ev, len(dfevtsel))

            fill_hist(histotrigANDMB,
                      dfevtsel.query(triggerbit + " and trigger_hasbit_INT7==1")[var])
            fill_hist(histotrig, dfevtsel.query(triggerbit)[var])
            histotrigANDMB.Sumw2()
            histotrig.Sumw2()
            histotrigANDMB.Write()
            histotrig.Write()

            hSelMult = TH1F('sel_' + labeltrigger, 'sel_' + labeltrigger, nbins, xmin, xmax)
            hNoVtxMult = TH1F('novtx_' + labeltrigger, 'novtx_' + labeltrigger,
                              nbins, xmin, xmax)
            hVtxOutMult = TH1F('vtxout_' + labeltrigger, 'vtxout_' + labeltrigger,
                               nbins, xmin, xmax)

            # multiplicity dependent normalisation, from the unfiltered events
            dftrg = dfevtnorm.query(triggerbit)
            dfsel = dftrg.query('is_ev_rej == 0')
            df_to_keep = filter_bit_df(dftrg, 'is_ev_rej', [[], [0, 5, 6, 10, 11]])
            # events with reco vtx after previous selection
            tag_vtx = tag_bit_df(df_to_keep, 'is_ev_rej', [[], [1, 2, 7, 12]])
            df_no_vtx = df_to_keep[~tag_vtx.values]
            # events with reco zvtx > 10 cm after previous selection
            df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej', [[3], [1, 2, 7, 12]])

            fill_hist(hSelMult, dfsel[var])
            fill_hist(hNoVtxMult, df_no_vtx[var])
            fill_hist(hVtxOutMult, df_bit_zvtx_gr10[var])

            hSelMult.Write()
            hNoVtxMult.Write()
            hVtxOutMult.Write()

    hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
    hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
    hNorm.GetXaxis().SetBinLabel(2, "selected events")
    nselevt = 0
    norm = 0
    # an empty dataframe keeps both numbers at zero
    if not dfevtnorm.empty:
        nselevt = len(dfevtnorm.query("is_ev_rej==0"))
        norm = getnormforselevt(dfevtnorm)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()
    fileevtroot.Close()
def unpack(self, file_index):
    """
    Convert one input ROOT file into pickled dataframes.

    In addition to the event (l_evtorig, l_evt), candidate (l_reco) and,
    for MC, generated-particle (l_gen) dataframes, the candidate dataframe
    gets the column "n_tracklets_corr_sub": n_tracklets_corr minus the
    number of prongs whose spdhits_prong<i> value equals 3.

    Args:
        file_index: position of the file in the l_root / l_* path lists
    """
    treeevtorig = uproot.open(self.l_root[file_index])[self.n_treeevt]
    dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    pickle.dump(dfevtorig, openfile(self.l_evtorig[file_index], "wb"), protocol=4)

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    pickle.dump(dfevt, openfile(self.l_evt[file_index], "wb"), protocol=4)

    treereco = uproot.open(self.l_root[file_index])[self.n_treereco]
    dfreco = treereco.pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Number of prongs per candidate to subtract from the tracklet count.
    # Accumulated first and subtracted once after the loop, so the column
    # is well defined (unchanged multiplicity) even if the loop never runs,
    # instead of being filled with None.
    arraysub = np.zeros(len(dfreco), dtype=int)
    for iprong in range(self.nprongs):
        spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
        arraysub = arraysub + (spdhits_thisprong == 3).astype(int)
    dfreco["n_tracklets_corr_sub"] = dfreco["n_tracklets_corr"].values - arraysub

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = np.array(tag_bit_df(dfreco, self.v_bitvar,
                                               self.b_std), dtype=int)
    dfreco = dfreco.reset_index(drop=True)
    if self.mcordata == "mc":
        dfreco[self.v_ismcsignal] = np.array(tag_bit_df(dfreco, self.v_bitvar,
                                                        self.b_mcsig), dtype=int)
        dfreco[self.v_ismcprompt] = np.array(tag_bit_df(dfreco, self.v_bitvar,
                                                        self.b_mcsigprompt), dtype=int)
        dfreco[self.v_ismcfd] = np.array(tag_bit_df(dfreco, self.v_bitvar,
                                                    self.b_mcsigfd), dtype=int)
        dfreco[self.v_ismcbkg] = np.array(tag_bit_df(dfreco, self.v_bitvar,
                                                     self.b_mcbkg), dtype=int)
    pickle.dump(dfreco, openfile(self.l_reco[file_index], "wb"), protocol=4)

    if self.mcordata == "mc":
        treegen = uproot.open(self.l_root[file_index])[self.n_treegen]
        dfgen = treegen.pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # generated particles are matched to all events, not only the good ones
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = np.array(tag_bit_df(dfgen, self.v_bitvar,
                                                  self.b_std), dtype=int)
        dfgen[self.v_ismcsignal] = np.array(tag_bit_df(dfgen, self.v_bitvar,
                                                       self.b_mcsig), dtype=int)
        dfgen[self.v_ismcprompt] = np.array(tag_bit_df(dfgen, self.v_bitvar,
                                                       self.b_mcsigprompt), dtype=int)
        dfgen[self.v_ismcfd] = np.array(tag_bit_df(dfgen, self.v_bitvar,
                                                   self.b_mcsigfd), dtype=int)
        dfgen[self.v_ismcbkg] = np.array(tag_bit_df(dfgen, self.v_bitvar,
                                                    self.b_mcbkg), dtype=int)
        dfgen = dfgen.reset_index(drop=True)
        pickle.dump(dfgen, openfile(self.l_gen[file_index], "wb"), protocol=4)
def unpack(self, file_index):
    """
    Convert one input ROOT file into pickled dataframes.

    Writes the event (l_evtorig, l_evt), candidate (l_reco) and, for MC,
    generated-particle (l_gen) dataframes. The candidate dataframe gets the
    columns "n_tracklets_corr_sub" and "n_tracklets_corr_shm_sub": the
    corresponding multiplicity minus the number of prongs that are enabled
    in prongformultsub and whose spdhits_prong<i> value equals 3.

    The process exits with status 1 if a requested branch cannot be read
    from the event or candidate tree.

    Args:
        file_index: position of the file in the l_root / l_* path lists
    """
    treeevtorig = uproot.open(self.l_root[file_index])[self.n_treeevt]
    try:
        dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
    except Exception as e: # pylint: disable=broad-except
        print('Missing variable in the event root tree', str(e))
        print('I am sorry, I am dying ...\n \n \n')
        # non-zero status so that the failure is visible to the caller
        sys.exit(1)

    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    pickle.dump(dfevtorig, openfile(self.l_evtorig[file_index], "wb"), protocol=4)

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    pickle.dump(dfevt, openfile(self.l_evt[file_index], "wb"), protocol=4)

    treereco = uproot.open(self.l_root[file_index])[self.n_treereco]
    try:
        dfreco = treereco.pandas.df(branches=self.v_all)
    except Exception as e: # pylint: disable=broad-except
        print('Missing variable in the candidate root tree', str(e))
        print('I am sorry, I am dying ...\n \n \n')
        sys.exit(1)

    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Number of prongs per candidate to subtract from the tracklet counts.
    # Accumulated first and subtracted once after the loop: when every prong
    # is disabled in prongformultsub the "_sub" columns then equal the
    # unsubtracted multiplicities instead of being filled with None.
    arraysub = np.zeros(len(dfreco), dtype=int)
    for iprong in range(self.nprongs):
        if self.prongformultsub[iprong] == 0:
            continue
        spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
        arraysub = arraysub + (spdhits_thisprong == 3).astype(int)
    dfreco["n_tracklets_corr_sub"] = dfreco["n_tracklets_corr"].values - arraysub
    dfreco["n_tracklets_corr_shm_sub"] = dfreco["n_tracklets_corr_shm"].values - arraysub

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = np.array(tag_bit_df(dfreco, self.v_bitvar,
                                               self.b_std), dtype=int)
    dfreco = dfreco.reset_index(drop=True)
    if self.mcordata == "mc":
        dfreco[self.v_ismcsignal] = np.array(tag_bit_df(
            dfreco, self.v_bitvar, self.b_mcsig), dtype=int)
        dfreco[self.v_ismcprompt] = np.array(tag_bit_df(
            dfreco, self.v_bitvar, self.b_mcsigprompt), dtype=int)
        dfreco[self.v_ismcfd] = np.array(tag_bit_df(
            dfreco, self.v_bitvar, self.b_mcsigfd), dtype=int)
        dfreco[self.v_ismcbkg] = np.array(tag_bit_df(
            dfreco, self.v_bitvar, self.b_mcbkg), dtype=int)
    pickle.dump(dfreco, openfile(self.l_reco[file_index], "wb"), protocol=4)

    if self.mcordata == "mc":
        treegen = uproot.open(self.l_root[file_index])[self.n_treegen]
        dfgen = treegen.pandas.df(branches=self.v_gen)
        # generated particles are matched to all events, not only the good ones
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = np.array(tag_bit_df(dfgen, self.v_bitvar,
                                                  self.b_std), dtype=int)
        dfgen[self.v_ismcsignal] = np.array(tag_bit_df(
            dfgen, self.v_bitvar, self.b_mcsig), dtype=int)
        dfgen[self.v_ismcprompt] = np.array(tag_bit_df(
            dfgen, self.v_bitvar, self.b_mcsigprompt), dtype=int)
        dfgen[self.v_ismcfd] = np.array(tag_bit_df(
            dfgen, self.v_bitvar, self.b_mcsigfd), dtype=int)
        dfgen[self.v_ismcbkg] = np.array(tag_bit_df(
            dfgen, self.v_bitvar, self.b_mcbkg), dtype=int)
        dfgen = dfgen.reset_index(drop=True)
        pickle.dump(dfgen, openfile(self.l_gen[file_index], "wb"), protocol=4)
def unpack(self, file_index):
    """
    Convert one input ROOT file into pickled dataframes and store the event
    normalisation.

    Written, in this order: all events after run-list and centrality
    selection (l_evtorig), the "good" events (l_evt), a ROOT file
    (l_evtorigroot) holding the two-bin histogram "hEvForNorm", the
    reconstructed candidates (l_reco) and, for MC only, the generated
    particles (l_gen).

    Args:
        file_index: position of the file in the l_root / l_* path lists
    """
    def bit_flag(frame, bits):
        # integer column from the per-row tag returned by tag_bit_df
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    def dump(frame, path):
        pickle.dump(frame, openfile(path, "wb"), protocol=4)

    rootpath = self.l_root[file_index]
    is_mc = self.mcordata == "mc"

    # Event tree: run list and centrality selection
    events_all = uproot.open(rootpath)[self.n_treeevt].pandas.df(branches=self.v_evt)
    events_all = selectdfrunlist(events_all, self.runlist, "run_number")
    events_all = selectdfquery(events_all, self.s_cen_unp)
    events_all = events_all.reset_index(drop=True)
    dump(events_all, self.l_evtorig[file_index])

    # Good events only
    events_good = selectdfquery(events_all, self.s_good_evt_unp)
    events_good = events_good.reset_index(drop=True)
    dump(events_good, self.l_evt[file_index])

    # Normalisation histogram: bin 1 = normalisation, bin 2 = selected events
    normfile = TFile.Open(self.l_evtorigroot[file_index], "recreate")
    hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
    norm_axis_labels = ("normsalisation factor", "selected events")
    for ibin, bin_label in enumerate(norm_axis_labels, start=1):
        hNorm.GetXaxis().SetBinLabel(ibin, bin_label)
    # Both values stay at zero for an empty dataframe; the computation is
    # only attempted when there is at least one event.
    if events_good.empty:
        nselevt = 0
        norm = 0
    else:
        nselevt = len(events_good.query("is_ev_rej==0"))
        norm = getnormforselevt(events_good)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()
    normfile.Close()

    # Reconstructed candidates
    reco = uproot.open(rootpath)[self.n_treereco].pandas.df(branches=self.v_all)
    reco = selectdfrunlist(reco, self.runlist, "run_number")
    reco = selectdfquery(reco, self.s_reco_unp)
    reco = pd.merge(reco, events_good, on=self.v_evtmatch)
    in_fiducial = selectfidacc(reco.pt_cand.values, reco.y_cand.values)
    reco = reco[np.array(in_fiducial, dtype=bool)]
    if self.b_trackcuts is not None:
        reco = filter_bit_df(reco, self.v_bitvar, self.b_trackcuts)
    reco[self.v_isstd] = bit_flag(reco, self.b_std)
    reco = reco.reset_index(drop=True)
    if is_mc:
        mc_columns = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
        for column, bits in mc_columns:
            reco[column] = bit_flag(reco, bits)
    dump(reco, self.l_reco[file_index])

    if not is_mc:
        return

    # Generated particles (MC only), matched to all events
    gen = uproot.open(rootpath)[self.n_treegen].pandas.df(branches=self.v_gen)
    gen = selectdfrunlist(gen, self.runlist, "run_number")
    gen = pd.merge(gen, events_all, on=self.v_evtmatch)
    gen = selectdfquery(gen, self.s_gen_unp)
    gen[self.v_isstd] = bit_flag(gen, self.b_std)
    for column, bits in mc_columns:
        gen[column] = bit_flag(gen, bits)
    gen = gen.reset_index(drop=True)
    dump(gen, self.l_gen[file_index])
def unpack(self, file_index):
    """
    Convert one input ROOT file into pickled dataframes (DataFrame.to_pickle).

    Written, in this order: all events after run-list and centrality
    selection (l_evtorig), the "good" events (l_evt), the reconstructed
    candidates merged with the good events (l_reco) and, for MC only, the
    generated particles merged with all events (l_gen). The fiducial
    acceptance selection is skipped when 'Jet' is part of self.case.

    Args:
        file_index: position of the file in the l_root / l_* path lists
    """
    def bit_flag(frame, bits):
        # integer column from the per-row tag returned by tag_bit_df
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    rootpath = self.l_root[file_index]
    is_mc = self.mcordata == "mc"

    # Event tree restricted to the run list and the required centrality
    events_all = uproot.open(rootpath)[self.n_treeevt].pandas.df(branches=self.v_evt)
    events_all = selectdfrunlist(events_all, self.runlist, "run_number")
    events_all = selectdfquery(events_all, self.s_cen_unp)
    events_all = events_all.reset_index(drop=True)
    events_all.to_pickle(self.l_evtorig[file_index])

    # "Good" events go to a second pickle file
    events_good = selectdfquery(events_all, self.s_good_evt_unp)
    events_good = events_good.reset_index(drop=True)
    events_good.to_pickle(self.l_evt[file_index])

    # Reconstructed candidates, read from a fresh handle on the same file
    reco_tree = uproot.open(rootpath)[self.n_treereco]
    if not reco_tree:
        # only reported; the read below is attempted regardless
        print("Couldn't find tree %s in file %s" % (self.n_treereco, rootpath))
    reco = reco_tree.pandas.df(branches=self.v_all)
    reco = selectdfrunlist(reco, self.runlist, "run_number")
    reco = selectdfquery(reco, self.s_reco_unp)
    reco = pd.merge(reco, events_good, on=self.v_evtmatch)

    if 'Jet' not in self.case:
        in_fiducial = selectfidacc(reco.pt_cand.values, reco.y_cand.values)
        reco = reco[np.array(in_fiducial, dtype=bool)]

    if self.b_trackcuts is not None:
        reco = filter_bit_df(reco, self.v_bitvar, self.b_trackcuts)
    reco[self.v_isstd] = bit_flag(reco, self.b_std)
    reco = reco.reset_index(drop=True)

    if is_mc:
        mc_columns = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
        for column, bits in mc_columns:
            reco[column] = bit_flag(reco, bits)

    reco.to_pickle(self.l_reco[file_index])

    if not is_mc:
        return

    # Generated particles (MC only), matched to all events
    gen = uproot.open(rootpath)[self.n_treegen].pandas.df(branches=self.v_gen)
    gen = selectdfrunlist(gen, self.runlist, "run_number")
    gen = pd.merge(gen, events_all, on=self.v_evtmatch)
    gen = selectdfquery(gen, self.s_gen_unp)
    gen[self.v_isstd] = bit_flag(gen, self.b_std)
    for column, bits in mc_columns:
        gen[column] = bit_flag(gen, bits)
    gen = gen.reset_index(drop=True)
    gen.to_pickle(self.l_gen[file_index])