def unpack(self, file_index):
    """Unpack one ROOT file into pickled pandas dataframes.

    Reads the event tree, the reconstructed-candidate tree and, when
    ``self.mcordata == "mc"``, the generated tree of
    ``self.l_root[file_index]`` and writes four pickle files:

    * ``self.l_evtorig[file_index]``: events after ``selectdfrunlist``
      (on ``run_number``) and the ``self.s_cen_unp`` query;
    * ``self.l_evt[file_index]``: those events after the
      ``self.s_good_evt_unp`` query;
    * ``self.l_reco[file_index]``: selected candidates merged with the
      latter events on ``self.v_evtmatch`` and tagged with bit flags;
    * ``self.l_gen[file_index]`` (MC only): generated entries merged with
      the former events and tagged with the same flags.

    Args:
        file_index: Index into the per-file path lists held on ``self``.
    """
    def _dump_pickle(frame, path):
        # Close the handle deterministically so buffered (possibly
        # compressed) output is flushed before the method moves on.
        # NOTE(review): assumes openfile returns a file object usable as a
        # context manager, like the built-in open -- confirm.
        with openfile(path, "wb") as handle:
            pickle.dump(frame, handle, protocol=4)

    def _tag(frame, bitmask):
        return np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    # Open the ROOT file once and reuse it for every tree.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    # --- event tree -------------------------------------------------------
    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    _dump_pickle(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    _dump_pickle(dfevt, self.l_evt[file_index])

    # --- reconstructed candidates ------------------------------------------
    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)

    # The MC flag columns are shared by the reco and gen dataframes; the
    # attributes are only read when running on MC.
    mc_tags = ()
    if is_mc:
        mc_tags = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
    for column, bitmask in mc_tags:
        dfreco[column] = _tag(dfreco, bitmask)
    _dump_pickle(dfreco, self.l_reco[file_index])

    # --- generated entries (MC only) ----------------------------------------
    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated entries are matched to the events *before* the
        # good-event selection (dfevtorig, not dfevt).
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        for column, bitmask in ((self.v_isstd, self.b_std),) + mc_tags:
            dfgen[column] = _tag(dfgen, bitmask)
        dfgen = dfgen.reset_index(drop=True)
        _dump_pickle(dfgen, self.l_gen[file_index])
def skimmer(filein, filevt, fileout, skimming_sel, var_evt_match,
            param_case, presel_reco, sel_cent, skimming2_dotrackpid,
            runlist):
    """Skim a pickled dataframe and write the result to ``fileout``.

    The selections are applied in this order, each only when enabled:

    1. merge with the event dataframe on ``var_evt_match`` (skipped when
       ``"Evt"`` is part of ``filein``);
    2. ``skimming_sel`` query;
    3. run selection through ``select_runs`` on the ``run_number`` column;
    4. for inputs whose path contains ``"Reco"``: optional
       ``filter_df_cand(..., 'presel_track_pid')``, optional ``presel_reco``
       query, then the ``selectfidacc`` mask on ``pt_cand`` / ``y_cand``;
    5. ``sel_cent`` query.

    Args:
        filein: Path of the input pickle; its name drives steps 1 and 4.
        filevt: Path of the event-dataframe pickle (always loaded).
        fileout: Path the skimmed dataframe is pickled to.
        skimming_sel: Query string or ``None``.
        var_evt_match: Column(s) used as merge key with the event frame.
        param_case: Passed through to ``filter_df_cand``.
        presel_reco: Query string applied to reco inputs, or ``None``.
        sel_cent: Query string or ``None``.
        skimming2_dotrackpid: The track/PID filter runs only when this is
            exactly ``True``.
        runlist: Passed to ``select_runs``; ``None`` disables the run cut.
    """
    # NOTE: unpickling can execute arbitrary code -- only use on files
    # produced by this workflow, never on untrusted input.
    with open(filein, "rb") as handle:
        df = pickle.load(handle)
    with open(filevt, "rb") as handle:
        dfevt = pickle.load(handle)

    if "Evt" not in filein:
        df = pd.merge(df, dfevt, on=var_evt_match)
    if skimming_sel is not None:
        df = df.query(skimming_sel)
    if runlist is not None:
        isgoodrun = select_runs(runlist, df.run_number.values)
        df = df[np.array(isgoodrun, dtype=bool)]
    if "Reco" in filein:
        if skimming2_dotrackpid is True:
            df = filter_df_cand(df, param_case, 'presel_track_pid')
        if presel_reco is not None:
            df = df.query(presel_reco)
        isselacc = selectfidacc(df.pt_cand.values, df.y_cand.values)
        df = df[np.array(isselacc, dtype=bool)]
    if sel_cent is not None:
        df = df.query(sel_cent)
    df.to_pickle(fileout)
def unpack(self, file_index):
    """Unpack one ROOT file into pickled pandas dataframes.

    Reads the event tree, the reconstructed-candidate tree and, when
    ``self.mcordata == "mc"``, the generated tree of
    ``self.l_root[file_index]`` and pickles the resulting dataframes to
    ``self.l_evtorig``, ``self.l_evt``, ``self.l_reco`` and (MC only)
    ``self.l_gen`` at the same index.

    The reco dataframe additionally gets a ``n_tracklets_corr_sub`` column:
    ``n_tracklets_corr`` minus the number of prongs (out of
    ``self.nprongs``) whose ``spdhits_prong<i>`` value equals 3.  With zero
    prongs nothing is subtracted.

    Args:
        file_index: Index into the per-file path lists held on ``self``.
    """
    def _dump_pickle(frame, path):
        # Close the handle deterministically so buffered (possibly
        # compressed) output is flushed before the method moves on.
        # NOTE(review): assumes openfile returns a file object usable as a
        # context manager, like the built-in open -- confirm.
        with openfile(path, "wb") as handle:
            pickle.dump(frame, handle, protocol=4)

    def _tag(frame, bitmask):
        return np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    # Open the ROOT file once and reuse it for every tree.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    # --- event tree -------------------------------------------------------
    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    _dump_pickle(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    _dump_pickle(dfevt, self.l_evt[file_index])

    # --- reconstructed candidates ------------------------------------------
    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Per-candidate count of prongs with spdhits == 3, accumulated with a
    # vectorised comparison instead of a Python-level loop over rows.
    n_sub = np.zeros(len(dfreco), dtype=int)
    for iprong in range(self.nprongs):
        spdhits = dfreco["spdhits_prong%s" % iprong].values
        n_sub += (spdhits == 3).astype(int)
    # Subtract once, after the loop, so the column is well defined even
    # when there are no prongs to loop over.
    dfreco["n_tracklets_corr_sub"] = np.subtract(
        dfreco["n_tracklets_corr"].values, n_sub)

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)

    # The MC flag columns are shared by the reco and gen dataframes; the
    # attributes are only read when running on MC.
    mc_tags = ()
    if is_mc:
        mc_tags = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
    for column, bitmask in mc_tags:
        dfreco[column] = _tag(dfreco, bitmask)
    _dump_pickle(dfreco, self.l_reco[file_index])

    # --- generated entries (MC only) ----------------------------------------
    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated entries are matched to the events *before* the
        # good-event selection (dfevtorig, not dfevt).
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        for column, bitmask in ((self.v_isstd, self.b_std),) + mc_tags:
            dfgen[column] = _tag(dfgen, bitmask)
        dfgen = dfgen.reset_index(drop=True)
        _dump_pickle(dfgen, self.l_gen[file_index])
def unpack(self, file_index):
    """Unpack one ROOT file into pickled pandas dataframes.

    Reads the event tree, the reconstructed-candidate tree and, when
    ``self.mcordata == "mc"``, the generated tree of
    ``self.l_root[file_index]`` and pickles the resulting dataframes to
    ``self.l_evtorig``, ``self.l_evt``, ``self.l_reco`` and (MC only)
    ``self.l_gen`` at the same index.

    The reco dataframe additionally gets ``n_tracklets_corr_sub`` and
    ``n_tracklets_corr_shm_sub``: the corresponding ``n_tracklets_corr*``
    column minus the number of prongs whose ``spdhits_prong<i>`` equals 3.
    Prongs with ``self.prongformultsub[i] == 0`` are not counted; if every
    prong is skipped nothing is subtracted.

    Args:
        file_index: Index into the per-file path lists held on ``self``.

    Raises:
        SystemExit: with status 1 when a requested branch cannot be read
            from the event or candidate tree.
    """
    def _dump_pickle(frame, path):
        # Close the handle deterministically so buffered (possibly
        # compressed) output is flushed before the method moves on.
        # NOTE(review): assumes openfile returns a file object usable as a
        # context manager, like the built-in open -- confirm.
        with openfile(path, "wb") as handle:
            pickle.dump(frame, handle, protocol=4)

    def _tag(frame, bitmask):
        return np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    # Open the ROOT file once and reuse it for every tree.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    # --- event tree -------------------------------------------------------
    treeevtorig = rootfile[self.n_treeevt]
    try:
        dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
    except Exception as exc:  # pylint: disable=broad-except
        print('Missing variable in the event root tree', str(exc))
        print('I am sorry, I am dying ...\n \n \n')
        # Non-zero status so the failure is visible to the caller.
        sys.exit(1)
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    _dump_pickle(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    _dump_pickle(dfevt, self.l_evt[file_index])

    # --- reconstructed candidates ------------------------------------------
    treereco = rootfile[self.n_treereco]
    try:
        dfreco = treereco.pandas.df(branches=self.v_all)
    except Exception as exc:  # pylint: disable=broad-except
        print('Missing variable in the candidate root tree', str(exc))
        print('I am sorry, I am dying ...\n \n \n')
        sys.exit(1)
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Per-candidate count of enabled prongs with spdhits == 3, accumulated
    # with a vectorised comparison instead of a Python loop over rows.
    n_sub = np.zeros(len(dfreco), dtype=int)
    for iprong in range(self.nprongs):
        if self.prongformultsub[iprong] == 0:
            continue
        spdhits = dfreco["spdhits_prong%s" % iprong].values
        n_sub += (spdhits == 3).astype(int)
    # Subtract once, after the loop, so both columns are well defined even
    # when no prong contributes.
    dfreco["n_tracklets_corr_sub"] = np.subtract(
        dfreco["n_tracklets_corr"].values, n_sub)
    dfreco["n_tracklets_corr_shm_sub"] = np.subtract(
        dfreco["n_tracklets_corr_shm"].values, n_sub)

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)

    # The MC flag columns are shared by the reco and gen dataframes; the
    # attributes are only read when running on MC.
    mc_tags = ()
    if is_mc:
        mc_tags = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
    for column, bitmask in mc_tags:
        dfreco[column] = _tag(dfreco, bitmask)
    _dump_pickle(dfreco, self.l_reco[file_index])

    # --- generated entries (MC only) ----------------------------------------
    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        # Generated entries are matched to the events *before* the
        # good-event selection (dfevtorig, not dfevt).
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        for column, bitmask in ((self.v_isstd, self.b_std),) + mc_tags:
            dfgen[column] = _tag(dfgen, bitmask)
        dfgen = dfgen.reset_index(drop=True)
        _dump_pickle(dfgen, self.l_gen[file_index])
def unpack(self, file_index):
    """Unpack one ROOT file into pickled dataframes plus a normalisation file.

    Reads the event tree, the reconstructed-candidate tree and, when
    ``self.mcordata == "mc"``, the generated tree of
    ``self.l_root[file_index]`` and pickles the resulting dataframes to
    ``self.l_evtorig``, ``self.l_evt``, ``self.l_reco`` and (MC only)
    ``self.l_gen`` at the same index.

    It also (re)creates the ROOT file ``self.l_evtorigroot[file_index]``
    holding the two-bin histogram ``hEvForNorm``: bin 1 is the value
    returned by ``getnormforselevt`` for the good events, bin 2 the number
    of good events with ``is_ev_rej == 0``.  Both are 0 when there are no
    good events.

    Args:
        file_index: Index into the per-file path lists held on ``self``.
    """
    def _dump_pickle(frame, path):
        # Close the handle deterministically so buffered (possibly
        # compressed) output is flushed before the method moves on.
        # NOTE(review): assumes openfile returns a file object usable as a
        # context manager, like the built-in open -- confirm.
        with openfile(path, "wb") as handle:
            pickle.dump(frame, handle, protocol=4)

    def _tag(frame, bitmask):
        return np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    # Open the ROOT file once and reuse it for every tree.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    # --- event tree -------------------------------------------------------
    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    _dump_pickle(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    _dump_pickle(dfevt, self.l_evt[file_index])

    # --- normalisation histogram --------------------------------------------
    fileevtroot = TFile.Open(self.l_evtorigroot[file_index], "recreate")
    hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
    # NOTE(review): the label text (including its spelling) is kept as is;
    # downstream code may look the bin up by this string.
    hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
    hNorm.GetXaxis().SetBinLabel(2, "selected events")
    nselevt = 0
    norm = 0
    # Guard against an empty dataframe: the original author observed that
    # operating on an empty frame could make this function return silently,
    # skipping everything that follows.  The zero defaults are written then.
    if not dfevt.empty:
        nselevt = len(dfevt.query("is_ev_rej==0"))
        norm = getnormforselevt(dfevt)
    hNorm.SetBinContent(1, norm)
    hNorm.SetBinContent(2, nselevt)
    hNorm.Write()
    fileevtroot.Close()

    # --- reconstructed candidates ------------------------------------------
    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)

    # The MC flag columns are shared by the reco and gen dataframes; the
    # attributes are only read when running on MC.
    mc_tags = ()
    if is_mc:
        mc_tags = (
            (self.v_ismcsignal, self.b_mcsig),
            (self.v_ismcprompt, self.b_mcsigprompt),
            (self.v_ismcfd, self.b_mcsigfd),
            (self.v_ismcbkg, self.b_mcbkg),
        )
    for column, bitmask in mc_tags:
        dfreco[column] = _tag(dfreco, bitmask)
    _dump_pickle(dfreco, self.l_reco[file_index])

    # --- generated entries (MC only) ----------------------------------------
    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated entries are matched to the events *before* the
        # good-event selection (dfevtorig, not dfevt).
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        for column, bitmask in ((self.v_isstd, self.b_std),) + mc_tags:
            dfgen[column] = _tag(dfgen, bitmask)
        dfgen = dfgen.reset_index(drop=True)
        _dump_pickle(dfgen, self.l_gen[file_index])
def unpack(self, file_index):
    """Convert one ROOT file into pickled pandas dataframes.

    Produces, for ``self.l_root[file_index]``:

    * ``self.l_evtorig[file_index]``: event tree after ``selectdfrunlist``
      (on ``run_number``) and the ``self.s_cen_unp`` query;
    * ``self.l_evt[file_index]``: those events after the
      ``self.s_good_evt_unp`` query;
    * ``self.l_reco[file_index]``: selected candidates merged with the
      latter events on ``self.v_evtmatch`` and tagged with bit flags.  The
      ``selectfidacc`` mask is skipped when ``'Jet'`` is in ``self.case``;
    * ``self.l_gen[file_index]``: only when ``self.mcordata == "mc"``,
      generated entries merged with the former events and tagged.

    Args:
        file_index: Index into the per-file path lists held on ``self``.
    """
    def bit_flags(frame, bitmask):
        # Integer 0/1 array from the bit test on self.v_bitvar.
        return np.array(tag_bit_df(frame, self.v_bitvar, bitmask), dtype=int)

    # ---- events ------------------------------------------------------------
    evt_tree = uproot.open(self.l_root[file_index])[self.n_treeevt]
    events_all = evt_tree.pandas.df(branches=self.v_evt)
    events_all = selectdfrunlist(events_all, self.runlist, "run_number")
    events_all = selectdfquery(events_all, self.s_cen_unp).reset_index(drop=True)
    events_all.to_pickle(self.l_evtorig[file_index])

    events_good = selectdfquery(events_all, self.s_good_evt_unp)
    events_good = events_good.reset_index(drop=True)
    events_good.to_pickle(self.l_evt[file_index])

    # ---- reconstructed candidates --------------------------------------------
    reco_tree = uproot.open(self.l_root[file_index])[self.n_treereco]
    if not reco_tree:
        # Only reported; processing continues with whatever was returned.
        print("Couldn't find tree %s in file %s"
              % (self.n_treereco, self.l_root[file_index]))
    reco = reco_tree.pandas.df(branches=self.v_all)
    reco = selectdfrunlist(reco, self.runlist, "run_number")
    reco = selectdfquery(reco, self.s_reco_unp)
    reco = pd.merge(reco, events_good, on=self.v_evtmatch)
    if 'Jet' not in self.case:
        in_acceptance = selectfidacc(reco.pt_cand.values, reco.y_cand.values)
        reco = reco[np.array(in_acceptance, dtype=bool)]
    if self.b_trackcuts is not None:
        reco = filter_bit_df(reco, self.v_bitvar, self.b_trackcuts)
    reco[self.v_isstd] = bit_flags(reco, self.b_std)
    reco = reco.reset_index(drop=True)
    if self.mcordata == "mc":
        reco[self.v_ismcsignal] = bit_flags(reco, self.b_mcsig)
        reco[self.v_ismcprompt] = bit_flags(reco, self.b_mcsigprompt)
        reco[self.v_ismcfd] = bit_flags(reco, self.b_mcsigfd)
        reco[self.v_ismcbkg] = bit_flags(reco, self.b_mcbkg)
    reco.to_pickle(self.l_reco[file_index])

    # ---- generated entries (MC only) -------------------------------------------
    if self.mcordata != "mc":
        return
    gen_tree = uproot.open(self.l_root[file_index])[self.n_treegen]
    gen = gen_tree.pandas.df(branches=self.v_gen)
    gen = selectdfrunlist(gen, self.runlist, "run_number")
    # Matched to the events before the good-event selection.
    gen = pd.merge(gen, events_all, on=self.v_evtmatch)
    gen = selectdfquery(gen, self.s_gen_unp)
    gen[self.v_isstd] = bit_flags(gen, self.b_std)
    gen[self.v_ismcsignal] = bit_flags(gen, self.b_mcsig)
    gen[self.v_ismcprompt] = bit_flags(gen, self.b_mcsigprompt)
    gen[self.v_ismcfd] = bit_flags(gen, self.b_mcsigfd)
    gen[self.v_ismcbkg] = bit_flags(gen, self.b_mcbkg)
    gen = gen.reset_index(drop=True)
    gen.to_pickle(self.l_gen[file_index])