def __rdf_from_dataset(self, dataset):
    """Build an ``RDataFrameCutWeight`` from the ntuples of ``dataset``.

    Every ntuple of ``dataset.ntuples`` is added to one main ``TChain`` as
    ``'<path>/<directory>'``. Friends are grouped by ``friend.tag``: one
    friend ``TChain`` per tag, attached to the main chain via ``AddFriend``.

    The main chain is appended to ``self.tchains`` and each friend chain to
    ``self.friend_tchains`` so that they stay referenced.

    Raises:
        NameError: if the ntuples do not share exactly one ``directory``
            value (this includes a dataset without any ntuple).
    """
    # All ntuples must point at the same tree name; the name itself is not
    # needed afterwards because each file is added with its full path.
    tree_names = {ntuple.directory for ntuple in dataset.ntuples}
    if len(tree_names) != 1:
        raise NameError(
            'Impossible to create RDataFrame with different tree names')

    chain = TChain()
    ftag_fchain = {}
    for ntuple in dataset.ntuples:
        chain.Add('{}/{}'.format(ntuple.path, ntuple.directory))
        for friend in ntuple.friends:
            # Create the friend chain lazily, once per tag.
            if friend.tag not in ftag_fchain:
                ftag_fchain[friend.tag] = TChain()
            ftag_fchain[friend.tag].Add('{}/{}'.format(
                friend.path, friend.directory))

    for ch in ftag_fchain.values():
        chain.AddFriend(ch)
        # Keep friend chains alive
        self.friend_tchains.append(ch)

    if self.nthreads != 1:
        EnableImplicitMT(self.nthreads)

    # Keep main chain alive
    self.tchains.append(chain)
    rdf = RDataFrame(chain)
    return RDataFrameCutWeight(rdf)
def get_frame(tree_name, file_name):
    """Open tree ``tree_name`` of ``file_name`` as an ``RDataFrame``.

    If the free variable ``verbose`` (defined outside this function) is
    truthy, every column name of the frame is printed on its own line.
    """
    frame = RDataFrame(tree_name, file_name)
    if not verbose:
        return frame
    for column in frame.GetColumnNames():
        print(column)
    return frame
def makeDataFrame(self):
    """Chain the double-fake samples and return an ``RDataFrame`` on them.

    The sample lists come from ``createSampleLists`` (configured with
    ``self.analysis_dir``, ``self.server`` and ``self.channel``); only the
    third returned list is used, after passing it through ``setSumWeights``.
    One ``tree.root`` file per sample is added to a ``TChain('tree')``.

    Returns:
        The dataframe with the extra columns ``w``, ``ptCone`` and several
        ``abs_*`` columns defined.
    """
    # Only the double-fake list is needed here.
    _, _, samples_doublefake = createSampleLists(
        analysis_dir=self.analysis_dir,
        server=self.server,
        channel=self.channel,
    )
    working_samples = setSumWeights(samples_doublefake)

    print('###########################################################')
    print('# measuring doublefakerake...')
    print('# %d samples to be used:'%(len(working_samples)))
    print('###########################################################')
    for w in working_samples:
        print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))

    # TChain'ing all data samples together.
    # The previous version always read working_samples[0] inside the loop,
    # which added the first sample's file once per sample.
    chain = TChain('tree')
    for sample in working_samples:
        file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
        chain.Add(file_name)

    # NOTE(review): `chain` is a local variable; confirm that the returned
    # dataframe does not outlive it on the ROOT version in use.
    dataframe = RDataFrame(chain)
    weight = 'weight * lhe_weight'
    dataframe = (
        dataframe
        .Define('w', weight)
        .Define('ptCone', self.ptCone())
        .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
        .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
        .Define('abs_l1_eta', 'abs(l1_eta)')
        .Define('abs_l2_eta', 'abs(l2_eta)')
        .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
        .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
    )
    return dataframe
def getRDF(pattern, treeName=None, keepObj=None):
    """Chain all files matching ``pattern`` and wrap them in an RDataFrame.

    Args:
        pattern: a glob pattern string, or an iterable of such patterns.
        treeName: name of the tree to chain; ``'tuple0/DecayTree'`` when None.
        keepObj: optional container with an ``append`` method. When given,
            the TChain is appended to it (to keep the chain in memory).

    Returns:
        The RDataFrame alone when ``keepObj`` is given, otherwise the tuple
        ``(RDataFrame, TChain)``.
    """
    from ROOT import TChain, RDataFrame
    import glob

    tree_path = 'tuple0/DecayTree' if treeName is None else treeName
    file_globs = [pattern] if isinstance(pattern, str) else pattern

    chain = TChain(tree_path)
    for file_glob in file_globs:
        for matched_file in glob.glob(file_glob):
            chain.Add(matched_file)

    if keepObj is None:
        ## if no container is provided, return both objects
        return RDataFrame(chain), chain

    keepObj.append(chain)  # to keep this object in the memory
    return RDataFrame(chain)
def __rdf_from_dataset(self, dataset):
    """Wrap the chains built by ``rdf_from_dataset_helper`` in an
    ``RDataFrameCutWeight``.

    The friend chains returned by the helper replace ``self.friend_tchains``
    and the main chain is appended to ``self.tchains``. Implicit
    multi-threading is enabled whenever ``self.nthreads`` is not 1.
    """
    main_chain, friend_chains = rdf_from_dataset_helper(dataset)
    self.friend_tchains = friend_chains

    if self.nthreads != 1:
        EnableImplicitMT(self.nthreads)

    # Keep main chain alive
    self.tchains.append(main_chain)
    return RDataFrameCutWeight(RDataFrame(main_chain))
def get_frame(tree_name, file_name):
    """
    Return an RDataFrame built from tree ``tree_name`` in ``file_name``.

    The column names are printed one per line first when the free variable
    ``verbose`` (defined outside this function) is truthy.
    """
    data_frame = RDataFrame(tree_name, file_name)
    if verbose:
        column_names = data_frame.GetColumnNames()
        for name in column_names:
            print(name)
    return data_frame
def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Snapshot every friend tree flagged ``keep`` into ``output_ntp_name``.

    For each ``(full_path, tree)`` item of ``friends`` whose
    ``config[full_path]['keep']`` is truthy, the tree is optionally filtered
    with the cut built from ``config[full_path]['selection']`` and written
    under ``full_path`` with the branches listed in
    ``tree_branch_dict[full_path]``. The snapshot options use mode 'UPDATE'.
    """
    # Here we don't drop any branch. We do only keep specified trees.
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        tree_config = config[full_path]
        if not tree_config['keep']:
            continue

        frame = RDataFrame(tree)
        cut = concat_selections(tree_config['selection'])
        selected = frame.Filter(cut) if cut else frame

        branches = make_output_vec(tree_branch_dict[full_path])
        selected.Snapshot(full_path, output_ntp_name, branches, snapshot_opts)
def get_frame(file_name, df_index=0, tree_name="O2mcparticle_001"):
    """
    Return an RDataFrame for ``tree_name`` inside one directory of an AOD file.

    The top-level entries of ``file_name`` are listed with ``rootls``; those
    starting with ``DF_`` or ``TF_`` are kept and the one at position
    ``df_index`` is selected (and printed). When the free variable
    ``verbose`` is truthy, each ``(index, column name)`` pair is printed
    together with the column type.

    Raises:
        ValueError: if ``file_name`` is not an existing file.
        IndexError: if ``df_index`` is out of range for the kept entries.
    """
    if not path.isfile(file_name):
        raise ValueError("Did not find AOD file", file_name)

    listing = run_cmd(f"rootls {file_name}").strip().split()
    candidates = [
        entry for entry in listing if entry.startswith(("DF_", "TF_"))
    ]
    df_name = candidates[df_index]
    print(df_name)

    frame = RDataFrame(f"{df_name}/{tree_name}", file_name)
    if verbose:
        for indexed_column in enumerate(frame.GetColumnNames()):
            print(indexed_column, frame.GetColumnType(indexed_column[1]))
    return frame
def makeRootDataFrameFromTree(self, tree_file_name, tree_name='tree', verbose=False, friend_name='ML', friend_file_name=None):
    '''Read a tree through ``self.readTree`` and return an RDataFrame on it.

    When ``friend_file_name`` is given, its ``tree`` is attached as a friend
    under the alias ``friend_name`` and the alignment is checked by counting
    entries where ``l2_pt`` or ``event`` differ between tree and friend. A
    mismatch is only reported on stdout; the dataframe is still returned.

    Args:
        tree_file_name: file passed to ``self.readTree``.
        tree_name: tree name passed to ``self.readTree``.
        verbose: forwarded to ``self.readTree``; also prints what was read.
        friend_name: alias used for the friend tree.
        friend_file_name: file holding the friend tree, or None for no friend.
    '''
    ttree = self.readTree(tree_file_name, tree_name, verbose)
    if verbose:
        # The previous message printed `dataframe` before it was assigned,
        # which raised UnboundLocalError whenever verbose was set.
        print ('read tree', ttree, 'from file', tree_file_name)

    if friend_file_name:
        ttree.AddFriend(friend_name + '=tree',friend_file_name)
        #VALIDATE#
        # A non-zero difference selects the entry, so both counts are 0
        # only if tree and friend agree on every entry.
        validate1 = ttree.GetEntries('l2_pt - %s.l2_pt'%friend_name)
        validate2 = ttree.GetEntries('event - %s.event'%friend_name)
        if validate1 + validate2 != 0:
            # Report the two mismatch counts (the old message referenced
            # undefined names and raised NameError instead of printing).
            print ('\n\tERROR: FRIEND TREE NOT ALIGNED, FAKERATE USELESS', validate1, validate2)

    gROOT.cd()
    # dataframe = RDataFrame(tree_name,tree_file_name)
    dataframe = RDataFrame(ttree)
    return dataframe
def measureSFR(self, drawPlot = False):
    '''Fill the 2D histograms used for the single-fake-rate measurement.

    The single-fake samples from ``createSampleLists`` are chained into one
    ``TChain('tree')``. Two weighted 2D histograms in ``ptCone`` versus
    ``abs_hnl_hn_vis_eta`` are filled, one for the ``L_L_uncorrelated``
    selection and one additionally requiring ``T_T``, both on top of the
    ``MR_SF`` baseline. The ``T_T`` histogram is cloned into ``sfr_hist``.

    Args:
        drawPlot: when truthy, draw ``sfr_hist`` on a canvas and stop in the
            debugger afterwards.

    Returns:
        None. Saving the histogram is currently commented out.
    '''
    # Only the single-fake list is needed here.
    _, samples_singlefake, _ = createSampleLists(
        analysis_dir=self.analysis_dir,
        server=self.server,
        channel=self.channel,
    )
    working_samples = setSumWeights(samples_singlefake)

    print('###########################################################')
    print('# measuring singlefakerake...')
    print('# %d samples to be used:'%(len(working_samples)))
    print('###########################################################')
    for w in working_samples:
        print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))

    # TChain'ing all data samples together.
    # The previous version always read working_samples[0] inside the loop,
    # which added the first sample's file once per sample.
    chain = TChain('tree')
    for sample in working_samples:
        file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
        chain.Add(file_name)

    dataframe = RDataFrame(chain)
    weight = 'weight * lhe_weight'
    dataframe = (
        dataframe
        .Define('w', weight)
        .Define('ptCone', self.ptCone())
        .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
        .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
        .Define('abs_l1_eta', 'abs(l1_eta)')
        .Define('abs_l2_eta', 'abs(l2_eta)')
        .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
        .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
    )

    # bins_ptCone = np.array([5.,10., 20., 30., 40.,70., 2000])
    # bins_eta    = np.array([0., 0.8, 1.2, 2.4])
    bins_ptCone = np.array([5.,10., 20., 30., 40.,70.])
    bins_eta = np.array([0., 0.8, 1.2, 2.4])

    selection_baseline = getSelection(self.channel,'MR_SF')

    selection_LL_uncorrelated = '(' + ' & '.join([
        selection_baseline,
        getSelection(self.channel,'L_L_uncorrelated'),
    ]) + ')'

    selection_TT_uncorrelated = '(' + ' & '.join([
        selection_baseline,
        getSelection(self.channel,'L_L_uncorrelated'),
        getSelection(self.channel,'T_T'),
    ]) + ')'

    # Book both histograms before touching either result: RDataFrame actions
    # are lazy, so a single event loop then fills both of them.
    h_LL_uncorrelated = dataframe\
        .Filter(selection_LL_uncorrelated)\
        .Histo2D(('h_LL_uncorrelated','h_LL_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
    h_TT_uncorrelated = dataframe\
        .Filter(selection_TT_uncorrelated)\
        .Histo2D(('h_TT_uncorrelated','h_TT_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')

    # name the axis, also initiate the dataframe call
    h_LL_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')
    h_TT_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

    # preparing the histo and save it into a .root file
    sfr_TH2_dir = '/home/dehuazhu/HNL/CMSSW_9_4_6_patch1/src/PlotFactory/DataBkgPlots/modules/DDE_singlefake.root'
    sfr_hist = h_TT_uncorrelated.Clone()
    # sfr_hist = h_LL_uncorrelated.Clone()
    # sfrhist = h_baseline.Clone()
    # sfr_hist.Divide(h_LL_uncorrelated.Clone())
    # sfr_hist.SaveAs(sfr_TH2_dir) #uncomment this to save the TH2

    # draw the histo if required
    if drawPlot:
        can = TCanvas('can', '')
        # sfr_hist.Draw('colzTextE')
        # sfr_hist.Draw('colz')
        sfr_hist.Draw()
        pf.showlumi('%d entries'%(sfr_hist.GetEntries()))
        # pf.showlogopreliminary()
        can.Update()
        # NOTE(review): the breakpoint is assumed to belong to the drawing
        # branch (it keeps the canvas on screen) -- confirm.
        set_trace()
# C++ helpers for the RDataFrame expressions below: look up the bin of a
# TH2D for (x, y) and return that bin's content.
gInterpreter.Declare(''' Int_t getBin(Double_t x, Double_t y, TH2D* histo) { return histo->FindFixBin(x, y); } auto getWeight(Double_t x, Double_t y, TH2D* histo) { auto binIdx = getBin(x, y, histo); return histo->GetBinContent(binIdx); } ''')

# Open the histogram file and fetch the TH2D on the C++ side so that the
# name `histo` can be used inside the Define expression.
gInterpreter.Declare(f'auto histoNtp = new TFile("{histoNtpN}", "read");')
gInterpreter.Declare(f'auto histo = dynamic_cast<TH2D*>(histoNtp->Get("{histoN}"));')

dfInit = RDataFrame(mcTreeN, mcNtpN)
df = (
    dfInit
    .Define('wjk_alt', 'getWeight(b_ownpv_ndof, ntracks, histo)')
    .Define('wt', 'wpid*wtrk*wjk_alt')
)

# NOTE: This comes from the existing ntuple
mcRootBrs = df.AsNumpy(columns=['wjk_occ', 'wjk_alt'])
wtJkOccRoot = mcRootBrs['wjk_occ']
wtJkOccAltRoot = mcRootBrs['wjk_alt']

# 20 x 20 bins: x in [1, 200], y in [0, 450]
histoRootMdl = TH2DModel('histoRoot', 'histoRoot', 20, 1, 200, 20, 0, 450)
histoRoot = df.Histo2D(histoRootMdl, 'b_ownpv_ndof', 'ntracks', 'wt')

##################
from ROOT import (ROOT, RDataFrame, TCanvas)

ROOT.EnableImplicitMT()

# get file and tree from directory
minbias_ntuple = "/user/egovorko/work/public/minbias_JpsiPhi.root"
signal_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

# One dataframe per sample, both reading the same tree name.
minbias_df = RDataFrame(input_tree_name, minbias_ntuple)
signal_df = RDataFrame(input_tree_name, signal_ntuple)

# Keep events with at least one entry of eventmuons_PIDmu above 0.
PIDmu_cut = "eventmuons_PIDmu[eventmuons_PIDmu > 0].size() >= 1"
#pt_cut = "eventmuons_PT[eventmuons_PT > 500].size() >= 1"

minbias_pidmu = minbias_df.Filter(PIDmu_cut)
#minbias_pt = minbias_pidmu.Filter(pt_cut)

signal_pidmu = signal_df.Filter(PIDmu_cut)
#signal_pt = signal_pidmu.Filter(pt_cut)

# 2 x 2 canvas; the PIDmu distributions are taken from the unfiltered frames.
c1 = TCanvas()
c1.Divide(2, 2)

c1.cd(1)
pidmu1 = minbias_df.Histo1D("eventmuons_PIDmu")
pidmu1.SetTitle("Event muon PIDmu distribution (min bias)")
pidmu1.GetXaxis().SetTitle("PIDmu")
pidmu1.Draw()

c1.cd(2)
pidmu2 = signal_df.Histo1D("eventmuons_PIDmu")
pidmu2.SetTitle("Event muon PIDmu distribution (signal)")
pidmu2.GetXaxis().SetTitle("PIDmu")
pidmu2.SetLineColor(2)
parser.add_argument('--do', nargs='+', help="list of collections to make plots of") args = parser.parse_args() models = { 'm': (';m_{{{0}}} [GeV]; Events', 50, 0, 1000), 'pt': (';p_{{T,{0}}} [GeV]; Events', 50, 0, 1000), 'eta': (';#eta_{{{0}}}; Events', 15, -3., 3.), 'phi': (';#phi_{{{0}}}; Events', 20, -4., 4.) } outdir = 'plots' if args.output: outdir = args.output rdf = RDataFrame("CollectionTree", args.infile) rdf = rdf.Define('wgt', 'EventInfoAuxDyn.mcEventWeights[0]') canv = TCanvas('c', '', 800, 600) for coll in args.do: coll = coll.split(':') if len(coll) != 1 and len(coll) != 4: print('collection should either be "name" or "name:nbins:xmin:xmax"') continue varsuff = '' if 'fatjet' in coll[0]: cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux' elif 'jet' in coll[0]: cname = 'AntiKt4TruthDressedWZJetsAux'
description='find retention rates for various trigger paths.') parser.add_argument('ntp', help='specify ntuple path.') parser.add_argument('tree', help='specify tree name.') parser.add_argument('-t', '--trigger-paths', nargs='+', help='specify trigger paths.') return parser.parse_args() ######## # Main # ######## if __name__ == '__main__': args = parse_input() frame = RDataFrame(args.tree, args.ntp) cuts = [] for tp in args.trigger_paths: c = frame.Filter( tp, tp) # This is to avoid garbage collector to delete our pointer cuts.append(c) report = frame.Report() report.Print()
from ROOT import (ROOT, RDataFrame, TCanvas, TH1D)
import numpy as np

ROOT.EnableImplicitMT()

# get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
#dataframe_with_truep = dataframe.Define('Bs_momentum', 'pow( Bs_TRUEP_X*Bs_TRUEP_X + Bs_TRUEP_Y*Bs_TRUEP_Y + Bs_TRUEP_Z*Bs_TRUEP_Z , 0.5)')

# Unnamed pre-selection on the background category.
df_bkg = dataframe.Filter("Bs_BKGCAT == 0 || Bs_BKGCAT == 50")

#df_cuts1 = dataframe_with_truep.Filter("(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "trigger_cuts")

# Cut flow: each named filter is applied on top of the previous one.
cut1 = df_bkg.Filter("(Bs_TAU > 0.0015)", "tau_cut")
cut2 = cut1.Filter("(Bs_M > 5150) && (Bs_M < 5550)", "b_mass_cut")
cut3 = cut2.Filter("(Jpsi_M > 3020) && (Jpsi_M < 3170)", "jpsi_mass_cut")
cut4 = cut3.Filter("(Phi_M > 980) && (Phi_M < 1050)", "phi_mass_cut")
cut5 = cut4.Filter("(muplus_PT > 500) && (mumin_PT > 500)", "mu_pt_cut")
cut6 = cut5.Filter("(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20", "b_vtx_cut")
cut7 = cut6.Filter("(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)", "jpsi_vtx_cut")
cut8 = cut7.Filter("(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "phi_vtx_cut")
cut9 = cut8.Filter("mumin_PIDmu > 0 && muplus_PIDmu > 0", "mu_id_cut")
cut10 = cut9.Filter("Kmin_PIDK > 0 && Kplus_PIDK > 0", "k_id_cut")

#print("Mumin eff: ")
#mumincut = cut9.Report()
#mumincut.Print()

print("All stats: ")
# Requested on the root dataframe rather than on the last cut.
cutsreport = dataframe.Report()
frames.append(new_frm) apply_skim_cuts(new_frm, skim_cuts, ref) ######## # Main # ######## if __name__ == '__main__': ntp_dst = '../../ntuples/ref-rdx-run1/Dst-mix/Dst--21_10_21--mix--all--2011-2012--md-mu--phoebe.root' ntp_d0 = '../../ntuples/ref-rdx-run1/D0-mix/D0--21_10_21--mix--all--2011-2012--md-mu--phoebe.root' if len(sys.argv) == 1 or sys.argv[1].lower() == 'dst': print('Working on Dst...') frame_dst = RDataFrame('ntp1', ntp_dst) apply_cuts(frame_dst, DST_CUTS, DST_SKIM_CUTS, DST_REF_NUMS) elif sys.argv[1].lower() == 'd0': print('Working on D0...') frame_d0 = RDataFrame('ntp1', ntp_d0) apply_cuts(frame_d0, D0_CUTS, D0_SKIM_CUTS, D0_REF_NUMS) elif sys.argv[1].lower() == 'dstwsmu': print('Working on Dst wrong-sign Mu...') frame_dst = RDataFrame('ntp1', ntp_dst) apply_cuts(frame_dst, DST_WS_MU_CUTS, DST_SKIM_CUTS, DST_WS_MU_REF_NUMS) elif sys.argv[1].lower() == 'dstwspi': print('Working on Dst wrong-sign slow Pi...') frame_dst = RDataFrame('ntp1', ntp_dst) apply_cuts(frame_dst, DST_WS_PI_CUTS, DST_SKIM_CUTS, DST_WS_PI_REF_NUMS)
def get_dataframe(dataset):
    """Return an ``RDataFrame`` over the chain built for ``dataset``.

    The main chain and the friend chains returned by
    ``rdf_from_dataset_helper`` are stored on the dataframe object as the
    attributes ``tchain`` and ``friend_tchains``.
    """
    main_chain, friend_chains = rdf_from_dataset_helper(dataset)
    frame = RDataFrame(main_chain)
    frame.tchain = main_chain
    frame.friend_tchains = friend_chains
    return frame
######## # Main # ######## if __name__ == '__main__': args = parse_input() histos = glob_histos(args.histo_folder) config = parse_config(args.config) loaded_histos = dict() output_opts = RSnapshotOptions() output_opts.fMode = 'UPDATE' first_write = True for idx, tree in enumerate(config['trees']): print('Processing tree {}...'.format(tree)) init_frame = RDataFrame(tree, args.input_ntp) frames = [init_frame] output_brs = vector('string')(['runNumber', 'eventNumber']) for br, directive in config['config'].items(): if tree in directive['skip_tree']: continue print(' Processing {}...'.format(br)) params = ', '.join(resolve_params(directive['vars'], idx)) histo_name = directive['histo_name'] histo_dim = len(directive['vars']) debug_br = 'debug_{}_bin_idx'.format(br) wt_histo = load_histo(