def makeDataFrame(self):
    """Build an RDataFrame over all double-fake samples.

    The sample lists come from ``createSampleLists`` (configured with this
    object's ``analysis_dir``, ``server`` and ``channel``); only the
    double-fake list is used. The ``tree.root`` file of every sample is
    added to one ``TChain`` named ``tree``, and the frame built on it gets
    the event weight column ``w``, ``ptCone`` and several absolute-value
    helper columns.

    Returns:
        The RDataFrame node produced by the last ``Define`` call.
    """
    _, _, samples_doublefake = createSampleLists(
        analysis_dir=self.analysis_dir, server=self.server, channel=self.channel)
    working_samples = setSumWeights(samples_doublefake)

    print('###########################################################')
    print('# measuring doublefakerake...')
    print('# %d samples to be used:'%(len(working_samples)))
    print('###########################################################')
    for w in working_samples:
        print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))

    # TChain'ing all data samples together.
    # The previous loop re-read working_samples[0] on every iteration, which
    # added the first sample's file once per sample and never the others.
    # NOTE(review): `chain` is a local object; confirm that the ROOT version
    # in use keeps it alive for the returned frame.
    chain = TChain('tree')
    for sample in working_samples:
        file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
        chain.Add(file_name)

    weight = 'weight * lhe_weight'
    dataframe = (
        RDataFrame(chain)
        .Define('w', weight)
        .Define('ptCone', self.ptCone())
        .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
        .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
        .Define('abs_l1_eta', 'abs(l1_eta)')
        .Define('abs_l2_eta', 'abs(l2_eta)')
        .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
        .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
    )

    return dataframe
def get_frame(tree_name, file_name):
    """Open `tree_name` from `file_name` as an RDataFrame.

    When the module-level `verbose` flag is truthy, every column name of
    the frame is printed before the frame is returned.
    """
    rdf = RDataFrame(tree_name, file_name)
    if not verbose:
        return rdf
    for column in rdf.GetColumnNames():
        print(column)
    return rdf
def get_frame(tree_name, file_name):
    """
    Return the RDataFrame for tree `tree_name` stored in `file_name`.

    If the module-level `verbose` flag is truthy, the column names are
    listed on stdout, one per line.
    """
    data = RDataFrame(tree_name, file_name)
    if verbose:
        names = data.GetColumnNames()
        for name in names:
            print(name)
    return data
def split(file_name):  # file_name is passed without the ".root" extension
    """Write the two halves of `<file_name>.root:tree` to separate files.

    Entries [0, n/2) go to `<file_name>_training_half.root`, the remaining
    ones to `<file_name>_untouched_half.root`; both outputs hold a tree
    named `tree`.
    """
    source = rt.TFile(file_name + '.root')
    tree = source.Get('tree')
    frame = RDF(tree)
    half = int(tree.GetEntries() / 2)

    first_half = frame.Range(0, half)
    # An end value of 0 makes Range run up to the last entry.
    second_half = frame.Range(half, 0)

    first_half.Snapshot('tree', '%s_training_half.root' % file_name)
    second_half.Snapshot('tree', '%s_untouched_half.root' % file_name)
def __rdf_from_dataset(self, dataset):
    """Chain the ntuples of `dataset` (plus friends) into an RDataFrame.

    All ntuples must share one `directory` (tree name), otherwise a
    NameError is raised. Friends are grouped into one TChain per
    `friend.tag` and attached to the main chain. The chains are stored on
    `self` so they are not garbage-collected while the frame is in use.

    Returns:
        An `RDataFrameCutWeight` wrapping the RDataFrame of the main chain.
    """
    distinct_tree_names = {ntuple.directory for ntuple in dataset.ntuples}
    if len(distinct_tree_names) != 1:
        raise NameError(
            'Impossible to create RDataFrame with different tree names')

    main_chain = TChain()
    chains_by_tag = {}
    for ntuple in dataset.ntuples:
        main_chain.Add('{}/{}'.format(ntuple.path, ntuple.directory))
        for friend in ntuple.friends:
            if friend.tag not in chains_by_tag:
                chains_by_tag[friend.tag] = TChain()
            friend_chain = chains_by_tag[friend.tag]
            friend_chain.Add('{}/{}'.format(friend.path, friend.directory))

    for friend_chain in chains_by_tag.values():
        main_chain.AddFriend(friend_chain)
        # Keep friend chains alive
        self.friend_tchains.append(friend_chain)

    if self.nthreads != 1:
        EnableImplicitMT(self.nthreads)

    # Keep main chain alive
    self.tchains.append(main_chain)
    return RDataFrameCutWeight(RDataFrame(main_chain))
def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Snapshot every tree marked `keep` in `config` into `output_ntp_name`.

    No branch is dropped here; only the selection of trees is applied.
    For each kept tree the selection from `config[full_path]['selection']`
    is applied when it is non-empty, and the branches derived from
    `tree_branch_dict[full_path]` are written in UPDATE mode under the
    tree's full path.
    """
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        tree_cfg = config[full_path]
        if not tree_cfg['keep']:
            continue

        frame = RDataFrame(tree)
        cut = concat_selections(config[full_path]['selection'])
        selected = frame.Filter(cut) if cut else frame

        output_br = make_output_vec(tree_branch_dict[full_path])
        selected.Snapshot(full_path, output_ntp_name, output_br, snapshot_opts)
def getRDF(pattern, treeName=None, keepObj=None):
    """Build an RDataFrame from all files matching one or more glob patterns.

    Args:
        pattern: a single glob pattern (str) or an iterable of patterns.
        treeName: tree to chain; defaults to 'tuple0/DecayTree'.
        keepObj: optional list; when given, the TChain is appended to it so
            that it stays in memory, and only the frame is returned.

    Returns:
        The RDataFrame when `keepObj` is provided, otherwise the tuple
        (RDataFrame, TChain).
    """
    tree = 'tuple0/DecayTree' if treeName is None else treeName
    if isinstance(pattern, str):
        patterns = [pattern]
    else:
        patterns = pattern

    from ROOT import TChain, RDataFrame
    import glob

    chain = TChain(tree)
    for glob_pattern in patterns:
        matches = glob.glob(glob_pattern)
        for file_path in matches:
            chain.Add(file_path)

    if keepObj is None:
        # if no container is provided, return both objects
        return RDataFrame(chain), chain

    keepObj.append(chain)  # to keep this object in the memory
    return RDataFrame(chain)
def __rdf_from_dataset(self, dataset):
    """Create the cut/weight-aware frame for `dataset`.

    The chains are produced by `rdf_from_dataset_helper`; the friend chains
    replace `self.friend_tchains` and the main chain is appended to
    `self.tchains` so both stay referenced. Implicit multi-threading is
    enabled whenever `self.nthreads` differs from 1.

    Returns:
        An `RDataFrameCutWeight` wrapping the RDataFrame of the main chain.
    """
    main_chain, friend_chains = rdf_from_dataset_helper(dataset)
    self.friend_tchains = friend_chains

    if self.nthreads != 1:
        EnableImplicitMT(self.nthreads)

    # Keep main chain alive
    self.tchains.append(main_chain)
    return RDataFrameCutWeight(RDataFrame(main_chain))
def get_frame(file_name, df_index=0, tree_name="O2mcparticle_001"):
    """
    Getter of the frame from the file.

    The top-level entries reported by ``rootls`` are scanned for names
    starting with ``DF_`` or ``TF_``; the one at position `df_index` is
    selected and `tree_name` is read from inside it.

    Args:
        file_name: path of the AOD file.
        df_index: position of the wanted DF_/TF_ entry in the listing.
        tree_name: name of the tree inside the selected entry.

    Returns:
        The RDataFrame for ``<selected entry>/<tree_name>``.

    Raises:
        ValueError: if `file_name` is not an existing file.
        IndexError: if `df_index` does not address a DF_/TF_ entry.
    """
    if not path.isfile(file_name):
        raise ValueError("Did not find AOD file", file_name)

    # NOTE(review): file_name is interpolated into a command string; check
    # how run_cmd executes it if paths can come from untrusted input.
    sub_names = run_cmd(f"rootls {file_name}").strip().split()
    df_names = [name for name in sub_names if name.startswith(("DF_", "TF_"))]
    try:
        df_name = df_names[df_index]
    except IndexError:
        # Same exception type as before, but with a message that says why.
        raise IndexError(
            f"df_index {df_index} out of range: found {len(df_names)} "
            f"DF_/TF_ entries in {file_name}") from None
    print(df_name)

    frame = RDataFrame(f"{df_name}/{tree_name}", file_name)
    if verbose:
        # Each item is an (index, column name) pair; print it with the type.
        for indexed_name in enumerate(frame.GetColumnNames()):
            print(indexed_name, frame.GetColumnType(indexed_name[1]))
    return frame
def produceLightTree(sample='DY',ch='mmm'): if ch == 'mmm': d17B = data_B_mmm+suffix; d17C = data_C_mmm+suffix; d17D = data_D_mmm+suffix; d17E = data_E_mmm+suffix; d17F = data_F_mmm+suffix; SFR_012_L = SFR_MMM_012_L l2_tight = l2_m_tight if ch == 'eem': d17B = data_B_eem+suffix; d17C = data_C_eem+suffix; d17D = data_D_eem+suffix; d17E = data_E_eem+suffix; d17F = data_F_eem+suffix; t = rt.TChain('tree') if sample == 'DY': t.Add(DY) t.Add(DY_ext) if sample == 'data': t.Add(d17B) #t.Add(d17C) #t.Add(d17D) #t.Add(d17E) #t.Add(d17F) print '\n\ttotal entries:', t.GetEntries() df = RDF(t) df1 = df.Define('LOOSE', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 )' ) df2 = df1.Define('TIGHT', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 && ' + l2_tight + ')' ) num_L = df2.Filter('LOOSE == 1').Count().GetValue() print '\n\tloose entries in MR:', num_L num_T = df2.Filter('TIGHT == 1').Count().GetValue() print '\n\ttight entries in MR:', num_T df2 = df2.Define('ptcone', PTCONEL2) branchList = rt.vector('string')() for br in ['event', 'lumi', 'run', 'LOOSE', 'TIGHT', 'l2_reliso_rho_03', 'l2_Medium', 'l2_eta', 'l2_pt', 'l2_dxy', 'l2_dz', 'ptcone']: branchList.push_back(br) df2.Snapshot('tree', saveDir+'/%s_%s_6_24B_Lcut_29_4.root'%(sample,ch), branchList)
def __init__(self, name, label, selection, datacard_name, colour,
             position_in_stack, basedir, postfix, isdata, ismc, issignal,
             weight, xs):
    """Load one sample: metadata, normalisation and the selected events.

    The constructor stores its arguments as attributes and, for non-data
    samples, reads the number of generated events from
    ``<basedir>/<name>/SkimAnalyzerCount/SkimReport.txt``. It then opens
    ``<basedir>/<name>/<postfix>`` (tree ``tree``), applies `selection`
    and converts the result into a pandas DataFrame stored in ``self.df``.

    ``self.lumi_scaling`` is 1 for data and ``xs / nevents`` otherwise.
    """
    self.name = name
    # Single-argument form prints the same text under Python 2 and 3.
    print('loading %s' % (self.name,))
    self.label = label
    self.selection = selection
    self.datacard_name = datacard_name
    self.colour = colour
    self.position_in_stack = position_in_stack
    self.basedir = basedir
    self.postfix = postfix
    self.isdata = isdata
    self.ismc = ismc
    self.issignal = issignal
    self.weight = weight
    self.xs = xs
    self.nevents = 1.
    self.file = '/'.join([basedir, self.name, postfix])

    if not self.isdata:
        nevents_file = '/'.join(
            [basedir, self.name, 'SkimAnalyzerCount/SkimReport.txt'])
        with open(nevents_file) as ff:
            for line in ff:
                if 'Sum Norm Weights' in line:
                    # Parse the line that matched, not a fixed line index.
                    # NOTE(review): r'\d+' keeps only the first run of
                    # digits, i.e. the integer part of the number.
                    self.nevents = float(re.findall(r'\d+', line)[0])
                    break

    # self.file already holds <basedir>/<name>/<postfix>.
    rdf = RDF('tree', self.file)
    rdf = rdf.Filter(self.selection)
    self.df = pd.DataFrame(rdf.AsNumpy())

    # scale to 1/pb
    self.lumi_scaling = 1. if self.isdata else (self.xs / self.nevents)
def makeRootDataFrameFromTree(self, tree_file_name, tree_name='tree', verbose=False, friend_name='ML', friend_file_name=None):
    '''Build an RDataFrame on a tree read through ``self.readTree``.

    When `friend_file_name` is given, its tree ``tree`` is attached as a
    friend under the alias `friend_name`, and the alignment of the two
    trees is checked by counting entries where ``l2_pt`` or ``event``
    differ between the tree and its friend. A misalignment is reported on
    stdout but does not stop execution.

    Returns:
        The RDataFrame built on the (possibly befriended) tree.
    '''
    ttree = self.readTree(tree_file_name, tree_name, verbose)

    if friend_file_name:
        ttree.AddFriend(friend_name + '=tree',friend_file_name)
        #VALIDATE#
        # GetEntries(selection) counts entries with a non-zero expression,
        # so both counts are zero when the trees are aligned.
        validate1 = ttree.GetEntries('l2_pt - %s.l2_pt'%friend_name)
        validate2 = ttree.GetEntries('event - %s.event'%friend_name)
        if not validate1+validate2 == 0:
            # Report the two mismatch counts (the old message referenced
            # undefined names and raised NameError instead of printing).
            print ('\n\tERROR: FRIEND TREE NOT ALIGNED, FAKERATE USELESS', validate1, validate2)

    gROOT.cd()
    dataframe = RDataFrame(ttree)
    # Printed only after the frame exists; doing it earlier raised
    # UnboundLocalError whenever verbose was set.
    if verbose:
        print ('read dataframe', dataframe, 'from file', tree_file_name)
    return dataframe
# Script excerpt: parse the command line, open the input tree and start
# looping over the requested collections.
parser.add_argument('--do', nargs='+', help="list of collections to make plots of")
args = parser.parse_args()

# One entry per variable; presumably (title template, nbins, xmin, xmax),
# with {0} to be filled in by code outside this excerpt -- TODO confirm.
models = {
    'm': (';m_{{{0}}} [GeV]; Events', 50, 0, 1000),
    'pt': (';p_{{T,{0}}} [GeV]; Events', 50, 0, 1000),
    'eta': (';#eta_{{{0}}}; Events', 15, -3., 3.),
    'phi': (';#phi_{{{0}}}; Events', 20, -4., 4.)
}

# Output directory: 'plots' unless args.output is set.
outdir = 'plots'
if args.output:
    outdir = args.output

rdf = RDataFrame("CollectionTree", args.infile)
# Event weight column: first element of the MC event weights.
rdf = rdf.Define('wgt', 'EventInfoAuxDyn.mcEventWeights[0]')

canv = TCanvas('c', '', 800, 600)

# NOTE(review): '--do' has no default here, so args.do is None when the
# option is omitted and this loop would fail -- confirm it is required.
for coll in args.do:
    # Accepted forms: "name" or "name:nbins:xmin:xmax".
    coll = coll.split(':')
    if len(coll) != 1 and len(coll) != 4:
        print('collection should either be "name" or "name:nbins:xmin:xmax"')
        continue
    varsuff = ''
    # 'fatjet' is tested first because such names also contain 'jet'.
    if 'fatjet' in coll[0]:
        cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux'
    elif 'jet' in coll[0]:
        cname = 'AntiKt4TruthDressedWZJetsAux'
# Script excerpt: declare C++ helpers that look up a weight in a TH2D, then
# use them to define per-event weights on the MC ntuple.
gInterpreter.Declare(''' Int_t getBin(Double_t x, Double_t y, TH2D* histo) { return histo->FindFixBin(x, y); } auto getWeight(Double_t x, Double_t y, TH2D* histo) { auto binIdx = getBin(x, y, histo); return histo->GetBinContent(binIdx); } ''')

# Open the histogram file and fetch the TH2D inside the interpreter, so the
# C++ variable `histo` can be referenced from the Define expression below.
gInterpreter.Declare(f'auto histoNtp = new TFile("{histoNtpN}", "read");')
gInterpreter.Declare(f'auto histo = dynamic_cast<TH2D*>(histoNtp->Get("{histoN}"));')

dfInit = RDataFrame(mcTreeN, mcNtpN)
# 'wjk_alt' is looked up from the histogram; 'wt' is the product of the
# three weight columns.
df = dfInit.Define('wjk_alt', 'getWeight(b_ownpv_ndof, ntracks, histo)').Define('wt', 'wpid*wtrk*wjk_alt')

# NOTE: This comes from the existing ntuple
mcRootBrs = df.AsNumpy(columns=['wjk_occ', 'wjk_alt'])
wtJkOccRoot = mcRootBrs['wjk_occ']
wtJkOccAltRoot = mcRootBrs['wjk_alt']

# 2D model: 20 bins in [1, 200] on x and 20 bins in [0, 450] on y.
histoRootMdl = TH2DModel( 'histoRoot', 'histoRoot', 20, 1, 200, 20, 0, 450 )
# Filled with (b_ownpv_ndof, ntracks), weighted by 'wt'.
histoRoot = df.Histo2D(histoRootMdl, 'b_ownpv_ndof', 'ntracks', 'wt')

##################
"Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX", "Triggered_HLT_IsoMu27_vX", ] branch_vec = ROOT.vector("string")() [branch_vec.push_back(branch) for branch in branches] if not options.is_dataframe: print( "No dataframe was given. Handling the arguments as trees and adding them to chain." ) input_files = args input_chain = ROOT.TChain("MVATree") for input_file in input_files: input_chain.Add(input_file) print("Finished loading chain with ", input_chain.GetEntries(), " entries") data_frame = RDF(input_chain, branch_vec) else: print("Dataframe flag was set. Handling argument as dataframe.") input_file = args[0] data_frame = RDF("tree", input_file) print("Finished converting the chain to RDataFrame") if not options.is_dataframe and options.save: print("saving dataframe to disk as ", data_mc_string + "_" + options.name + "_dataframe.root") data_frame.Snapshot( "tree", data_mc_string + "_" + options.name + "_dataframe.root", branch_vec) print("saved dataframe to disk ...")
"DeltaR_AK4Jet_LooseElectron", "DeltaR_AK4Jet_LooseMuon" ] branch_vec = ROOT.vector("string")() [branch_vec.push_back(branch) for branch in branches] if not options.is_dataframe: print( "No dataframe was given. Handling the arguments as trees and adding them to chain." ) input_files = args input_chain = ROOT.TChain("MVATree") for input_file in input_files: input_chain.Add(input_file) print("Finished loading chain with ", input_chain.GetEntries(), " entries") data_frame = RDF(input_chain, branch_vec) else: print("Dataframe flag was set. Handling argument as dataframe.") input_file = args[0] data_frame = RDF("tree", input_file) print("Finished converting the chain to RDataFrame") if not options.is_dataframe and options.save: print("saving dataframe to disk as ", data_mc_string + "_" + options.name + "_dataframe.root") data_frame.Snapshot( "tree", data_mc_string + "_" + options.name + "_dataframe.root", branch_vec) print("saved dataframe to disk ...")
'{lb} <= {br} && {br} <= {ub}'.format(br=br, lb=train + validation, ub=100) } ######## # Main # ######## if __name__ == '__main__': args = parse_input() gInterpreter.Declare('auto rand_gen = TRandom3({});'.format(args.seed)) init_frame = RDataFrame(args.tree, args.ntp) rand_frame = init_frame.Define('rand_split', 'rand_gen.Uniform(0, 100)') if args.debug: print('loaded {} with {} entries'.format( args.ntp, rand_frame.Count().GetValue())) cuts = get_cuts(args.train_ratio, args.validation_ratio) for sample, cut in cuts.items(): subsample_frame = rand_frame.Filter(cut) output_ntp = join(args.output_dir, '{}_{}.root'.format(get_filename(args.ntp), sample)) subsample_frame.Snapshot(args.tree, output_ntp) if args.debug:
# Script excerpt: build a chain of named filters on the Bs -> J/psi phi MC
# ntuple and request a cut-flow report.
from ROOT import (ROOT, RDataFrame, TCanvas, TH1D)
import numpy as np

ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
#dataframe_with_truep = dataframe.Define('Bs_momentum', 'pow( Bs_TRUEP_X*Bs_TRUEP_X + Bs_TRUEP_Y*Bs_TRUEP_Y + Bs_TRUEP_Z*Bs_TRUEP_Z , 0.5)')

# Unnamed filter on the background category (values 0 and 50 are kept).
df_bkg = dataframe.Filter("Bs_BKGCAT == 0 || Bs_BKGCAT == 50")
#df_cuts1 = dataframe_with_truep.Filter("(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "trigger_cuts")

# Named filters, each applied on top of the previous one.
cut1 = df_bkg.Filter("(Bs_TAU > 0.0015)", "tau_cut")
cut2 = cut1.Filter("(Bs_M > 5150) && (Bs_M < 5550)", "b_mass_cut")
cut3 = cut2.Filter("(Jpsi_M > 3020) && (Jpsi_M < 3170)", "jpsi_mass_cut")
cut4 = cut3.Filter("(Phi_M > 980) && (Phi_M < 1050)", "phi_mass_cut")
cut5 = cut4.Filter("(muplus_PT > 500) && (mumin_PT > 500)", "mu_pt_cut")
cut6 = cut5.Filter("(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20", "b_vtx_cut")
cut7 = cut6.Filter("(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)", "jpsi_vtx_cut")
cut8 = cut7.Filter("(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "phi_vtx_cut")
cut9 = cut8.Filter("mumin_PIDmu > 0 && muplus_PIDmu > 0", "mu_id_cut")
cut10 = cut9.Filter("Kmin_PIDK > 0 && Kplus_PIDK > 0", "k_id_cut")

#print("Mumin eff: ")
#mumincut = cut9.Report()
#mumincut.Print()

print("All stats: ")
# The report is requested on the root frame rather than on cut10.
cutsreport = dataframe.Report()
# initialize RDataFrame data_frame = None # either create the RDataFrame from a ROOT tree in a file or from a ROOT chain made up of several files # or create it directly from an existing RDataFrame including a ROOT tree if not options.is_dataframe: print( "No dataframe was given. Handling the arguments as trees and adding them to chain." ) input_files = args input_chain = ROOT.TChain(options.treename) for input_file in input_files: input_chain.Add(input_file) print("Finished loading chain with ", input_chain.GetEntries(), " entries") data_frame = RDF(input_chain, branch_vec) else: print("Dataframe flag was set. Handling argument as dataframe.") input_file = args[0] data_frame = RDF(options.treename, input_file) print("Finished creating the RDataFrame") # possibly save the created RDataFrame to disk if not options.is_dataframe and options.save: print("saving dataframe to disk as ", data_mc_string + "_" + names + "_dataframe.root") data_frame.Snapshot("tree", data_mc_string + "_" + names + "_dataframe.root", branch_vec) print("saved dataframe to disk ...")
# Script excerpt: read the YAML configuration, open the data / MC tables and
# start the loop over centrality classes.
parser.add_argument("config", help="Path to the YAML configuration file")
args = parser.parse_args()

ROOT.ROOT.EnableImplicitMT()
gROOT.SetBatch()

with open(os.path.expandvars(args.config), 'r') as stream:
    try:
        params = yaml.full_load(stream)
    except yaml.YAMLError as exc:
        # NOTE(review): the error is only printed; `params` then stays
        # unbound and the next use of it raises NameError.
        print(exc)

# define paths for loading data and storing results
mc_path = os.path.expandvars(params['MC_PATH'])
data_path = os.path.expandvars(params['DATA_PATH'])

# The signal and generated tables are both read from the MC file.
dataDF = RDF('DataTable', data_path)
mcDF = RDF('SignalTable', mc_path)
genDF = RDF('GenTable', mc_path)

# The results directory comes from the environment variable
# HYPERML_RESULTS_<NBODY>; a KeyError is raised if it is not set.
results_dir = os.environ['HYPERML_RESULTS_{}'.format(params['NBODY'])]
file_name = results_dir + '/' + params['FILE_PREFIX'] + '_std.root'
results_file = TFile(file_name, 'recreate')

for cclass in params['CENTRALITY_CLASS']:
    # One output directory per class, named "<low>-<high>".
    cent_dir = results_file.mkdir('{}-{}'.format(cclass[0], cclass[1]))

    # Same half-open selection [low, high) applied to all three tables.
    dataCentDF = dataDF.Filter('centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1]))
    mcCentDF = mcDF.Filter('centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1]))
    genCentDF = genDF.Filter('centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1]))
frames.append(new_frm) apply_skim_cuts(new_frm, skim_cuts, ref) ######## # Main # ######## if __name__ == '__main__': ntp_dst = '../../ntuples/ref-rdx-run1/Dst-mix/Dst--21_10_21--mix--all--2011-2012--md-mu--phoebe.root' ntp_d0 = '../../ntuples/ref-rdx-run1/D0-mix/D0--21_10_21--mix--all--2011-2012--md-mu--phoebe.root' if len(sys.argv) == 1 or sys.argv[1].lower() == 'dst': print('Working on Dst...') frame_dst = RDataFrame('ntp1', ntp_dst) apply_cuts(frame_dst, DST_CUTS, DST_SKIM_CUTS, DST_REF_NUMS) elif sys.argv[1].lower() == 'd0': print('Working on D0...') frame_d0 = RDataFrame('ntp1', ntp_d0) apply_cuts(frame_d0, D0_CUTS, D0_SKIM_CUTS, D0_REF_NUMS) elif sys.argv[1].lower() == 'dstwsmu': print('Working on Dst wrong-sign Mu...') frame_dst = RDataFrame('ntp1', ntp_dst) apply_cuts(frame_dst, DST_WS_MU_CUTS, DST_SKIM_CUTS, DST_WS_MU_REF_NUMS) elif sys.argv[1].lower() == 'dstwspi': print('Working on Dst wrong-sign slow Pi...') frame_dst = RDataFrame('ntp1', ntp_dst) apply_cuts(frame_dst, DST_WS_PI_CUTS, DST_SKIM_CUTS, DST_WS_PI_REF_NUMS)
# Script excerpt: count events after the trigger and muon cuts and start the
# tagging bookkeeping on the Bs -> J/psi phi MC ntuple.
from ROOT import (
    ROOT,
    RDataFrame,
)

ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)

trigger_cuts = "(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
# Requires at least one element of eventmuons_BPVIPCHI2 below 8 and a
# positive Bs_len.
muon_cuts = "!eventmuons_BPVIPCHI2[eventmuons_BPVIPCHI2 < 8].empty() && Bs_len > 0"

# The muon cuts are applied on top of the trigger cuts.
df_cuts1 = dataframe.Filter(trigger_cuts)
df_cuts2 = df_cuts1.Filter(muon_cuts)

# Each GetValue() call is issued separately, before the next count is booked.
nentries = dataframe.Count().GetValue()
ntriggered = df_cuts1.Count().GetValue()
ncut = df_cuts2.Count().GetValue()

print("entries: " + str(nentries))
print("triggered: " + str(ntriggered))
print("cut: " + str(ncut))

# Events split by the sign of Bs_TRUEID (531 / -531) after all cuts.
TrueBs = df_cuts2.Filter("Bs_TRUEID == 531").Count().GetValue()
TrueBsbar = df_cuts2.Filter("Bs_TRUEID == -531").Count().GetValue()

#define right tags, wrong tags and untagged; tagging efficiency, mistag probability and tagging performance
# R: events where the tag decision (+1 / -1) pairs with Bs_TRUEID 531 / -531.
R = df_cuts2.Filter(
    "(Bs_OSMuon_TAGDEC == 1 && Bs_TRUEID == 531) || (Bs_OSMuon_TAGDEC == -1 && Bs_TRUEID == -531)"
).Count().GetValue()
def measureSFR(self, drawPlot = False):
    """Fill the loose and tight 2D histograms for the single-fake samples.

    The single-fake sample list from ``createSampleLists`` is chained into
    one ``TChain`` named ``tree``. Two histograms in (ptCone, |eta| of the
    visible HN system) are filled with weight ``w``: one for the
    loose-loose uncorrelated selection and one that additionally requires
    the tight-tight selection. The tight histogram is cloned as
    ``sfr_hist``; saving it is currently commented out.

    Args:
        drawPlot: when True, draw ``sfr_hist`` on a canvas and drop into
            the debugger so the canvas stays open.

    Returns:
        None.
    """
    _, samples_singlefake, _ = createSampleLists(
        analysis_dir=self.analysis_dir, server=self.server, channel=self.channel)
    working_samples = setSumWeights(samples_singlefake)

    print('###########################################################')
    print('# measuring singlefakerake...')
    print('# %d samples to be used:'%(len(working_samples)))
    print('###########################################################')
    for w in working_samples:
        print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))

    # TChain'ing all data samples together.
    # The previous loop re-read working_samples[0] on every iteration, which
    # added the first sample's file once per sample and never the others.
    chain = TChain('tree')
    for sample in working_samples:
        file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
        chain.Add(file_name)

    weight = 'weight * lhe_weight'
    dataframe = (
        RDataFrame(chain)
        .Define('w', weight)
        .Define('ptCone', self.ptCone())
        .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
        .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
        .Define('abs_l1_eta', 'abs(l1_eta)')
        .Define('abs_l2_eta', 'abs(l2_eta)')
        .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
        .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
    )

    # Bin edges; an overflow edge at 2000 was used previously:
    # bins_ptCone = np.array([5.,10., 20., 30., 40.,70., 2000])
    bins_ptCone = np.array([5.,10., 20., 30., 40.,70.])
    bins_eta = np.array([0., 0.8, 1.2, 2.4])

    selection_baseline = getSelection(self.channel,'MR_SF')

    selection_LL_uncorrelated = '(' + ' & '.join([
        selection_baseline,
        getSelection(self.channel,'L_L_uncorrelated'),
    ]) + ')'

    selection_TT_uncorrelated = '(' + ' & '.join([
        selection_baseline,
        getSelection(self.channel,'L_L_uncorrelated'),
        getSelection(self.channel,'T_T'),
    ]) + ')'

    h_LL_uncorrelated = dataframe\
        .Filter(selection_LL_uncorrelated)\
        .Histo2D(('h_LL_uncorrelated','h_LL_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
    #name the axis, also initiate the dataframe call
    h_LL_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

    h_TT_uncorrelated = dataframe\
        .Filter(selection_TT_uncorrelated)\
        .Histo2D(('h_TT_uncorrelated','h_TT_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
    #name the axis, also initiate the dataframe call
    h_TT_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

    # preparing the histo and save it into a .root file
    sfr_TH2_dir = '/home/dehuazhu/HNL/CMSSW_9_4_6_patch1/src/PlotFactory/DataBkgPlots/modules/DDE_singlefake.root'
    sfr_hist = h_TT_uncorrelated.Clone()
    # sfr_hist = h_LL_uncorrelated.Clone()
    # sfrhist = h_baseline.Clone()
    # sfr_hist.Divide(h_LL_uncorrelated.Clone())
    # dfr_hist.SaveAs(sfr_TH2_dir) #uncomment this to save the TH2

    # draw the histo if required
    if drawPlot == True:
        can = TCanvas('can', '')
        # sfr_hist.Draw('colzTextE')
        # sfr_hist.Draw('colz')
        sfr_hist.Draw()
        pf.showlumi('%d entries'%(sfr_hist.GetEntries()))
        # pf.showlogopreliminary()
        can.Update()
        set_trace()
# Script excerpt: compare the event-muon PIDmu distribution in the minimum
# bias and signal samples.
from ROOT import (ROOT, RDataFrame, TCanvas)

ROOT.EnableImplicitMT()

#get file and tree from directory
minbias_ntuple = "/user/egovorko/work/public/minbias_JpsiPhi.root"
signal_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

# Both samples are read from a tree with the same name.
minbias_df = RDataFrame(input_tree_name, minbias_ntuple)
signal_df = RDataFrame(input_tree_name, signal_ntuple)

# Keep events with at least one element of eventmuons_PIDmu above 0.
PIDmu_cut = "eventmuons_PIDmu[eventmuons_PIDmu > 0].size() >= 1"
#pt_cut = "eventmuons_PT[eventmuons_PT > 500].size() >= 1"

minbias_pidmu = minbias_df.Filter(PIDmu_cut)
#minbias_pt = minbias_pidmu.Filter(pt_cut)
signal_pidmu = signal_df.Filter(PIDmu_cut)
#signal_pt = signal_pidmu.Filter(pt_cut)

# 2x2 canvas; the histograms below are booked on the unfiltered frames.
c1 = TCanvas()
c1.Divide(2, 2)

c1.cd(1)
pidmu1 = minbias_df.Histo1D("eventmuons_PIDmu")
pidmu1.SetTitle("Event muon PIDmu distribution (min bias)")
pidmu1.GetXaxis().SetTitle("PIDmu")
pidmu1.Draw()

c1.cd(2)
pidmu2 = signal_df.Histo1D("eventmuons_PIDmu")
pidmu2.SetTitle("Event muon PIDmu distribution (signal)")
pidmu2.GetXaxis().SetTitle("PIDmu")
pidmu2.SetLineColor(2)
description='find retention rates for various trigger paths.') parser.add_argument('ntp', help='specify ntuple path.') parser.add_argument('tree', help='specify tree name.') parser.add_argument('-t', '--trigger-paths', nargs='+', help='specify trigger paths.') return parser.parse_args() ######## # Main # ######## if __name__ == '__main__': args = parse_input() frame = RDataFrame(args.tree, args.ntp) cuts = [] for tp in args.trigger_paths: c = frame.Filter( tp, tp) # This is to avoid garbage collector to delete our pointer cuts.append(c) report = frame.Report() report.Print()
######## # Main # ######## if __name__ == '__main__': args = parse_input() histos = glob_histos(args.histo_folder) config = parse_config(args.config) loaded_histos = dict() output_opts = RSnapshotOptions() output_opts.fMode = 'UPDATE' first_write = True for idx, tree in enumerate(config['trees']): print('Processing tree {}...'.format(tree)) init_frame = RDataFrame(tree, args.input_ntp) frames = [init_frame] output_brs = vector('string')(['runNumber', 'eventNumber']) for br, directive in config['config'].items(): if tree in directive['skip_tree']: continue print(' Processing {}...'.format(br)) params = ', '.join(resolve_params(directive['vars'], idx)) histo_name = directive['histo_name'] histo_dim = len(directive['vars']) debug_br = 'debug_{}_bin_idx'.format(br) wt_histo = load_histo(
def get_dataframe(dataset):
    """Return an RDataFrame for `dataset` that carries its own chains.

    The main chain and the friend chains produced by
    `rdf_from_dataset_helper` are attached to the frame as the attributes
    `tchain` and `friend_tchains`, so they stay referenced for as long as
    the frame object does.
    """
    main_chain, friend_chains = rdf_from_dataset_helper(dataset)
    frame = RDataFrame(main_chain)
    frame.tchain = main_chain
    frame.friend_tchains = friend_chains
    return frame
# Script excerpt: compute the efficiency of each trigger cut applied on its
# own to the Bs -> J/psi phi MC ntuple.
from ROOT import (
    ROOT,
    RDataFrame,
)

ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
# Total number of entries: denominator of every efficiency below.
nentries = dataframe.Count().GetValue()

# Individual cut expressions.
bs_tau = "(Bs_TAU > 0.002)"
bs_m = "(Bs_M > 5150) && (Bs_M < 5550)"
jpsi_m = "(Jpsi_M > 3020) && (Jpsi_M < 3170)"
phi_m = "(Phi_M > 980) && (Phi_M < 1050)"
bs_vtx = "(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF < 20)"
jpsi_vtx = "(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)"
phi_vtx = "(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
mu_pt = "(muplus_PT > 500) && (mumin_PT > 500)"

trigger_cut_list = [
    bs_tau, bs_m, jpsi_m, phi_m, bs_vtx, jpsi_vtx, phi_vtx, mu_pt
]

efficiency_list = []
cum_efficiency_list = []

# Starting point for the cumulative selection; not modified in the part of
# the loop visible here.
cum_eff_df = dataframe

for i in range(len(trigger_cut_list)):
    # Each cut is applied to the full frame, so `eff` is the stand-alone
    # efficiency of cut i; GetValue() is called once per cut.
    eff_df = dataframe.Filter(trigger_cut_list[i])
    eff = (eff_df.Count().GetValue()) / nentries
    efficiency_list.append(eff)
"AK15Jet_DeepAK15_TvsQCD", ] branch_vec = ROOT.vector("string")() [branch_vec.push_back(branch) for branch in branches] if not options.is_dataframe: print( "No dataframe was given. Handling the arguments as trees and adding them to chain." ) input_files = args input_chain = ROOT.TChain("MVATree") for input_file in input_files: input_chain.Add(input_file) print("Finished loading chain with ", input_chain.GetEntries(), " entries") data_frame = RDF(input_chain, branch_vec) else: print("Dataframe flag was set. Handling argument as dataframe.") input_file = args[0] data_frame = RDF("tree", input_file) print("Finished converting the chain to RDataFrame") if not options.is_dataframe and options.save: print("saving dataframe to disk as ", data_mc_string + "_" + options.name + "_dataframe.root") data_frame.Snapshot( "tree", data_mc_string + "_" + options.name + "_dataframe.root", branch_vec) print("saved dataframe to disk ...")
"pt_pfmet_raw_div_pt_genmet" : "pt_pfmet_raw/pt_genmet", "pt_pfmet_t1_div_pt_genmet" : "pt_pfmet_t1/pt_genmet", "pt_pfmet_t1smear_div_pt_genmet" : "pt_pfmet_t1smear/pt_genmet" } branch_vec = ROOT.vector("string")() [branch_vec.push_back(branch) for branch in branches] if not options.is_dataframe: print ("No dataframe was given. Handling the arguments as trees and adding them to chain.") input_files = args input_chain = ROOT.TChain("METAnalyzer/MET_tree") for input_file in input_files: input_chain.Add(input_file) print ("Finished loading chain with ", input_chain.GetEntries(), " entries") data_frame = RDF(input_chain, branch_vec) else: print ("Dataframe flag was set. Handling argument as dataframe.") input_file = args[0] data_frame = RDF("tree", input_file) print ("Finished converting the chain to RDataFrame") if not options.is_dataframe and options.save: print ("saving dataframe to disk as ", data_mc_string + "_" + names + "_dataframe.root") data_frame.Snapshot("tree", data_mc_string + "_" + names + "_dataframe.root", branch_vec) print ("saved dataframe to disk ...") name = options.name selection = options.selection