def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Snapshot every friend tree marked as kept into one output ntuple.

    For each ``(full_path, tree)`` pair in ``friends`` whose
    ``config[full_path]['keep']`` is truthy, the tree is wrapped in an
    ``RDataFrame``, optionally filtered with the selection string built by
    ``concat_selections``, and written to ``output_ntp_name`` under the
    tree name ``full_path``.

    Args:
        output_ntp_name: Path of the output file; it is opened in
            ``'UPDATE'`` mode for every snapshot.
        friends: Mapping from tree path to the tree object to process.
        tree_branch_dict: Mapping from tree path to the branches handed to
            ``make_output_vec`` for the snapshot column list.
        config: Mapping from tree path to a mapping with the keys
            ``'keep'`` and ``'selection'``.
    """
    # 'UPDATE' lets successive snapshots add trees to the same file.
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        # Only the trees explicitly marked as kept are written out.
        if not config[full_path]['keep']:
            continue

        frame = RDataFrame(tree)
        cut = concat_selections(config[full_path]['selection'])
        # An empty selection means "take every entry".
        selected = frame.Filter(cut) if cut else frame

        columns = make_output_vec(tree_branch_dict[full_path])
        selected.Snapshot(full_path, output_ntp_name, columns, snapshot_opts)
def __init__(self, name, label, selection, datacard_name, colour,
             position_in_stack, basedir, postfix, isdata, ismc, issignal,
             weight, xs):
    """Load one sample into a pandas DataFrame and compute its scaling.

    All arguments are stored as attributes of the same name. The tree
    called ``'tree'`` in ``basedir/name/postfix`` is filtered with
    ``selection`` and converted to ``self.df``.

    For non-data samples the number of processed events is read from
    ``basedir/name/SkimAnalyzerCount/SkimReport.txt``; it stays at ``1.``
    when that file has no ``'Sum Norm Weights'`` line.

    Args:
        name: Sample directory name below ``basedir``.
        label: Stored as ``self.label`` (presumably a plot label).
        selection: Selection string passed to ``Filter``.
        datacard_name: Stored as ``self.datacard_name``.
        colour: Stored as ``self.colour``.
        position_in_stack: Stored as ``self.position_in_stack``.
        basedir: Directory that contains the sample directories.
        postfix: Path of the tree file relative to the sample directory.
        isdata: Truthy for data; skips the event-count lookup and fixes
            ``lumi_scaling`` to ``1.``.
        ismc: Stored as ``self.ismc``.
        issignal: Stored as ``self.issignal``.
        weight: Stored as ``self.weight``.
        xs: Cross section used for ``lumi_scaling`` of non-data samples.

    Raises:
        IOError/OSError: If the skim report of a non-data sample cannot
            be opened.
    """
    self.name = name
    # Formatted single-argument call: prints the same text under Python 2
    # and Python 3 (the former print statement was Python-2 only).
    print('loading %s' % self.name)
    self.label = label
    self.selection = selection
    self.datacard_name = datacard_name
    self.colour = colour
    self.position_in_stack = position_in_stack
    self.basedir = basedir
    self.postfix = postfix
    self.isdata = isdata
    self.ismc = ismc
    self.issignal = issignal
    self.weight = weight
    self.xs = xs
    self.nevents = 1.
    self.file = '/'.join([basedir, self.name, postfix])

    if not self.isdata:
        nevents_file = '/'.join(
            [basedir, self.name, 'SkimAnalyzerCount/SkimReport.txt'])
        with open(nevents_file) as ff:
            lines = ff.readlines()
        for line in lines:
            if 'Sum Norm Weights' in line:
                # NOTE(review): the value is parsed from the third line of
                # the report, not from the matching line, and r'\d+' keeps
                # only the first run of digits (no decimals/exponent).
                # Behaviour kept as-is; confirm against the report format.
                self.nevents = float(re.findall(r'\d+', lines[2])[0])
                break

    # self.file already holds basedir/name/postfix; reuse it instead of
    # joining the same three components a second time.
    rdf = RDF('tree', self.file)
    rdf = rdf.Filter(self.selection)
    self.df = pd.DataFrame(rdf.AsNumpy())

    # scale to 1/pb
    self.lumi_scaling = 1. if self.isdata else (self.xs / self.nevents)
# Build the RDataFrame from the input and optionally cache it on disk.
data_frame = RDF("tree", input_file)
print("Finished converting the chain to RDataFrame")
if not options.is_dataframe and options.save:
    snapshot_name = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_name)
    data_frame.Snapshot("tree", snapshot_name, branch_vec)
    print("saved dataframe to disk ...")

#binning = [200, 250, 300, 350, 400, 500, 700, 1500]
# Hadr_Recoil_Pt>200. && N_AK15Jets==1 && N_TightMuons<=2 && N_TightMuons>=1 && Muon_Pt[0]>29. && N_LooseElectrons==0 && N_LoosePhotons==0
# Triggered_HLT_IsoMu27_vX==1
# Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60_vX == 1 || Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX == 1

# 101 bin edges from 250 to 1250 in steps of 10, i.e. 100 bins.
binning_x = [250 + (10 * step) for step in range(101)]

# Selected events plus scalar aliases for the leading vector entries.
selected = data_frame.Filter(options.selection)
with_mt = selected.Define("M_W_transverse_0", "M_W_transverse[0]")
reference_events = with_mt.Define("AK15Jet_Pt_0", "AK15Jet_Pt[0]")

# Weighted 1D histogram of the requested variable with variable binning.
weighted_events = reference_events.Define("WEIGHT1",
                                          "Weight_XS*Weight_GEN_nom")
histo_model = (options.variable_x, options.variable_x,
               len(binning_x) - 1, array('d', binning_x))
histo = weighted_events.Histo1D(histo_model, options.variable_x, "WEIGHT1")

# GetPtr() triggers the event loop; the histogram is then written out.
output_file = ROOT.TFile("Binning" + "_" + options.name + ".root",
                         "RECREATE")
output_file.WriteTObject(histo.GetPtr())
output_file.Close()
print("Finished converting the chain to RDataFrame")
# Optionally cache the dataframe on disk for later runs.
if not options.is_dataframe and options.save:
    snapshot_name = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_name)
    data_frame.Snapshot("tree", snapshot_name, branch_vec)
    print("saved dataframe to disk ...")

binning = [200, 250, 300, 350, 400, 500, 700, 1500]
# Hadr_Recoil_Pt>200. && N_AK15Jets==1 && N_TightMuons<=2 && N_TightMuons>=1 && Muon_Pt[0]>29. && N_LooseElectrons==0 && N_LoosePhotons==0
# Triggered_HLT_IsoMu27_vX==1
# Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60_vX == 1 || Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX == 1

# Denominator: offline selection AND reference trigger.
reference_cut = options.selection + " && " + options.ref_trigger
reference_events = data_frame.Filter(reference_cut)
# Numerator: reference events that also fire the trigger under study.
selected_events = reference_events.Filter(options.trigger)

n_bins = len(binning) - 1
reference_model = (options.variable + "_ref", options.variable, n_bins,
                   array.array("d", binning))
selection_model = (options.variable + "_sel", options.variable, n_bins,
                   array.array("d", binning))
reference_histo = reference_events.Histo1D(reference_model, options.variable)
selection_histo = selected_events.Histo1D(selection_model, options.variable)

# Efficiency graph = selected / reference, bin by bin.
efficiency = ROOT.TGraphAsymmErrors()
efficiency.Divide(selection_histo.GetPtr(), reference_histo.GetPtr())
efficiency_label = "efficiency_" + data_mc_string + "_" + options.name
efficiency.SetName(efficiency_label)
efficiency.SetTitle(efficiency_label)
print("Finished converting the chain to RDataFrame")
# Optionally cache the dataframe on disk for later runs.
if not options.is_dataframe and options.save:
    snapshot_name = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_name)
    data_frame.Snapshot("tree", snapshot_name, branch_vec)
    print("saved dataframe to disk ...")

#binning = [200, 250, 300, 350, 400, 500, 700, 1500]
# Hadr_Recoil_Pt>200. && N_AK15Jets==1 && N_TightMuons<=2 && N_TightMuons>=1 && Muon_Pt[0]>29. && N_LooseElectrons==0 && N_LoosePhotons==0
# Triggered_HLT_IsoMu27_vX==1
# Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60_vX == 1 || Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX == 1

# C++ predicate: an event passes only if every one of its N_Jets entries in
# DeltaPhi_AK4Jet_Hadr_Recoil is above 0.8.
dphi_requirement = """
//std::cout << \"I'm at entry \" << tdfentry_ << std::endl;
bool dphi_crit = true;
for(int i=0;i<N_Jets;i++){
    dphi_crit = dphi_crit && DeltaPhi_AK4Jet_Hadr_Recoil[i]>0.8;
}
return dphi_crit;"""

# Apply the command-line selection first, then the delta-phi requirement.
events = data_frame.Filter(options.selection)
events = events.Filter(dphi_requirement)

#events = events.Filter(
#    """
#    //std::cout << \"I'm at entry \" << tdfentry_ << std::endl;
#    bool dr_crit = true;
#    for(int j=0;j<N_LooseElectrons;j++){
#        for(int i=0;i<N_Jets;i++){
from ROOT import (ROOT, RDataFrame, TCanvas, TH1D)
import numpy as np

ROOT.EnableImplicitMT()

# Input file and the tree inside it.
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
#dataframe_with_truep = dataframe.Define('Bs_momentum', 'pow( Bs_TRUEP_X*Bs_TRUEP_X + Bs_TRUEP_Y*Bs_TRUEP_Y + Bs_TRUEP_Z*Bs_TRUEP_Z , 0.5)')

# Unnamed pre-selection on the background category (0 or 50).
df_bkg = dataframe.Filter("Bs_BKGCAT == 0 || Bs_BKGCAT == 50")
#df_cuts1 = dataframe_with_truep.Filter("(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "trigger_cuts")

# Sequential named filters: each one is applied on top of the previous
# node and carries its own name.
cut1 = df_bkg.Filter(
    "(Bs_TAU > 0.0015)", "tau_cut")
cut2 = cut1.Filter(
    "(Bs_M > 5150) && (Bs_M < 5550)", "b_mass_cut")
cut3 = cut2.Filter(
    "(Jpsi_M > 3020) && (Jpsi_M < 3170)", "jpsi_mass_cut")
cut4 = cut3.Filter(
    "(Phi_M > 980) && (Phi_M < 1050)", "phi_mass_cut")
cut5 = cut4.Filter(
    "(muplus_PT > 500) && (mumin_PT > 500)", "mu_pt_cut")
cut6 = cut5.Filter(
    "(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20", "b_vtx_cut")
cut7 = cut6.Filter(
    "(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)", "jpsi_vtx_cut")
cut8 = cut7.Filter(
    "(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "phi_vtx_cut")
cut9 = cut8.Filter(
    "mumin_PIDmu > 0 && muplus_PIDmu > 0", "mu_id_cut")
cut10 = cut9.Filter(
    "Kmin_PIDK > 0 && Kplus_PIDK > 0", "k_id_cut")

#print("Mumin eff: ")
#mumincut = cut9.Report()
#mumincut.Print()

# Request the cut-flow report from the root dataframe node.
print("All stats: ")
cutsreport = dataframe.Report()
branch_vec) print("saved dataframe to disk ...") # a label for the ouput files name = options.name # a ROOT style selection string selection = options.selection # dictionaries to contain the requested 1D and 2D templates histos_1D = {} histos_2D = {} # apply the selection from above to the RDataFrame and define a weight on the remaining events # the weight can also be a branch or constructed from several branches, e.g. generator_weight*sample_weight reference_events = data_frame.Filter(selection).Define("weight", options.weight) # define constructed variables on RDataFrame after selection for constructed_var in constructed_vars: reference_events = reference_events.Define( constructed_var, constructed_vars[constructed_var]) # loop over 1D variables given as input arguments for var_1D in vars_1D: var, nbinsx, x_low, x_high = None, None, None, None Histo1D_argument = None # if a binning and range is given use that binning and range, if not use 50 bins and let ROOT decide the range if ";" in var_1D: var, nbinsx, x_low, x_high = var_1D.split(";") Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var),
print(exc) # define paths for loading data and storing results mc_path = os.path.expandvars(params['MC_PATH']) data_path = os.path.expandvars(params['DATA_PATH']) dataDF = RDF('DataTable', data_path) mcDF = RDF('SignalTable', mc_path) genDF = RDF('GenTable', mc_path) results_dir = os.environ['HYPERML_RESULTS_{}'.format(params['NBODY'])] file_name = results_dir + '/' + params['FILE_PREFIX'] + '_std.root' results_file = TFile(file_name, 'recreate') for cclass in params['CENTRALITY_CLASS']: cent_dir = results_file.mkdir('{}-{}'.format(cclass[0], cclass[1])) dataCentDF = dataDF.Filter('centrality >= {} && centrality < {}'.format( cclass[0], cclass[1])) mcCentDF = mcDF.Filter('centrality >= {} && centrality < {}'.format( cclass[0], cclass[1])) genCentDF = genDF.Filter('centrality >= {} && centrality < {}'.format( cclass[0], cclass[1])) genSelected = genCentDF.Filter('std::abs(rapidity) < 0.5') bkg_models = params['BKG_MODELS'] if 'BKG_MODELS' in params else ['expo'] fit_directories = [] h2raw_counts = [] h2sigma_mc = TH2D('MCsigmas', ';#it{p}_{T} (GeV/#it{c});c#it{t} (cm);#sigma', len(params['PT_BINS']) - 1, np.array(params['PT_BINS'], 'double'), len(params['CT_BINS']) - 1, np.array(params['CT_BINS'], 'double'))
# Input file and the tree inside it.
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)

# Individual trigger-level requirements.
bs_tau = "(Bs_TAU > 0.002)"
bs_m = "(Bs_M > 5150) && (Bs_M < 5550)"
jpsi_m = "(Jpsi_M > 3020) && (Jpsi_M < 3170)"
phi_m = "(Phi_M > 980) && (Phi_M < 1050)"
bs_vtx = "(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF < 20)"
jpsi_vtx = "(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)"
phi_vtx = "(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
mu_pt = "(muplus_PT > 500) && (mumin_PT > 500)"

trigger_cut_list = [
    bs_tau, bs_m, jpsi_m, phi_m, bs_vtx, jpsi_vtx, phi_vtx, mu_pt
]

# Book every count BEFORE reading any result. RDataFrame actions are lazy,
# so all counts booked here are filled in a single event loop; calling
# GetValue() inside the loop would re-read the file once per count.
total_count = dataframe.Count()
single_cut_counts = []
cumulative_counts = []
cum_eff_df = dataframe
for cut in trigger_cut_list:
    # Efficiency of this cut alone, relative to all entries.
    single_cut_counts.append(dataframe.Filter(cut).Count())
    # Efficiency of this cut combined with all previous ones.
    cum_eff_df = cum_eff_df.Filter(cut)
    cumulative_counts.append(cum_eff_df.Count())

# The first GetValue() runs the event loop and fills all booked counts.
nentries = total_count.GetValue()
# Float denominator so the ratio is a true division on Python 2 as well.
denominator = float(nentries)

efficiency_list = [
    count.GetValue() / denominator for count in single_cut_counts
]
cum_efficiency_list = [
    count.GetValue() / denominator for count in cumulative_counts
]

print(efficiency_list)
print(cum_efficiency_list)
description='find retention rates for various trigger paths.') parser.add_argument('ntp', help='specify ntuple path.') parser.add_argument('tree', help='specify tree name.') parser.add_argument('-t', '--trigger-paths', nargs='+', help='specify trigger paths.') return parser.parse_args() ######## # Main # ######## if __name__ == '__main__': args = parse_input() frame = RDataFrame(args.tree, args.ntp) cuts = [] for tp in args.trigger_paths: c = frame.Filter( tp, tp) # This is to avoid garbage collector to delete our pointer cuts.append(c) report = frame.Report() report.Print()
branch_vec) print("saved dataframe to disk ...") name = options.name selection = options.selection vars_1D = options.variables_1D.split(",") vars_2D = options.variables_2D.split(",") binning_x = [(0 + 10 * i) for i in range(21)] histos_1D = {} histos_2D = {} reference_events = data_frame.Filter(selection) for var_1D in vars_1D: var, nbinsx, x_low, x_high = None, None, None, None Histo1D_argument = None if ";" in var_1D: var, nbinsx, x_low, x_high = var_1D.split(";") Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var), int(nbinsx), float(x_low), float(x_high)) else: var = var_1D Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var), 50, 1, 1) print(var_1D) print(Histo1D_argument) histos_1D[var] = reference_events.Histo1D(
from ROOT import (ROOT, RDataFrame, TCanvas)

ROOT.EnableImplicitMT()

# Input files (minimum bias and signal) and the tree they share.
minbias_ntuple = "/user/egovorko/work/public/minbias_JpsiPhi.root"
signal_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

minbias_df = RDataFrame(input_tree_name, minbias_ntuple)
signal_df = RDataFrame(input_tree_name, signal_ntuple)

# Keep events that have at least one event muon with PIDmu > 0.
PIDmu_cut = "eventmuons_PIDmu[eventmuons_PIDmu > 0].size() >= 1"
#pt_cut = "eventmuons_PT[eventmuons_PT > 500].size() >= 1"

minbias_pidmu = minbias_df.Filter(PIDmu_cut)
#minbias_pt = minbias_pidmu.Filter(pt_cut)
signal_pidmu = signal_df.Filter(PIDmu_cut)
#signal_pt = signal_pidmu.Filter(pt_cut)

# Book the PIDmu distributions of the unfiltered samples.
pidmu1 = minbias_df.Histo1D("eventmuons_PIDmu")
pidmu2 = signal_df.Histo1D("eventmuons_PIDmu")

# 2x2 canvas; each distribution is drawn on its own pad.
c1 = TCanvas()
c1.Divide(2, 2)

# Pad 1: minimum-bias sample.
c1.cd(1)
pidmu1.SetTitle("Event muon PIDmu distribution (min bias)")
pidmu1.GetXaxis().SetTitle("PIDmu")
pidmu1.Draw()

# Pad 2: signal sample, styled with line colour 2.
c1.cd(2)
pidmu2.SetTitle("Event muon PIDmu distribution (signal)")
pidmu2.GetXaxis().SetTitle("PIDmu")
pidmu2.SetLineColor(2)
from ROOT import (
    ROOT,
    RDataFrame,
)

ROOT.EnableImplicitMT()

# Input file and the tree inside it.
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)

# Trigger-level requirements, AND-ed into one selection string.
trigger_cuts = " && ".join([
    "(Bs_TAU > 0.0015)",
    "(Bs_M > 5150)",
    "(Bs_M < 5550)",
    "(Jpsi_M > 3020)",
    "(Jpsi_M < 3170)",
    "(Phi_M > 980)",
    "(Phi_M < 1050)",
    "(muplus_PT > 500)",
    "(mumin_PT > 500)",
    "((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20)",
    "(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)",
    "(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)",
])
# At least one event muon with BPVIPCHI2 below 8, and Bs_len > 0.
muon_cuts = "!eventmuons_BPVIPCHI2[eventmuons_BPVIPCHI2 < 8].empty() && Bs_len > 0"

df_cuts1 = dataframe.Filter(trigger_cuts)
df_cuts2 = df_cuts1.Filter(muon_cuts)

# Book all counts before reading any of them. RDataFrame actions are lazy,
# so everything booked here is computed in one event loop instead of one
# loop per GetValue() call.
entries_count = dataframe.Count()
triggered_count = df_cuts1.Count()
cut_count = df_cuts2.Count()
true_bs_count = df_cuts2.Filter("Bs_TRUEID == 531").Count()
true_bsbar_count = df_cuts2.Filter("Bs_TRUEID == -531").Count()
# Right tags: the tag decision agrees with the true flavour.
right_tag_count = df_cuts2.Filter(
    "(Bs_OSMuon_TAGDEC == 1 && Bs_TRUEID == 531) || (Bs_OSMuon_TAGDEC == -1 && Bs_TRUEID == -531)"
).Count()

# The first GetValue() runs the event loop and fills all booked results.
nentries = entries_count.GetValue()
ntriggered = triggered_count.GetValue()
ncut = cut_count.GetValue()

print("entries: " + str(nentries))
print("triggered: " + str(ntriggered))
print("cut: " + str(ncut))

TrueBs = true_bs_count.GetValue()
TrueBsbar = true_bsbar_count.GetValue()

#define right tags, wrong tags and untagged; tagging efficiency, mistag probability and tagging performance
R = right_tag_count.GetValue()
vars_1D = options.variables_1D.split(",") vars_2D = options.variables_2D.split(",") print(vars_1D) print(vars_2D) if vars_1D == [""]: vars_1D = [] if vars_2D == [""]: vars_2D = [] binning_x = [(0+10*i) for i in range(21)] histos_1D={} histos_2D={} reference_events = data_frame.Filter(selection)#.Define("pt_pfmet_t1smear_div_pt_pfmet_t1","pt_pfmet_t1smear/pt_pfmet_t1") for constructed_var in constructed_vars: reference_events=reference_events.Define(constructed_var,constructed_vars[constructed_var]) for var_1D in vars_1D: var,nbinsx,x_low,x_high = None,None,None,None Histo1D_argument = None if ";" in var_1D: var,nbinsx,x_low,x_high = var_1D.split(";") Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var), int(nbinsx), float(x_low), float(x_high)) else: var = var_1D Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var), 50, 1, 1) print(var_1D) print(Histo1D_argument) histos_1D[var]=reference_events.Histo1D( Histo1D_argument,