Python RDataFrame.Filter Examples, ROOT.RDataFrame.Filter Python Examples

Example #1

0

Show file

def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Snapshot every friend tree flagged as kept into one output ntuple.

    For each ``full_path -> tree`` entry of ``friends`` whose
    ``config[full_path]['keep']`` is truthy, the tree is wrapped in an
    RDataFrame, optionally filtered with the selection built by
    ``concat_selections``, and written under the name ``full_path`` to
    ``output_ntp_name``.  The column list passed to Snapshot comes from
    ``make_output_vec(tree_branch_dict[full_path])``.
    """
    # Every Snapshot call uses the 'UPDATE' file mode.
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        # Trees that are not marked 'keep' are skipped entirely.
        if not config[full_path]['keep']:
            continue

        source = RDataFrame(tree)
        cut = concat_selections(config[full_path]['selection'])

        # An empty selection means the tree is written unfiltered.
        selected = source.Filter(cut) if cut else source

        branches = make_output_vec(tree_branch_dict[full_path])
        selected.Snapshot(full_path, output_ntp_name, branches, snapshot_opts)

Example #2

0

Show file

File: sample.py Project: vinzenzstampf/plotter

    def __init__(self, name, label, selection, datacard_name, colour,
                 position_in_stack, basedir, postfix, isdata, ismc, issignal,
                 weight, xs):
        """Store the sample description and load its selected events.

        All arguments are stored as attributes of the same name.  For
        non-data samples the number of events is read from
        ``<basedir>/<name>/SkimAnalyzerCount/SkimReport.txt``; otherwise it
        stays at 1.  The tree ``'tree'`` in ``<basedir>/<name>/<postfix>`` is
        filtered with ``selection`` and stored as a pandas DataFrame in
        ``self.df``.  ``self.lumi_scaling`` is 1 for data and
        ``xs / nevents`` otherwise.
        """
        self.name = name
        # Formatted single-argument print: same output on Python 2 and 3.
        print('loading %s' % (self.name,))
        self.label = label
        self.selection = selection
        self.datacard_name = datacard_name
        self.colour = colour
        self.position_in_stack = position_in_stack
        self.basedir = basedir
        self.postfix = postfix
        self.isdata = isdata
        self.ismc = ismc
        self.issignal = issignal
        self.weight = weight
        self.xs = xs
        self.nevents = 1.
        self.file = '/'.join([basedir, self.name, postfix])

        if not self.isdata:
            nevents_file = '/'.join(
                [basedir, self.name, 'SkimAnalyzerCount/SkimReport.txt'])
            with open(nevents_file) as ff:
                lines = ff.readlines()
                for line in lines:
                    if 'Sum Norm Weights' in line:
                        # NOTE(review): the number is parsed from lines[2],
                        # not from the line that matched; confirm that the
                        # report layout makes this the intended line.  Only
                        # the first run of digits is used.
                        self.nevents = float(re.findall(r'\d+', lines[2])[0])
                        break

        # self.file is the same <basedir>/<name>/<postfix> path built above.
        rdf = RDF('tree', self.file)
        rdf = rdf.Filter(self.selection)
        # set_trace()
        df = rdf.AsNumpy()
        self.df = pd.DataFrame(df)
        # scale to 1/pb
        self.lumi_scaling = 1. if self.isdata else (self.xs / self.nevents)

Example #3

0

Show file

    data_frame = RDF("tree", input_file)

print("Finished converting the chain to RDataFrame")

# Write the dataframe to disk only if options.is_dataframe is false and
# options.save is set.
if not options.is_dataframe and options.save:
    snapshot_path = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_path)
    data_frame.Snapshot("tree", snapshot_path, branch_vec)
    print("saved dataframe to disk ...")

# Earlier binning and example selection strings, kept for reference:
#binning = [200, 250, 300, 350, 400, 500, 700, 1500]
# Hadr_Recoil_Pt>200. && N_AK15Jets==1 && N_TightMuons<=2 && N_TightMuons>=1 && Muon_Pt[0]>29. && N_LooseElectrons==0 && N_LoosePhotons==0
# Triggered_HLT_IsoMu27_vX==1
# Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60_vX == 1 || Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX == 1

# 101 bin edges from 250 to 1250 in steps of 10.
binning_x = list(range(250, 1251, 10))

# Apply the selection, then expose the first element of two array columns
# as scalar columns.
selected_events = data_frame.Filter(options.selection)
with_transverse_mass = selected_events.Define("M_W_transverse_0",
                                              "M_W_transverse[0]")
reference_events = with_transverse_mass.Define("AK15Jet_Pt_0",
                                               "AK15Jet_Pt[0]")

# Weighted 1D histogram of options.variable_x with the variable bin edges.
histo = None
weighted_events = reference_events.Define("WEIGHT1",
                                          "Weight_XS*Weight_GEN_nom")
histo_model = (options.variable_x, options.variable_x, len(binning_x) - 1,
               array('d', binning_x))
histo = weighted_events.Histo1D(histo_model, options.variable_x, "WEIGHT1")

output_name = "Binning" + "_" + options.name + ".root"
output_file = ROOT.TFile(output_name, "RECREATE")
output_file.WriteTObject(histo.GetPtr())
output_file.Close()

Example #4

0

Show file

File: CalculateTriggerSFs.py Project: michaelwassmer/useful_scripts

print("Finished converting the chain to RDataFrame")

# Write the dataframe to disk only if options.is_dataframe is false and
# options.save is set.
if not options.is_dataframe and options.save:
    snapshot_path = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_path)
    data_frame.Snapshot("tree", snapshot_path, branch_vec)
    print("saved dataframe to disk ...")

binning = [200, 250, 300, 350, 400, 500, 700, 1500]
# Example selection / trigger strings, kept for reference:
# Hadr_Recoil_Pt>200. && N_AK15Jets==1 && N_TightMuons<=2 && N_TightMuons>=1 && Muon_Pt[0]>29. && N_LooseElectrons==0 && N_LoosePhotons==0
# Triggered_HLT_IsoMu27_vX==1
# Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60_vX == 1 || Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX == 1

# Reference sample: selection AND reference trigger.  The selected sample
# additionally requires the trigger under study.
reference_cut = options.selection + " && " + options.ref_trigger
reference_events = data_frame.Filter(reference_cut)
selected_events = reference_events.Filter(options.trigger)

# Both histograms share the variable and the bin edges; only the name differs.
n_bins = len(binning) - 1
reference_model = (options.variable + "_ref", options.variable, n_bins,
                   array.array("d", binning))
reference_histo = reference_events.Histo1D(reference_model, options.variable)
selection_model = (options.variable + "_sel", options.variable, n_bins,
                   array.array("d", binning))
selection_histo = selected_events.Histo1D(selection_model, options.variable)

# Efficiency graph: selected histogram divided by the reference histogram.
efficiency = ROOT.TGraphAsymmErrors()
efficiency.Divide(selection_histo.GetPtr(), reference_histo.GetPtr())
efficiency_label = "efficiency_" + data_mc_string + "_" + options.name
efficiency.SetName(efficiency_label)
efficiency.SetTitle(efficiency_label)

Example #5

0

Show file

File: CalculateSampleRenormFactors.py Project: michaelwassmer/useful_scripts

print("Finished converting the chain to RDataFrame")

# Write the dataframe to disk only if options.is_dataframe is false and
# options.save is set.
if not options.is_dataframe and options.save:
    snapshot_path = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_path)
    data_frame.Snapshot("tree", snapshot_path, branch_vec)
    print("saved dataframe to disk ...")

# Earlier binning and example selection strings, kept for reference:
#binning = [200, 250, 300, 350, 400, 500, 700, 1500]
# Hadr_Recoil_Pt>200. && N_AK15Jets==1 && N_TightMuons<=2 && N_TightMuons>=1 && Muon_Pt[0]>29. && N_LooseElectrons==0 && N_LoosePhotons==0
# Triggered_HLT_IsoMu27_vX==1
# Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60_vX == 1 || Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX == 1

# C++ filter body: the event passes only if each of the first N_Jets entries
# of DeltaPhi_AK4Jet_Hadr_Recoil is above 0.8.
dphi_requirement = """
    //std::cout << \"I'm at entry \" << tdfentry_ << std::endl;
    bool dphi_crit = true;
    for(int i=0;i<N_Jets;i++){
        dphi_crit = dphi_crit && DeltaPhi_AK4Jet_Hadr_Recoil[i]>0.8;
    }
    return dphi_crit;"""

# Base selection first, then the delta-phi requirement.
events = data_frame.Filter(options.selection)
events = events.Filter(dphi_requirement)

#events = events.Filter(
#    """
#    //std::cout << \"I'm at entry \" << tdfentry_ << std::endl;
#    bool dr_crit = true;
#    for(int j=0;j<N_LooseElectrons;j++){
#        for(int i=0;i<N_Jets;i++){

Example #6

0

Show file

File: effi_bsp.py Project: alisidley/Bachelors-Thesis

from ROOT import (ROOT, RDataFrame, TCanvas, TH1D)
import numpy as np

# Turn on ROOT's implicit multi-threading before the dataframe is created.
ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
#dataframe_with_truep = dataframe.Define('Bs_momentum', 'pow( Bs_TRUEP_X*Bs_TRUEP_X + Bs_TRUEP_Y*Bs_TRUEP_Y + Bs_TRUEP_Z*Bs_TRUEP_Z , 0.5)')
# First filter: keep Bs_BKGCAT equal to 0 or 50.  Unlike the cuts below it is
# not given a name.
df_bkg = dataframe.Filter("Bs_BKGCAT == 0 || Bs_BKGCAT == 50")
#df_cuts1 = dataframe_with_truep.Filter("(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "trigger_cuts")
# Sequential cut flow: each Filter is applied to the result of the previous
# one and receives a name as its second argument (presumably the label used
# in the report requested at the end -- confirm against the RDataFrame docs).
cut1 = df_bkg.Filter("(Bs_TAU > 0.0015)", "tau_cut")
cut2 = cut1.Filter("(Bs_M > 5150) && (Bs_M < 5550)", "b_mass_cut")
cut3 = cut2.Filter("(Jpsi_M > 3020) && (Jpsi_M < 3170)", "jpsi_mass_cut")
cut4 = cut3.Filter("(Phi_M > 980) && (Phi_M < 1050)", "phi_mass_cut")
cut5 = cut4.Filter("(muplus_PT > 500) && (mumin_PT > 500)", "mu_pt_cut")
cut6 = cut5.Filter("(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20", "b_vtx_cut")
cut7 = cut6.Filter("(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)",
                   "jpsi_vtx_cut")
cut8 = cut7.Filter("(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)",
                   "phi_vtx_cut")
cut9 = cut8.Filter("mumin_PIDmu > 0 && muplus_PIDmu > 0", "mu_id_cut")
cut10 = cut9.Filter("Kmin_PIDK > 0 && Kplus_PIDK > 0", "k_id_cut")

#print("Mumin eff: ")
#mumincut = cut9.Report()
#mumincut.Print()
print("All stats: ")
# The report is requested on the root dataframe, not on one of the cut nodes.
cutsreport = dataframe.Report()

Example #7

0

Show file

                        branch_vec)
    print("saved dataframe to disk ...")

# label used for the output files
name = options.name

# ROOT-style selection string
selection = options.selection

# containers for the requested 1D and 2D templates
histos_1D, histos_2D = {}, {}

# Apply the selection to the RDataFrame, then define a per-event "weight"
# column on the surviving events.  The weight expression may be a single
# branch or built from several, e.g. generator_weight*sample_weight.
selected_events = data_frame.Filter(selection)
reference_events = selected_events.Define("weight", options.weight)

# Add every constructed variable as a new column after the selection.
for constructed_var in constructed_vars:
    expression = constructed_vars[constructed_var]
    reference_events = reference_events.Define(constructed_var, expression)

# loop over 1D variables given as input arguments
for var_1D in vars_1D:
    var, nbinsx, x_low, x_high = None, None, None, None
    Histo1D_argument = None
    # if a binning and range is given use that binning and range, if not use 50 bins and let ROOT decide the range
    if ";" in var_1D:
        var, nbinsx, x_low, x_high = var_1D.split(";")
        Histo1D_argument = ("{}".format(var),
                            "title;{};arbitrary units".format(var),

Example #8

0

Show file

        print(exc)

# Resolve the input paths and open the three tables as dataframes.
mc_path = os.path.expandvars(params['MC_PATH'])
data_path = os.path.expandvars(params['DATA_PATH'])
dataDF = RDF('DataTable', data_path)
mcDF = RDF('SignalTable', mc_path)
genDF = RDF('GenTable', mc_path)

# The results directory comes from an environment variable whose name
# depends on params['NBODY'].
results_env = 'HYPERML_RESULTS_{}'.format(params['NBODY'])
results_dir = os.environ[results_env]
file_name = results_dir + '/' + params['FILE_PREFIX'] + '_std.root'
results_file = TFile(file_name, 'recreate')

for cclass in params['CENTRALITY_CLASS']:
    cent_min, cent_max = cclass[0], cclass[1]
    cent_dir = results_file.mkdir('{}-{}'.format(cent_min, cent_max))

    # The same centrality window is applied to data, MC and generated tables.
    cent_cut = 'centrality >= {} && centrality < {}'.format(cent_min, cent_max)
    dataCentDF = dataDF.Filter(cent_cut)
    mcCentDF = mcDF.Filter(cent_cut)
    genCentDF = genDF.Filter(cent_cut)
    genSelected = genCentDF.Filter('std::abs(rapidity) < 0.5')

    # Fall back to a single 'expo' model when none is configured.
    if 'BKG_MODELS' in params:
        bkg_models = params['BKG_MODELS']
    else:
        bkg_models = ['expo']
    fit_directories = []
    h2raw_counts = []

    # 2D histogram binned by the configured PT_BINS and CT_BINS edges.
    n_pt_bins = len(params['PT_BINS']) - 1
    pt_edges = np.array(params['PT_BINS'], 'double')
    n_ct_bins = len(params['CT_BINS']) - 1
    ct_edges = np.array(params['CT_BINS'], 'double')
    h2sigma_mc = TH2D('MCsigmas',
                      ';#it{p}_{T} (GeV/#it{c});c#it{t} (cm);#sigma',
                      n_pt_bins, pt_edges, n_ct_bins, ct_edges)

Example #9

0

Show file

input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"
dataframe = RDataFrame(input_tree_name, input_ntuple)

# Individual selection requirements, in the order they are applied.
bs_tau = "(Bs_TAU > 0.002)"
bs_m = "(Bs_M > 5150) && (Bs_M < 5550)"
jpsi_m = "(Jpsi_M > 3020) && (Jpsi_M < 3170)"
phi_m = "(Phi_M > 980) && (Phi_M < 1050)"
bs_vtx = "(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF < 20)"
jpsi_vtx = "(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)"
phi_vtx = "(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
mu_pt = "(muplus_PT > 500) && (mumin_PT > 500)"

trigger_cut_list = [
    bs_tau, bs_m, jpsi_m, phi_m, bs_vtx, jpsi_vtx, phi_vtx, mu_pt
]

# Book every Count() before reading any of them.  RDataFrame actions are
# lazy, so the first GetValue() below fills all booked counts in one event
# loop instead of re-reading the ntuple once per GetValue() call.
total_count = dataframe.Count()

# Stand-alone efficiency: each cut applied on its own to the full sample.
single_cut_counts = [
    dataframe.Filter(cut).Count() for cut in trigger_cut_list
]

# Cumulative efficiency: each cut applied on top of all previous ones.
cum_eff_df = dataframe
cumulative_counts = []
for cut in trigger_cut_list:
    cum_eff_df = cum_eff_df.Filter(cut)
    cumulative_counts.append(cum_eff_df.Count())

# Both efficiencies are normalised to the total number of entries.
nentries = total_count.GetValue()
efficiency_list = [
    count.GetValue() / nentries for count in single_cut_counts
]
cum_efficiency_list = [
    count.GetValue() / nentries for count in cumulative_counts
]

print(efficiency_list)
print(cum_efficiency_list)

Example #10

0

Show file

File: trigger_retention_rate.py Project: umd-lhcb/lhcb-ntuples-gen

        description='find retention rates for various trigger paths.')

    parser.add_argument('ntp', help='specify ntuple path.')

    parser.add_argument('tree', help='specify tree name.')

    parser.add_argument('-t',
                        '--trigger-paths',
                        nargs='+',
                        help='specify trigger paths.')

    return parser.parse_args()


########
# Main #
########

if __name__ == '__main__':
    args = parse_input()

    frame = RDataFrame(args.tree, args.ntp)

    # -t/--trigger-paths is optional and has no default, so argparse leaves
    # it as None when it is omitted; treat that as "no trigger paths"
    # instead of failing on iteration.
    trigger_paths = args.trigger_paths or []

    # Each trigger path is booked as its own filter, named after itself, on
    # the root frame.  The filter nodes are kept in a list so that the
    # garbage collector does not delete them before the report is produced.
    cuts = [frame.Filter(tp, tp) for tp in trigger_paths]

    report = frame.Report()
    report.Print()

Example #11

0

Show file

File: CreateTemplates.py Project: agoetz-beep/useful_scripts

                        branch_vec)
    print("saved dataframe to disk ...")

# label and selection string taken from the parsed options
name = options.name

selection = options.selection

# Comma-separated variable lists.  "".split(",") yields [""], which would
# otherwise request a histogram for a column with an empty name, so empty
# entries are dropped.
vars_1D = [entry for entry in options.variables_1D.split(",") if entry]
vars_2D = [entry for entry in options.variables_2D.split(",") if entry]

# 21 bin edges from 0 to 200 in steps of 10.
binning_x = list(range(0, 201, 10))

# containers for the booked 1D and 2D histograms
histos_1D = {}
histos_2D = {}

reference_events = data_frame.Filter(selection)
for var_1D in vars_1D:
    var, nbinsx, x_low, x_high = None, None, None, None
    Histo1D_argument = None
    if ";" in var_1D:
        var, nbinsx, x_low, x_high = var_1D.split(";")
        Histo1D_argument = ("{}".format(var),
                            "title;{};arbitrary units".format(var),
                            int(nbinsx), float(x_low), float(x_high))
    else:
        var = var_1D
        Histo1D_argument = ("{}".format(var),
                            "title;{};arbitrary units".format(var), 50, 1, 1)
    print(var_1D)
    print(Histo1D_argument)
    histos_1D[var] = reference_events.Histo1D(

Example #12

0

Show file

from ROOT import (ROOT, RDataFrame, TCanvas)

# Turn on ROOT's implicit multi-threading before the dataframes are created.
ROOT.EnableImplicitMT()

#get file and tree from directory

minbias_ntuple = "/user/egovorko/work/public/minbias_JpsiPhi.root"
signal_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

# The same tree name is read from both the minimum-bias and the signal file.
minbias_df = RDataFrame(input_tree_name, minbias_ntuple)
signal_df = RDataFrame(input_tree_name, signal_ntuple)
# Keep events in which at least one entry of eventmuons_PIDmu is above 0.
PIDmu_cut = "eventmuons_PIDmu[eventmuons_PIDmu > 0].size() >= 1"
#pt_cut = "eventmuons_PT[eventmuons_PT > 500].size() >= 1"
minbias_pidmu = minbias_df.Filter(PIDmu_cut)
#minbias_pt = minbias_pidmu.Filter(pt_cut)
signal_pidmu = signal_df.Filter(PIDmu_cut)
#signal_pt = signal_pidmu.Filter(pt_cut)

# 2x2 canvas.  NOTE(review): the two histograms below are booked on the
# unfiltered frames (minbias_df / signal_df), not on the PIDmu-filtered ones.
c1 = TCanvas()
c1.Divide(2, 2)
# Pad 1: PIDmu distribution of the minimum-bias sample.
c1.cd(1)
pidmu1 = minbias_df.Histo1D("eventmuons_PIDmu")
pidmu1.SetTitle("Event muon PIDmu distribution (min bias)")
pidmu1.GetXaxis().SetTitle("PIDmu")
pidmu1.Draw()
# Pad 2: PIDmu distribution of the signal sample, drawn with line colour 2.
c1.cd(2)
pidmu2 = signal_df.Histo1D("eventmuons_PIDmu")
pidmu2.SetTitle("Event muon PIDmu distribution (signal)")
pidmu2.GetXaxis().SetTitle("PIDmu")
pidmu2.SetLineColor(2)

Example #13

0

Show file

File: minbias_tageff_rdf.py Project: alisidley/Bachelors-Thesis

from ROOT import (
    ROOT,
    RDataFrame,
)

# Turn on ROOT's implicit multi-threading before the dataframe is created.
ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
trigger_cuts = "(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
muon_cuts = "!eventmuons_BPVIPCHI2[eventmuons_BPVIPCHI2 < 8].empty() && Bs_len > 0"
df_cuts1 = dataframe.Filter(trigger_cuts)
df_cuts2 = df_cuts1.Filter(muon_cuts)

# Book all counts before reading any of them.  RDataFrame actions are lazy,
# so the first GetValue() below fills every booked count in one event loop
# instead of running a separate pass over the ntuple for each number.
nentries_result = dataframe.Count()
ntriggered_result = df_cuts1.Count()
ncut_result = df_cuts2.Count()
true_bs_result = df_cuts2.Filter("Bs_TRUEID == 531").Count()
true_bsbar_result = df_cuts2.Filter("Bs_TRUEID == -531").Count()
right_tag_result = df_cuts2.Filter(
    "(Bs_OSMuon_TAGDEC == 1 && Bs_TRUEID == 531) || (Bs_OSMuon_TAGDEC == -1 && Bs_TRUEID == -531)"
).Count()

nentries = nentries_result.GetValue()
ntriggered = ntriggered_result.GetValue()
ncut = ncut_result.GetValue()
print("entries: " + str(nentries))
print("triggered: " + str(ntriggered))
print("cut: " + str(ncut))

# Events passing all cuts with Bs_TRUEID equal to 531 / -531.
TrueBs = true_bs_result.GetValue()
TrueBsbar = true_bsbar_result.GetValue()

#define right tags, wrong tags and untagged; tagging efficiency, mistag probability and tagging performance
# R: tag decision and true ID agree (+1 with 531, or -1 with -531).
R = right_tag_result.GetValue()

Example #14

0

Show file

File: CreateTemplates.py Project: michaelwassmer/METAnalysis

# Split the comma-separated variable lists taken from the options and echo
# them before any clean-up.
vars_1D = options.variables_1D.split(",")
vars_2D = options.variables_2D.split(",")
print(vars_1D)
print(vars_2D)

# An empty option splits to [""]; treat that as "no variables requested".
vars_1D = [] if vars_1D == [""] else vars_1D
vars_2D = [] if vars_2D == [""] else vars_2D

# 21 bin edges from 0 to 200 in steps of 10.
binning_x = list(range(0, 201, 10))

# containers for the booked 1D and 2D histograms
histos_1D, histos_2D = {}, {}

# Apply the selection, then add every constructed variable as a new column.
# Previously tried extra column, kept for reference:
#.Define("pt_pfmet_t1smear_div_pt_pfmet_t1","pt_pfmet_t1smear/pt_pfmet_t1")
reference_events = data_frame.Filter(selection)
for constructed_var in constructed_vars:
    definition = constructed_vars[constructed_var]
    reference_events = reference_events.Define(constructed_var, definition)
for var_1D in vars_1D:
    var,nbinsx,x_low,x_high = None,None,None,None
    Histo1D_argument = None
    if ";" in var_1D:
        var,nbinsx,x_low,x_high = var_1D.split(";")
        Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var), int(nbinsx), float(x_low), float(x_high))
    else:
        var = var_1D
        Histo1D_argument = ("{}".format(var), "title;{};arbitrary units".format(var), 50, 1, 1)
    print(var_1D)
    print(Histo1D_argument)
    histos_1D[var]=reference_events.Histo1D(
                                          Histo1D_argument,