def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, weightfunc=None):
    """Book and fill the three histograms of ``var`` used for event normalisation.

    Args:
        df_evt: event dataframe carrying the ``is_ev_rej`` bit column.
        dfevtevtsel: dataframe of the selected events.
        label: base label; ``"_weight"`` is appended when ``weightfunc`` is given.
        var: name of the column that is histogrammed.
        weightfunc: optional object accepted by ``evaluate``; when given, each
            entry is filled with weight ``1 / weightfunc(value)``.

    Returns:
        Tuple ``(hSelMult, hNoVtxMult, hVtxOutMult)`` of ``TH1F``.
    """
    if weightfunc is not None:
        label = label + "_weight"

    def book(prefix):
        # Name and title are identical; binning comes from the instance.
        name = prefix + label
        return TH1F(name, name, self.nbinshisto,
                    self.minvaluehisto, self.maxvaluehisto)

    hSelMult = book('sel_')
    hNoVtxMult = book('novtx_')
    hVtxOutMult = book('vtxout_')

    df_to_keep = filter_bit_df(df_evt, 'is_ev_rej', [[], [0, 5, 6, 10, 11]])
    # events with reco vtx after previous selection
    tag_vtx = tag_bit_df(df_to_keep, 'is_ev_rej', [[], [1, 2, 7, 12]])
    df_no_vtx = df_to_keep[~tag_vtx.values]
    # events with reco zvtx > 10 cm after previous selection
    df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej', [[3], [1, 2, 7, 12]])

    # Same fill procedure for the three samples, in the original order.
    samples = (
        (hSelMult, dfevtevtsel),
        (hNoVtxMult, df_no_vtx),
        (hVtxOutMult, df_bit_zvtx_gr10),
    )
    for histo, frame in samples:
        if weightfunc is None:
            fill_hist(histo, frame[var])
            continue
        inverse = [1./weight for weight in evaluate(weightfunc, frame[var])]
        fill_hist(histo, frame[var], weights=inverse)

    return hSelMult, hNoVtxMult, hVtxOutMult
def test_evaluate_graph():
    """Check ``rnp.evaluate`` on a TGraph and a TSpline3, plus its error cases."""
    points = [0, .5, 1]

    graph = ROOT.TGraph(2)
    graph.SetPoint(0, 0, 1)
    graph.SetPoint(1, 1, 2)
    assert_array_equal(rnp.evaluate(graph, points), [1, 1.5, 2])

    spline = ROOT.TSpline3("spline", graph)
    assert_array_equal(rnp.evaluate(spline, points),
                       [spline.Eval(x) for x in points])

    # test exceptions
    arr_2d = RNG.rand(5, 2)
    assert_raises(TypeError, rnp.evaluate, object(), [1, 2, 3])
    for source in (graph, spline):
        # two-column input is rejected for these one-dimensional objects
        assert_raises(ValueError, rnp.evaluate, source, arr_2d)
def corrections(self, rec):
    """Return the posterior trigger correction weights for ``rec``.

    Returns:
        ``None`` when ``self.posterior_trigger_correction`` is falsy, otherwise
        a one-element list holding ``self.trigger_correct`` evaluated on the
        ``tau1_pt`` / ``tau2_pt`` columns of ``rec``.
    """
    # posterior trigger correction
    if self.posterior_trigger_correction:
        tau_pts = rec2array(rec[['tau1_pt', 'tau2_pt']])
        return [evaluate(self.trigger_correct, tau_pts)]
    return None
def make_weights(col, func, hist, use_func):
    """Extract one weight per entry of ``col``.

    Args:
        col: iterable of values to evaluate / look up.
        func: ROOT function passed to ``evaluate`` when ``use_func`` is true.
        hist: ROOT histogram whose bin contents are used otherwise.
        use_func: whether to use ``func`` (otherwise ``hist``).

    Returns:
        The result of ``evaluate(func, col)``, or a list of bin contents in
        which every zero content is replaced by ``1.``.
    """
    if not use_func:
        contents = (hist.GetBinContent(hist.FindBin(entry)) for entry in col)
        # warning, the histogram has empty bins at high mult. (>125 ntrkl),
        # so zero contents are mapped to 1. to avoid a 1/0 division when the
        # inverse of the weight is computed downstream
        return [content if content != 0. else 1. for content in contents]
    return evaluate(func, col)
def test_evaluate_func():
    """Check ``rnp.evaluate`` on TF1/TF2/TF3, formula strings and a TFormula."""
    f1 = ROOT.TF1("f1", "x")
    f2 = ROOT.TF2("f2", "x*y")
    f3 = ROOT.TF3("f3", "x*y*z")

    # generate random arrays
    arr_1d = RNG.rand(5)
    arr_2d = RNG.rand(5, 2)
    arr_3d = RNG.rand(5, 3)
    arr_4d = RNG.rand(5, 4)

    # each function is checked as an object and through its formula title
    cases = (
        (f1, arr_1d, [f1.Eval(x) for x in arr_1d]),
        (f2, arr_2d, [f2.Eval(*x) for x in arr_2d]),
        (f3, arr_3d, [f3.Eval(*x) for x in arr_3d]),
    )
    for func, arr, expected in cases:
        assert_array_equal(rnp.evaluate(func, arr), expected)
        assert_array_equal(rnp.evaluate(func.GetTitle(), arr), expected)

    # 4d formula
    f4 = ROOT.TFormula('test', 'x*y+z*t')
    assert_array_equal(rnp.evaluate(f4, arr_4d),
                       [f4.Eval(*x) for x in arr_4d])

    # mismatched dimensionality or an unparsable formula must raise
    invalid = (
        (f1, arr_2d),
        (f2, arr_3d),
        (f2, arr_1d),
        (f3, arr_1d),
        (f3, arr_2d),
        ("f", arr_1d),
        ("x*y", arr_1d),
        ("x", arr_2d),
        ("x*y", arr_3d),
    )
    for source, arr in invalid:
        assert_raises(ValueError, rnp.evaluate, source, arr)
def _calcPulls_TH1(hist, modelFunc):
    """
    Calculate pulls of a model function with respect to a 1D histogram.

    :param hist: ROOT histogram providing contents and bin errors
    :param modelFunc: model passed to ``root_numpy.evaluate``
    :return: result of ``mathFuncs.calcPulls(contents, errors, expected)``
    """
    observed = root_numpy.hist2array(hist)
    # ROOT bins are numbered 1..N; 0 and N+1 are under/overflow
    binNumbers = range(1, hist.GetNbinsX() + 1)
    errors = [hist.GetBinError(number) for number in binNumbers]
    # presumably the bin centers, as the original docstring states
    positions = getXvalues(hist)
    predicted = root_numpy.evaluate(modelFunc, positions)
    return mathFuncs.calcPulls(observed, errors, predicted)
def _calcPulls_graphErrors(graphErrors, modelFunc):
    """
    Calculate pulls of a model function at each (x, y) value of a graph.

    :param graphErrors: graph exposing ``GetY()`` and ``GetEY()``
    :param modelFunc: model passed to ``root_numpy.evaluate``
    :return: result of ``mathFuncs.calcPulls(y, ey, expected)``
    """
    # copy the ROOT buffers into plain Python lists
    observed = [value for value in graphErrors.GetY()]
    errors = [error for error in graphErrors.GetEY()]
    positions = getXvalues(graphErrors)
    predicted = root_numpy.evaluate(modelFunc, positions)
    return mathFuncs.calcPulls(observed, errors, predicted)
def test_evaluate_hist():
    """Check ``rnp.evaluate`` on 1D/2D/3D histograms and its shape validation."""
    # FillRandom looks the functions up by name ("f1", "f2", "f3"); they are
    # not created in this test
    h1 = ROOT.TH1D("h1", "", 10, 0, 1)
    h1.FillRandom("f1")
    h2 = ROOT.TH2D("h2", "", 10, 0, 1, 10, 0, 1)
    h2.FillRandom("f2")
    h3 = ROOT.TH3D("h3", "", 10, 0, 1, 10, 0, 1, 10, 0, 1)
    h3.FillRandom("f3")

    arr_1d = RNG.rand(5)
    arr_2d = RNG.rand(5, 2)
    arr_3d = RNG.rand(5, 3)

    # expected value: content of the bin each point falls into
    assert_array_equal(rnp.evaluate(h1, arr_1d),
                       [h1.GetBinContent(h1.FindBin(x)) for x in arr_1d])
    for hist, arr in ((h2, arr_2d), (h3, arr_3d)):
        assert_array_equal(
            rnp.evaluate(hist, arr),
            [hist.GetBinContent(hist.FindBin(*x)) for x in arr])

    mismatched = (
        (h1, arr_2d),
        (h2, arr_3d),
        (h2, arr_1d),
        (h3, arr_1d),
        (h3, arr_2d),
    )
    for hist, arr in mismatched:
        assert_raises(ValueError, rnp.evaluate, hist, arr)
def value(self, inputs, map_positions):
    """Combine several functions, choosing one per entry from an index map.

    Args:
        inputs: array handed unchanged to every used function.
        map_positions: 2D array of positions looked up in
            ``self.function_index_map``; one row per entry.

    Returns:
        1D float array whose element ``i`` is element ``i`` of the output of
        the function selected for entry ``i``.
    """
    index_map = self.function_index_map

    # remove overflows (overwrite with a value just below the histogram boundary)
    upper_bounds = [index_map.bounds(axis)[1] - 1e-3
                    for axis in range(len(index_map.axes))]

    def clip(row):
        return np.minimum(row, upper_bounds)

    positions = np.apply_along_axis(clip, 1, map_positions)
    # evaluate of a 1D histogram takes a flattened array as input
    if self.dim == 1:
        positions = positions.ravel()
    indices = evaluate(index_map, positions).astype(np.int32)

    # Compute the output of every used function; unused ones get an empty
    # placeholder so that list positions still match the function index
    outputs = [
        function(inputs) if i in self.indices else np.array([])
        for i, function in enumerate(self.functions)
    ]

    # Associate the correct function output with each entry
    output = np.zeros(len(indices))
    for entry, index in enumerate(indices):
        output[entry] = outputs[index][entry]
    return output
def process_histomass_single(self, index):
    """Fill and write the normalisation and invariant-mass histograms of one file.

    Reads the pickled event and candidate dataframes for ``index``, applies
    the configured trigger / run-list / event selections, and writes to
    ``self.l_histomass[index]``: the event-counting histogram ``histonorm``,
    the histograms returned by ``gethistonormforselevt_mult``, one mass
    histogram (plus an inverse-weighted one) per (pt, second-variable) bin,
    MC signal/reflection histograms when ``self.mcordata == "mc"``, and the
    validation histograms when ``self.event_cand_validation`` is true.

    Args:
        index: position of the file in the per-file path lists on ``self``.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")
    dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
    neventsorig = len(dfevtorig)
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    neventsaftertrigger = len(dfevtorig)
    if self.runlistrigger is not None:
        dfevtorig = selectdfrunlist(dfevtorig,
                                    self.run_param[self.runlistrigger],
                                    "run_number")
    neventsafterrunsel = len(dfevtorig)
    # The candidate loop below treats s_evtsel as optional; do the same here
    # instead of handing None to DataFrame.query.
    if self.s_evtsel is not None:
        dfevtevtsel = dfevtorig.query(self.s_evtsel)
    else:
        dfevtevtsel = dfevtorig

    # validation plot for event selection
    neventsafterevtsel = len(dfevtevtsel)
    histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
    histonorm.SetBinContent(1, neventsorig)
    histonorm.GetXaxis().SetBinLabel(1, "tot events")
    histonorm.SetBinContent(2, neventsaftertrigger)
    histonorm.GetXaxis().SetBinLabel(2, "tot events after trigger")
    histonorm.SetBinContent(3, neventsafterrunsel)
    histonorm.GetXaxis().SetBinLabel(3, "tot events after run sel")
    histonorm.SetBinContent(4, neventsafterevtsel)
    histonorm.GetXaxis().SetBinLabel(4, "tot events after evt sel")
    for ibin2 in range(len(self.lvar2_binmin)):
        binneddf = seldf_singlevar_inclusive(dfevtevtsel, self.v_var2_binning_gen,
                                             self.lvar2_binmin[ibin2],
                                             self.lvar2_binmax[ibin2])
        histonorm.SetBinContent(5 + ibin2, len(binneddf))
        histonorm.GetXaxis().SetBinLabel(
            5 + ibin2,
            "tot events after mult sel %d - %d" %
            (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]))
    histonorm.Write()

    # Weighted histograms are only filled for data with weights enabled.
    use_weights = self.apply_weights is True and self.mcordata == "data"

    labeltrigger = "hbit%svs%s" % (self.triggerbit, self.v_var2_binning_gen)
    myfile.cd()
    hsel, hnovtxmult, hvtxoutmult = \
        self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel,
                                        labeltrigger, self.v_var2_binning_gen)
    if use_weights:
        hselweight, hnovtxmultweight, hvtxoutmultweight = \
            self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel,
                                            labeltrigger, self.v_var2_binning_gen,
                                            self.weightfunc)
        hselweight.Write()
        hnovtxmultweight.Write()
        hvtxoutmultweight.Write()
    hsel.Write()
    hnovtxmult.Write()
    hvtxoutmult.Write()

    list_df_recodtrig = []
    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(
            openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        if self.runlistrigger is not None:
            df = selectdfrunlist(df, self.run_param[self.runlistrigger],
                                 "run_number")
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        # kept before the pt-bin selection: used for the validation plots
        list_df_recodtrig.append(df)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2],
                      self.lvar2_binmax[ibin2])
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning,
                                               self.lvar2_binmin[ibin2],
                                               self.lvar2_binmax[ibin2])
            fill_hist(h_invmass, df_bin.inv_mass)
            if use_weights:
                weights = evaluate(self.weightfunc, df_bin[self.v_var2_binning_gen])
                weightsinv = [1. / weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
            myfile.cd()
            h_invmass.Write()
            # written even when not filled, so the output layout is stable
            h_invmass_weight.Write()
            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                    df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    if self.event_cand_validation is True:
        df_recodtrig = pd.concat(list_df_recodtrig)
        df_recodtrig = df_recodtrig.query("inv_mass>%f and inv_mass<%f" %
                                          (self.mass - 0.15, self.mass + 0.15))
        dfevtwithd = pd.merge(dfevtevtsel, df_recodtrig, on=self.v_evtmatch)
        label = "h%s" % self.v_var2_binning_gen
        histomult = TH1F(label, label, self.nbinshisto,
                         self.minvaluehisto, self.maxvaluehisto)
        fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
        histomult.Write()
        labelwithd = "h%s_withd" % self.v_var2_binning_gen
        histomultwithd = TH1F(labelwithd, labelwithd, self.nbinshisto,
                              self.minvaluehisto, self.maxvaluehisto)
        # "_x" is the suffix pandas gives the left-hand column of the merge
        fill_hist(histomultwithd, dfevtwithd["%s_x" % self.v_var2_binning_gen])
        histomultwithd.Write()
        # Validation histograms
        fill_validation_vertex(dfevtorig, dfevtevtsel, df_recodtrig).write()
        fill_validation_multiplicity(dfevtorig, dfevtevtsel, df_recodtrig).write()
        fill_validation_candidates(df_recodtrig).write()
        if self.mcordata == "mc":
            fill_validation_candidates(
                df_recodtrig[df_recodtrig[self.v_ismcsignal] == 1], "MC").write()
def test_evaluate():
    """Check ``rnp.evaluate`` on functions, formulas, histograms, graphs and splines."""
    # create functions and histograms
    f1 = TF1("f1", "x")
    f2 = TF2("f2", "x*y")
    f3 = TF3("f3", "x*y*z")
    h1 = TH1D("h1", "", 10, 0, 1)
    h1.FillRandom("f1")
    h2 = TH2D("h2", "", 10, 0, 1, 10, 0, 1)
    h2.FillRandom("f2")
    h3 = TH3D("h3", "", 10, 0, 1, 10, 0, 1, 10, 0, 1)
    h3.FillRandom("f3")

    # generate random arrays
    arr_1d = RNG.rand(5)
    arr_2d = RNG.rand(5, 2)
    arr_3d = RNG.rand(5, 3)
    arr_4d = RNG.rand(5, 4)

    # evaluate the functions, both as objects and through their formula title
    function_cases = (
        (f1, arr_1d, [f1.Eval(x) for x in arr_1d]),
        (f2, arr_2d, [f2.Eval(*x) for x in arr_2d]),
        (f3, arr_3d, [f3.Eval(*x) for x in arr_3d]),
    )
    for func, arr, expected in function_cases:
        assert_array_equal(rnp.evaluate(func, arr), expected)
        assert_array_equal(rnp.evaluate(func.GetTitle(), arr), expected)

    # 4d formula
    f4 = TFormula('test', 'x*y+z*t')
    assert_array_equal(rnp.evaluate(f4, arr_4d),
                       [f4.Eval(*x) for x in arr_4d])

    # evaluate the histograms
    assert_array_equal(rnp.evaluate(h1, arr_1d),
                       [h1.GetBinContent(h1.FindBin(x)) for x in arr_1d])
    for hist, arr in ((h2, arr_2d), (h3, arr_3d)):
        assert_array_equal(
            rnp.evaluate(hist, arr),
            [hist.GetBinContent(hist.FindBin(*x)) for x in arr])

    # create a graph
    points = [0, .5, 1]
    g = TGraph(2)
    g.SetPoint(0, 0, 1)
    g.SetPoint(1, 1, 2)
    assert_array_equal(rnp.evaluate(g, points), [1, 1.5, 2])
    from ROOT import TSpline3
    s = TSpline3("spline", g)
    assert_array_equal(rnp.evaluate(s, points), [s.Eval(x) for x in points])

    # test exceptions
    assert_raises(TypeError, rnp.evaluate, object(), [1, 2, 3])
    invalid = (
        (h1, arr_2d),
        (h2, arr_3d),
        (h2, arr_1d),
        (h3, arr_1d),
        (h3, arr_2d),
        (f1, arr_2d),
        (f2, arr_3d),
        (f2, arr_1d),
        (f3, arr_1d),
        (f3, arr_2d),
        (g, arr_2d),
        (s, arr_2d),
        ("f", arr_1d),
        ("x*y", arr_1d),
        ("x", arr_2d),
        ("x*y", arr_3d),
    )
    for source, arr in invalid:
        assert_raises(ValueError, rnp.evaluate, source, arr)
def test_evaluate():
    """Check ``rnp.evaluate`` on functions, formulas, histograms, graphs and splines.

    Expected values are built with list comprehensions rather than ``map``:
    on Python 3 ``map`` returns a lazy iterator, which ``assert_array_equal``
    cannot compare element-wise with the evaluated array.
    """
    # create functions and histograms
    f1 = TF1("f1", "x")
    f2 = TF2("f2", "x*y")
    f3 = TF3("f3", "x*y*z")
    h1 = TH1D("h1", "", 10, 0, 1)
    h1.FillRandom("f1")
    h2 = TH2D("h2", "", 10, 0, 1, 10, 0, 1)
    h2.FillRandom("f2")
    h3 = TH3D("h3", "", 10, 0, 1, 10, 0, 1, 10, 0, 1)
    h3.FillRandom("f3")

    # generate random arrays
    arr_1d = np.random.rand(5)
    arr_2d = np.random.rand(5, 2)
    arr_3d = np.random.rand(5, 3)
    arr_4d = np.random.rand(5, 4)

    # evaluate the functions
    assert_array_equal(rnp.evaluate(f1, arr_1d),
                       [f1.Eval(x) for x in arr_1d])
    assert_array_equal(rnp.evaluate(f1.GetTitle(), arr_1d),
                       [f1.Eval(x) for x in arr_1d])
    assert_array_equal(rnp.evaluate(f2, arr_2d),
                       [f2.Eval(*x) for x in arr_2d])
    assert_array_equal(rnp.evaluate(f2.GetTitle(), arr_2d),
                       [f2.Eval(*x) for x in arr_2d])
    assert_array_equal(rnp.evaluate(f3, arr_3d),
                       [f3.Eval(*x) for x in arr_3d])
    assert_array_equal(rnp.evaluate(f3.GetTitle(), arr_3d),
                       [f3.Eval(*x) for x in arr_3d])

    # 4d formula
    f4 = TFormula('test', 'x*y+z*t')
    assert_array_equal(rnp.evaluate(f4, arr_4d),
                       [f4.Eval(*x) for x in arr_4d])

    # evaluate the histograms
    assert_array_equal(rnp.evaluate(h1, arr_1d),
                       [h1.GetBinContent(h1.FindBin(x)) for x in arr_1d])
    assert_array_equal(rnp.evaluate(h2, arr_2d),
                       [h2.GetBinContent(h2.FindBin(*x)) for x in arr_2d])
    assert_array_equal(rnp.evaluate(h3, arr_3d),
                       [h3.GetBinContent(h3.FindBin(*x)) for x in arr_3d])

    # create a graph
    g = TGraph(2)
    g.SetPoint(0, 0, 1)
    g.SetPoint(1, 1, 2)
    assert_array_equal(rnp.evaluate(g, [0, .5, 1]), [1, 1.5, 2])
    from ROOT import TSpline3
    s = TSpline3("spline", g)
    assert_array_equal(rnp.evaluate(s, [0, .5, 1]),
                       [s.Eval(x) for x in [0, .5, 1]])

    # test exceptions
    assert_raises(TypeError, rnp.evaluate, object(), [1, 2, 3])
    assert_raises(ValueError, rnp.evaluate, h1, arr_2d)
    assert_raises(ValueError, rnp.evaluate, h2, arr_3d)
    assert_raises(ValueError, rnp.evaluate, h2, arr_1d)
    assert_raises(ValueError, rnp.evaluate, h3, arr_1d)
    assert_raises(ValueError, rnp.evaluate, h3, arr_2d)
    assert_raises(ValueError, rnp.evaluate, f1, arr_2d)
    assert_raises(ValueError, rnp.evaluate, f2, arr_3d)
    assert_raises(ValueError, rnp.evaluate, f2, arr_1d)
    assert_raises(ValueError, rnp.evaluate, f3, arr_1d)
    assert_raises(ValueError, rnp.evaluate, f3, arr_2d)
    assert_raises(ValueError, rnp.evaluate, g, arr_2d)
    assert_raises(ValueError, rnp.evaluate, s, arr_2d)
    assert_raises(ValueError, rnp.evaluate, "f", arr_1d)
    assert_raises(ValueError, rnp.evaluate, "x*y", arr_1d)
    assert_raises(ValueError, rnp.evaluate, "x", arr_2d)
    assert_raises(ValueError, rnp.evaluate, "x*y", arr_3d)
import matplotlib as mat
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ROOT
import root_numpy as rnp
import pylandau

# Seaborn configuration and LaTeX text rendering
sns.set(rc={"figure.figsize": (8, 4)})
sns.set_context('paper', font_scale=1.0, rc={'lines.linewidth': 1.0})
sns.set_style('whitegrid')
mat.rc('text', usetex=True)
mat.rc('font', family='serif', serif='palatino')
# NOTE(review): recent matplotlib releases appear to expect a single string
# for this setting rather than a list - confirm against the installed version.
mat.rcParams['text.latex.preamble'] = [r'\usepackage[utf8]{inputenc}',
                                       r'\usepackage[T1]{fontenc}',
                                       r'\usepackage[spanish]{babel}',
                                       r'\usepackage{amsmath,amsfonts,amssymb}',
                                       r'\usepackage{siunitx}']

# Generate a random variable "time" with a Landau distribution -- useful to
# model the single photoelectron response of a PMT.
# The formula is defined once and a single ranged TF1 serves both the PDF
# evaluation and the sampling, instead of building 'tau0' twice.
LANDAU_FORMULA = 'TMath::Landau(x,492.145,7.59229,1)'
time = np.arange(400, 700, 0.01)
dtau = ROOT.TF1('tau0', LANDAU_FORMULA, 400, 700)
tau = rnp.evaluate(dtau, time)  # PDF -- it may also be generated with pylandau

# generate Nevents random samples from the distribution
Nevents = 1000000
rnd_tau = rnp.random_sample(dtau, Nevents, seed=1)

c = sns.color_palette(sns.cubehelix_palette(8, start=.25, rot=-.75, reverse=True))
fig, ax = plt.subplots(nrows=1, ncols=1)
# plotting the PDF and the distribution of the samples
plt.plot(time, tau, color=c[0])
sns.distplot(rnd_tau, hist=True, kde=False, rug=False, ax=ax, norm_hist=True,
             hist_kws={'histtype': 'stepfilled', 'alpha': 0.9}, color=c[1])
plt.show()
def process_histomass_single(self, index):
    """Fill and write the normalisation and mass histograms of one input file.

    Writes to ``self.l_histomass[index]``: one ``hEvForNorm_mult<i>``
    histogram per second-variable bin, and per (pt, second-variable) bin the
    mass histogram, its inverse-weighted version, the tracklet-multiplicity
    histograms, the z-vs-mass histogram when jet columns are present, and the
    MC signal/reflection histograms when ``self.mcordata == "mc"``.

    The trigger-correction function is read once from
    ``<self.d_val>/correctionsweights.root`` instead of re-opening that file
    in every bin, and the file is closed at the end.

    Args:
        index: position of the file in the per-file path lists on ``self``.
    """
    myfile = TFile.Open(self.l_histomass[index], "recreate")
    dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
    if self.s_trigger is not None:
        dfevtorig = dfevtorig.query(self.s_trigger)
    runlist = self.run_param[self.runlistrigger[self.triggerbit]]
    dfevtorig = selectdfrunlist(dfevtorig, runlist, "run_number")

    for ibin2 in range(len(self.lvar2_binmin)):
        mybindfevtorig = seldf_singlevar(dfevtorig, self.v_var2_binning_gen,
                                         self.lvar2_binmin[ibin2],
                                         self.lvar2_binmax[ibin2])
        hNorm = TH1F("hEvForNorm_mult%d" % ibin2, "hEvForNorm_mult%d" % ibin2,
                     2, 0.5, 2.5)
        hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
        hNorm.GetXaxis().SetBinLabel(2, "selected events")
        nselevt = 0
        norm = 0
        if not mybindfevtorig.empty:
            nselevt = len(mybindfevtorig.query("is_ev_rej==0"))
            norm = getnormforselevt(mybindfevtorig)
        hNorm.SetBinContent(1, norm)
        hNorm.SetBinContent(2, nselevt)
        hNorm.Write()

    # Load the trigger-correction function once; both the condition and the
    # object name are loop-invariant.
    fileweight = None
    funcweighttrig = None
    if "INT7" not in self.triggerbit and self.mcordata == "data":
        fileweight = TFile.Open("%s/correctionsweights.root" % self.d_val, "read")
        # NOTE(review): the name is built from v_var2_binning_gen, while the
        # function is evaluated on the v_var2_binning column below - confirm.
        funcweighttrig = fileweight.Get(
            "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning_gen))
        # opening a file makes it current; switch back to the output file
        myfile.cd()

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
        if self.doml is True:
            df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2],
                      self.lvar2_binmax[ibin2])
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(df, self.v_var2_binning,
                                     self.lvar2_binmin[ibin2],
                                     self.lvar2_binmax[ibin2])
            df_bin = selectdfrunlist(df_bin, runlist, "run_number")
            fill_hist(h_invmass, df_bin.inv_mass)
            # falsy when weighting is disabled or the function was not found
            if funcweighttrig:
                weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                weightsinv = [1./weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()

            histmult = TH1F("hmultpt%dmult%d" % (ipt, ibin2),
                            "hmultpt%dmult%d" % (ipt, ibin2), 1000, 0, 1000)
            fill_hist(histmult, df_bin.n_tracklets_corr)
            histmult.Write()
            h_v0m_ntrackletsD = TH2F("h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                     "h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                     200, 0, 200, 200, -0.5, 1999.5)
            v_v0m_ntrackletsD = np.vstack((df_bin.n_tracklets_corr,
                                           df_bin.v0m_corr)).T
            fill_hist(h_v0m_ntrackletsD, v_v0m_ntrackletsD)
            h_v0m_ntrackletsD.Write()

            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                    5000, 1.00, 6.00, 2000, -0.5, 1.5)
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()

            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                    df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    # release the read-only weights file now that all bins are filled
    if fileweight:
        fileweight.Close()
def main(parameters):
    """Train, store, test and compress the e/gamma isolation working points.

    Steps, all driven by ``parameters``:
      1. resolve the version directory (new or latest when set to 'automatic');
      2. train one isolation cut per efficiency value;
      3. save each cut as a TH2 in ``<workingdir>/<name>.root``;
      4. optionally test the efficiencies versus offline variables;
      5. combine the working points with the eta/et efficiency shape,
         compress the result into a TH3 and test it.

    Raises:
        RuntimeError: if no trained version exists and training was not
            requested. (On Python 2 this is still caught by
            ``except StandardError``, which the original raised.)
    """
    # Compute isolation cuts for efficiencies from 0.2 to 1 with smaller steps
    # for larger efficiencies
    # TODO: put this in parameters
    effs = np.arange(0.2, 0.5, 0.05)
    effs = np.concatenate((effs, np.arange(0.5, 0.85, 0.02)))
    effs = np.concatenate((effs, np.arange(0.85, 0.999, 0.01)))

    # if no version specified, automatically set version number
    # (strings are compared with ==; `is` only tests object identity)
    if parameters.version == 'automatic':
        # if training of the working points requested
        # create a new version
        if parameters.steps.train_workingpoints:
            version = batch_launcher.job_version(parameters.working_directory)
        # else, use the last version available
        else:
            version = batch_launcher.latest_version(
                parameters.working_directory)
            if version == '':
                raise RuntimeError(
                    'Cannot find already trained working points')
    else:
        version = parameters.version
    workingdir = parameters.working_directory + '/' + version

    inputs = [
        parameters.variables.ieta,
        parameters.variables.ntt,
    ]
    target = parameters.variables.iso
    pileupref = parameters.variables.rho

    # Train isolation cuts
    eg_isolations = train_isolation_workingpoints(
        parameters.steps, effs, parameters.signal_file,
        parameters.signal_tree, parameters.working_directory, version,
        parameters.name, inputs, target, pileupref)

    with root_open(workingdir + '/' + parameters.name + '.root',
                   'recreate') as output_file:
        # Save isolation cuts in TH2s
        for eff, eg_isolation_cuts in zip(effs, eg_isolations):
            histo = function2th2(eg_isolation_cuts.predict,
                                 quantile_regression.binning[inputs[0]],
                                 quantile_regression.binning[inputs[1]])
            histo.SetName(parameters.name + '_' + str(eff))
            histo.Write()

        # Test isolation cuts vs offline variables
        if parameters.steps.test_workingpoints:
            print('> Checking efficiencies vs offline variables')
            # `isolation=iso` binds each working point at definition time
            graphs = test_efficiency(
                functions=[(lambda x, isolation=iso: np.less(
                    x[:, [len(inputs)]].ravel(),
                    isolation.predict(x[:, range(len(inputs))])))
                    for iso in eg_isolations],
                function_inputs=inputs + [target],
                # TODO: define these variables in parameters
                variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
                inputfile=parameters.signal_file,
                tree=parameters.signal_tree,
                # TODO: Define the selection in parameters
                selection='et>0'
            )
            for graph in graphs:
                graph.Write()

        print('> Applying eta/et efficiency shape')
        # TODO: Add the possibility to perform automatic optimization of the
        # efficiency shape
        # The extra last working point (efficiency 1) is a cut at 9999.
        eg_isolation_eta_et = CombinedWorkingPoints(
            np.append(effs, [1.]),
            [iso.predict for iso in eg_isolations] +
            [lambda x: np.full(x.shape[0], 9999.)],
            parameters.eta_pt_optimization.eta_pt_efficiency_shapes)

        print('> Compress input variables')
        branches = [
            parameters.variables.ieta,
            parameters.variables.ntt,
            parameters.variables.et,
        ]
        data = root2array(
            parameters.signal_file,
            treename=parameters.signal_tree,
            branches=branches,
            # TODO: Define the selection in parameters
            selection='et>0')
        # structured array -> plain 2D float32 array, one column per branch
        data = data.view(
            (np.float64, len(data.dtype.names))).astype(np.float32)
        iso_cuts = eg_isolation_eta_et.value(data[:, [0, 1]], data[:, [0, 2]])
        eg_isolation_compressed = events2th3(data, iso_cuts,
                                             (parameters.compression.eta, ),
                                             (parameters.compression.ntt, ),
                                             (parameters.compression.et, ))
        eg_isolation_compressed.SetName('isolation_compressed_')
        eg_isolation_compressed.Write()

        # NOTE(review): a bare callable is passed here, whereas the call above
        # passes a list of callables - confirm test_efficiency accepts both.
        graphs_compressed = test_efficiency(
            functions=(lambda x: np.less(
                x[:, [3]].ravel(),
                evaluate(eg_isolation_compressed, x[:, range(3)]))),
            function_inputs=branches + [parameters.variables.iso],
            # TODO: define these variables in parameters
            variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
            inputfile=parameters.signal_file,
            tree=parameters.signal_tree,
            # TODO: Define the selection in parameters
            selection='et>0'
        )
        for graph in graphs_compressed:
            graph.Write()
def cutvariation_masshistos(self, min_cv_cut, max_cv_cut):
    """Write mass histograms for a scan of the ML probability cut.

    For every pt bin, ``2 * self.p_ncutvar + 1`` cut values are tried:
    ``p_ncutvar`` steps from ``min_cv_cut[ipt]`` up to the central cut, the
    central cut ``self.lpt_probcutfin[bin_id]``, and ``p_ncutvar`` steps up to
    ``max_cv_cut[ipt]``. For each cut and each second-variable bin a mass
    histogram and its inverse-weighted version are written to
    ``self.n_filemass_cutvar``.

    The trigger-correction function is read once from
    ``<self.d_val>/correctionsweights.root`` instead of re-opening that file
    for every histogram, and the file is closed at the end.

    Args:
        min_cv_cut: per-pt-bin lowest cut value.
        max_cv_cut: per-pt-bin highest cut value.
    """
    myfile = TFile.Open(self.n_filemass_cutvar, "recreate")

    # Load the trigger-correction function once; both the condition and the
    # object name are loop-invariant.
    fileweight = None
    funcweighttrig = None
    if "INT7" not in self.triggerbit:
        fileweight = TFile.Open("%s/correctionsweights.root" % self.d_val, "read")
        funcweighttrig = fileweight.Get(
            "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning))
        # opening a file makes it current; switch back to the output file
        myfile.cd()

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged_data[bin_id], "rb"))
        stepsmin = (self.lpt_probcutfin[bin_id] - min_cv_cut[ipt]) / self.p_ncutvar
        stepsmax = (max_cv_cut[ipt] - self.lpt_probcutfin[bin_id]) / self.p_ncutvar
        ntrials = 2 * self.p_ncutvar + 1
        icvmax = 1
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger_data is not None:
            df = df.query(self.s_trigger_data)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        print("Using run selection for mass histo", self.runlistrigger[self.triggerbit], \
              "for period", self.period)
        df = selectdfrunlist(df, self.run_param[self.runlistrigger[self.triggerbit]],
                             "run_number")
        for icv in range(ntrials):
            if icv < self.p_ncutvar:
                selml_cvval = min_cv_cut[ipt] + icv * stepsmin
            elif icv == self.p_ncutvar:
                selml_cvval = self.lpt_probcutfin[bin_id]
            else:
                selml_cvval = self.lpt_probcutfin[bin_id] + icvmax * stepsmax
                icvmax = icvmax + 1
            selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)
            print("Cutting on: ", selml_cv)
            # The cut is applied cumulatively to the already-cut dataframe.
            # NOTE(review): this matches cutting the full sample only while
            # the cut values increase with icv (min < central < max) - confirm.
            df = df.query(selml_cv)
            for ibin2 in range(len(self.lvar2_binmin)):
                suffix = "%s%d_%d_%d_%s%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], icv,
                          self.v_var2_binning, self.lvar2_binmin[ibin2],
                          self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "",
                                        self.p_num_bins,
                                        self.p_mass_fit_lim[0],
                                        self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2],
                                         self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                # falsy when weighting is disabled or the function was not found
                if funcweighttrig:
                    weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                    weightsinv = [1./weight for weight in weights]
                    fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()

    # release the read-only weights file now that all histograms are written
    if fileweight:
        fileweight.Close()
def main(parameters):
    """Train, store, test and compress the e/gamma isolation working points.

    Steps, all driven by ``parameters``:
      1. resolve the version directory (new or latest when set to 'automatic');
      2. train one isolation cut per efficiency value;
      3. save each cut as a TH2 in ``<workingdir>/<name>.root``;
      4. optionally test the efficiencies versus offline variables;
      5. combine the working points with the eta/et efficiency shape,
         compress the result into a TH3 and test it.

    Raises:
        RuntimeError: if no trained version exists and training was not
            requested. (On Python 2 this is still caught by
            ``except StandardError``, which the original raised.)
    """
    # Compute isolation cuts for efficiencies from 0.2 to 1 with smaller steps
    # for larger efficiencies
    # TODO: put this in parameters
    effs = np.arange(0.2, 0.5, 0.05)
    effs = np.concatenate((effs, np.arange(0.5, 0.85, 0.02)))
    effs = np.concatenate((effs, np.arange(0.85, 0.999, 0.01)))

    # if no version specified, automatically set version number
    # (strings are compared with ==; `is` only tests object identity)
    if parameters.version == 'automatic':
        # if training of the working points requested
        # create a new version
        if parameters.steps.train_workingpoints:
            version = batch_launcher.job_version(parameters.working_directory)
        # else, use the last version available
        else:
            version = batch_launcher.latest_version(parameters.working_directory)
            if version == '':
                raise RuntimeError('Cannot find already trained working points')
    else:
        version = parameters.version
    workingdir = parameters.working_directory + '/' + version

    inputs = [
        parameters.variables.ieta,
        parameters.variables.ntt,
    ]
    target = parameters.variables.iso
    pileupref = parameters.variables.rho

    # Train isolation cuts
    eg_isolations = train_isolation_workingpoints(
        parameters.steps, effs, parameters.signal_file,
        parameters.signal_tree, parameters.working_directory, version,
        parameters.name, inputs, target, pileupref)

    with root_open(workingdir + '/' + parameters.name + '.root', 'recreate') as output_file:
        # Save isolation cuts in TH2s
        for eff, eg_isolation_cuts in zip(effs, eg_isolations):
            histo = function2th2(eg_isolation_cuts.predict,
                                 quantile_regression.binning[inputs[0]],
                                 quantile_regression.binning[inputs[1]])
            histo.SetName(parameters.name + '_' + str(eff))
            histo.Write()

        # Test isolation cuts vs offline variables
        if parameters.steps.test_workingpoints:
            print('> Checking efficiencies vs offline variables')
            # `isolation=iso` binds each working point at definition time
            graphs = test_efficiency(
                functions=[(lambda x, isolation=iso: np.less(
                    x[:, [len(inputs)]].ravel(),
                    isolation.predict(x[:, range(len(inputs))])))
                    for iso in eg_isolations],
                function_inputs=inputs + [target],
                # TODO: define these variables in parameters
                variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
                inputfile=parameters.signal_file,
                tree=parameters.signal_tree,
                # TODO: Define the selection in parameters
                selection='et>0'
            )
            for graph in graphs:
                graph.Write()

        print('> Applying eta/et efficiency shape')
        # TODO: Add the possibility to perform automatic optimization of the
        # efficiency shape
        # The extra last working point (efficiency 1) is a cut at 9999.
        eg_isolation_eta_et = CombinedWorkingPoints(
            np.append(effs, [1.]),
            [iso.predict for iso in eg_isolations] + [lambda x: np.full(x.shape[0], 9999.)],
            parameters.eta_pt_optimization.eta_pt_efficiency_shapes)

        print('> Compress input variables')
        branches = [
            parameters.variables.ieta,
            parameters.variables.ntt,
            parameters.variables.et,
        ]
        data = root2array(parameters.signal_file,
                          treename=parameters.signal_tree,
                          branches=branches,
                          # TODO: Define the selection in parameters
                          selection='et>0')
        # structured array -> plain 2D float32 array, one column per branch
        data = data.view((np.float64, len(data.dtype.names))).astype(np.float32)
        iso_cuts = eg_isolation_eta_et.value(data[:, [0, 1]], data[:, [0, 2]])
        eg_isolation_compressed = events2th3(data, iso_cuts,
                                             (parameters.compression.eta,),
                                             (parameters.compression.ntt,),
                                             (parameters.compression.et,))
        eg_isolation_compressed.SetName('isolation_compressed_')
        eg_isolation_compressed.Write()

        # NOTE(review): a bare callable is passed here, whereas the call above
        # passes a list of callables - confirm test_efficiency accepts both.
        graphs_compressed = test_efficiency(
            functions=(lambda x: np.less(
                x[:, [3]].ravel(),
                evaluate(eg_isolation_compressed, x[:, range(3)]))),
            function_inputs=branches + [parameters.variables.iso],
            # TODO: define these variables in parameters
            variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
            inputfile=parameters.signal_file,
            tree=parameters.signal_tree,
            # TODO: Define the selection in parameters
            selection='et>0'
        )
        for graph in graphs_compressed:
            graph.Write()
def process_histomass(self):
    """Fill and write the invariant-mass histograms of the merged sample.

    For every (pt, second-variable) bin this writes to ``self.n_filemass``
    the mass histogram, its inverse-weighted version, the z-vs-mass histogram
    when jet columns are present, and the MC signal/reflection histograms
    when ``self.mcordata == "mc"``.

    The trigger-correction function is read once from
    ``<self.d_val>/correctionsweights.root`` instead of re-opening that file
    in every bin, and the file is closed at the end. If the function is not
    found, the weighted histogram is left empty, as the other mass-histogram
    routines do, rather than passing a null object to ``evaluate``.
    """
    myfile = TFile.Open(self.n_filemass, "recreate")

    # Load the trigger-correction function once; the condition, the config
    # lookup and the object name are loop-invariant.
    fileweight = None
    funcweighttrig = None
    triggerbit = self.datap["analysis"][self.typean]["triggerbit"]
    if "INT7" not in triggerbit and self.mcordata == "data":
        fileweight = TFile.Open("%s/correctionsweights.root" % self.d_val, "read")
        # NOTE(review): the condition uses the configured trigger bit while
        # the name uses self.triggerbit - presumably the same value; confirm.
        funcweighttrig = fileweight.Get("funcnorm_%s" % self.triggerbit)
        # opening a file makes it current; switch back to the output file
        myfile.cd()

    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2],
                      self.lvar2_binmax[ibin2])
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                    self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(df, self.v_var2_binning,
                                     self.lvar2_binmin[ibin2],
                                     self.lvar2_binmax[ibin2])
            fill_hist(h_invmass, df_bin.inv_mass)
            # falsy when weighting is disabled or the function was not found
            if funcweighttrig:
                weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                weightsinv = [1. / weight for weight in weights]
                fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
            myfile.cd()
            h_invmass.Write()
            h_invmass_weight.Write()

            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                    5000, 1.00, 6.00, 2000, -0.5, 1.5)
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()

            if self.mcordata == "mc":
                df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                    df_bin, self.v_bitvar, self.b_mcrefl), dtype=int)
                df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_refl.Write()

    # release the read-only weights file now that all bins are filled
    if fileweight:
        fileweight.Close()