def produce_pickle_file(hist_passed_data, hist_total_data, file_name):
    """Pickle the per-pT-bin data efficiency to ``file_name``.

    An ``Efficiency`` is built from the two histograms. For every pair of
    consecutive edges in the module-level ``jet_pt_bins`` the efficiency and
    its upper/lower errors are stored as::

        {'pt_<lower>_<upper>': {'data': {'efficiency': ...,
                                         'err_up': ...,
                                         'err_down': ...}}}

    Args:
        hist_passed_data: histogram of the entries that passed.
        hist_total_data: histogram of all entries.
        file_name: path of the pickle file; it is opened in ``'wb'`` mode, so
            an existing file is overwritten.
    """
    data_efficiency = Efficiency(hist_passed_data, hist_total_data)

    dictionary = {}
    consecutive_edges = zip(jet_pt_bins[:-1], jet_pt_bins[1:])
    # The efficiency object is queried with bin numbers starting at 1
    # (edge index + 1), hence the enumerate offset.
    for bin_number, (lower_edge_pt, upper_edge_pt) in enumerate(
            consecutive_edges, 1):
        pt_bin_range = 'pt_' + str(lower_edge_pt) + '_' + str(upper_edge_pt)
        dictionary[pt_bin_range] = {
            'data': {
                'efficiency': data_efficiency.GetEfficiency(bin_number),
                'err_up': data_efficiency.GetEfficiencyErrorUp(bin_number),
                'err_down': data_efficiency.GetEfficiencyErrorLow(bin_number),
            },
        }

    # The context manager guarantees the file is flushed and closed, even if
    # pickling raises; the previous version never closed the handle.
    with open(file_name, 'wb') as output_pickle:
        pickle.dump(dictionary, output_pickle)
def efficiencies_plot(ana, category, discri_var, plot_level, working_points):
    """Draw, per variable, the 'tau' efficiency of every working point.

    For each working point two sets of histograms are requested from
    ``ana.get_hist_samples_array``: one without and one with the working
    point's cut. Their ``'tau'`` entries form one ``Efficiency`` per variable,
    titled with the working point name.

    Args:
        ana: object providing ``get_hist_samples_array``.
        category: category forwarded to the histogram request and the drawing.
        discri_var: discriminant name, used to build the cut
            ``'<discri_var> >= <wp.cut>'`` when ``wp.cut`` is not a string.
        plot_level: level forwarded to the histogram request; also the prefix
            of the name handed to ``draw_efficiencies``.
        working_points: iterable of objects exposing ``cut`` and ``name``.

    Returns:
        dict mapping each variable key to the result of ``draw_efficiencies``.
    """
    plotted = {
        key: VARIABLES[key]
        for key in ('pt', 'eta', 'good_npv', 'averageintpercrossing')
    }
    curves = {key: [] for key in plotted}

    for wp in working_points:
        # A string cut is used verbatim; anything else is a threshold on the
        # discriminant.
        if isinstance(wp.cut, str):
            selection = wp.cut
        else:
            selection = '{0} >= {1}'.format(discri_var, wp.cut)

        all_hists = ana.get_hist_samples_array(
            plotted, plot_level, category=category)
        selected_hists = ana.get_hist_samples_array(
            plotted, plot_level, category=category, cuts=selection)

        for key in plotted:
            curves[key].append(
                Efficiency(
                    selected_hists[key]['tau'],
                    all_hists[key]['tau'],
                    title=wp.name))

    return {
        key: draw_efficiencies(effs, plot_level + '_' + key, category)
        for key, effs in curves.items()
    }
def get_efficiency(parentDirectory, denomHistName, numerHistName,
                   binsToMerge=None, numerAdd=None):
    """Sum numerator/denominator histograms over all samples into an Efficiency.

    Args:
        parentDirectory: container whose ``keys()`` yield objects with a
            ``name`` attribute; each name is looked up as an attribute of the
            container to reach the per-sample directory.
        denomHistName: attribute name of the denominator histogram.
        numerHistName: attribute name of the numerator histogram.
        binsToMerge: if truthy, passed to ``merge_bins`` on both summed
            histograms.
        numerAdd: if truthy, suffix of an extra histogram
            (``numerHistName + numerAdd``) added to the numerator per sample.

    Returns:
        ``Efficiency(numerator, denominator)`` built from the summed clones.

    Raises:
        ValueError: if ``parentDirectory`` holds no sample.
    """
    sampleNames = [entry.name for entry in parentDirectory.keys()]
    if not sampleNames:
        raise ValueError('No sample directory found!')

    def cloned(sample, histName):
        # Always work on a clone so the stored histograms stay untouched.
        return getattr(getattr(parentDirectory, sample), histName).clone()

    # The first sample seeds the running sums ...
    seed = sampleNames[0]
    hDenom = cloned(seed, denomHistName)
    hNumer = cloned(seed, numerHistName)
    if numerAdd:
        hNumer.Add(cloned(seed, numerHistName + numerAdd))

    # ... and every further sample is accumulated onto them.
    for sample in sampleNames[1:]:
        hDenom.Add(cloned(sample, denomHistName))
        hNumer.Add(cloned(sample, numerHistName))
        if numerAdd:
            hNumer.Add(cloned(sample, numerHistName + numerAdd))

    if binsToMerge:
        hDenom = hDenom.merge_bins(binsToMerge)
        hNumer = hNumer.merge_bins(binsToMerge)

    return Efficiency(hNumer, hDenom)
def rejection_linear(eff):
    """Return the complement of ``eff`` as a new ``Efficiency``.

    The passed histogram is subtracted from the total histogram (both taken as
    clones), and the difference is used as the numerator over the same total.
    The result is named ``'Rejlin_<eff.name>'`` and keeps ``eff.title``.
    """
    total_hist = asrootpy(eff.GetTotalHistogram()).Clone()
    passed_hist = asrootpy(eff.GetPassedHistogram()).Clone()

    failed_hist = total_hist - passed_hist
    failed_hist.Sumw2()

    labels = dict(name='Rejlin_{0}'.format(eff.name), title=eff.title)
    return Efficiency(failed_hist, total_hist, **labels)
def get_efficiency_array():
    """Create one empty ``Efficiency`` per entry of ``VARIABLES['plotting']``.

    Each entry supplies ``'bins'``, ``'range'`` (low, high) and ``'name'``.
    Passed and total histograms share that binning, and the efficiency title
    comes from ``get_label``.

    Returns:
        dict mapping the variable name to its ``Efficiency``.
    """
    eff_array = {}
    for var in VARIABLES['plotting']:
        nbins = var['bins']
        low, high = var['range'][0], var['range'][1]
        passed_hist = Hist(nbins, low, high)
        total_hist = Hist(nbins, low, high)
        eff_array[var['name']] = Efficiency(
            passed_hist, total_hist, title=get_label(var))
    return eff_array
def get_efficiency_graphs(d):
    """Return the ``lxyDpToEl`` efficiency graph of every sample found in ``d``.

    For each name in the module-level ``sample_names`` the sample directory is
    read as an attribute of ``d``. Clones of its ``lxyDpToEl__match``
    (numerator) and ``lxyDpToEl__total`` (denominator) histograms are turned
    into an ``Efficiency``, whose ``graph`` is collected.
    """
    def graph_for(sample_dir):
        matched = getattr(sample_dir, 'lxyDpToEl__match').clone()
        total = getattr(sample_dir, 'lxyDpToEl__total').clone()
        return Efficiency(matched, total).graph

    return [graph_for(getattr(d, sample)) for sample in sample_names]
def test_overall_efficiency():
    """Overall efficiency must not depend on the binning.

    For every statistic option 0..7 a 20-bin and a 1-bin efficiency over the
    same range are filled with identical random entries. The three values
    returned by ``overall_efficiency(overflow=True)`` must then agree.
    """
    for option in range(0, 8):
        fine = Efficiency(Hist(20, -3, 3), Hist(20, -3, 3))
        coarse = Efficiency(Hist(1, -3, 3), Hist(1, -3, 3))
        fine.SetStatisticOption(option)
        coarse.SetStatisticOption(option)

        for _ in range(1000):
            # Width 3.6 on a [-3, 3] axis: many entries land in the
            # under/overflow bins, which is why overflow=True is used below.
            x = gauss(0, 3.6)
            passed = uniform(0, 1) > 0.5
            fine.Fill(passed, x)
            coarse.Fill(passed, x)

        for component in range(3):
            assert_almost_equal(
                fine.overall_efficiency(overflow=True)[component],
                coarse.overall_efficiency(overflow=True)[component])
def eff_curve(accept, total, var, weight_field=None, prefix='off'):
    """
    Build the efficiency curve of ``var`` from two record arrays
    (selection already applied so computation should be fast
    over several vars).

    Parameters:
        accept: record array of the entries passing the selection (numerator).
        total: record array of all entries (denominator).
        var: key into ``VARIABLES`` describing binning, range, field name and
            an optional ``'scale'`` factor.
        weight_field: name of the weight field in both arrays, or ``None`` to
            fill unweighted.
        prefix: prefix joined to the variable's field name with ``'_'``;
            ``None`` means the bare field name is used.

    Returns:
        ``Efficiency`` named ``'eff_<var>'``.

    Raises:
        ValueError: if ``var`` is not a key of ``VARIABLES``.
    """
    if var not in VARIABLES:
        log.error('Wrong variable name (see variables.py)')
        raise ValueError('Wrong variable name (see variables.py)')

    var_info = VARIABLES[var]
    nbins = var_info['bins']
    low, high = var_info['range'][0], var_info['range'][1]
    hnum = Hist(nbins, low, high)
    hden = Hist(nbins, low, high)

    log.info('filling')
    if prefix is not None:
        field = prefix + '_' + var_info['name']
    else:
        field = var_info['name']

    num = accept[field]
    den = total[field]
    if 'scale' in var_info:
        # Scale into new arrays: an in-place ``*=`` would silently rescale the
        # caller's record arrays (and do so again on every call).
        num = num * var_info['scale']
        den = den * var_info['scale']

    # Only look the weights up when a field was actually requested; indexing
    # with ``None`` does not yield a usable weight array.
    num_weights = accept[weight_field] if weight_field is not None else None
    den_weights = total[weight_field] if weight_field is not None else None

    # Fill from the resolved (and scaled) columns. The previous version
    # re-indexed with ``prefix + '_' + var``, which failed for ``prefix=None``
    # and ignored the values computed above.
    fill_hist(hnum, num, num_weights)
    fill_hist(hden, den, den_weights)

    eff = Efficiency(
        hnum, hden,
        name='eff_{0}'.format(var),
        title=get_label(VARIABLES[var]))
    return eff
def test_efficiency():
    """Smoke-test ``Efficiency`` built from 1D and 2D histograms.

    Checks that the length matches the total histogram, that the graph /
    histogram views exist on recent enough ROOT versions, and that the 1D
    efficiency and error iterators yield 10 values (12 with overflow).
    """
    has_painted_views = ROOT_VERSION >= ROOTVersion(53417)

    # 1D
    eff = Efficiency(Hist(10, 0, 1), Hist(10, 0, 1))
    eff.Fill(False, 0.1)
    eff.Fill(True, 0.8)
    assert_equal(len(eff), len(eff.total))
    if has_painted_views:
        assert eff.graph
        assert eff.painted_graph
    for values in (eff.efficiencies, eff.errors):
        # 10 regular bins, plus underflow and overflow when requested.
        assert_equal(len(list(values())), 10)
        assert_equal(len(list(values(overflow=True))), 12)

    # 2D
    eff = Efficiency(
        Hist2D(10, 0, 1, 10, 0, 1),
        Hist2D(10, 0, 1, 10, 0, 1))
    eff.Fill(False, 0.1)
    eff.Fill(True, 0.8)
    assert_equal(len(eff), len(eff.total))
    if has_painted_views:
        assert eff.histogram
        assert eff.painted_histogram
def efficiency_graph(tobject_numerator, tobject_denominator):
    """Return the efficiency of numerator over denominator as a rootpy graph.

    The graph comes from ``Efficiency.CreateGraph()`` and is wrapped with
    ``asrootpy``. The original note states the intervals are Clopper-Pearson;
    that relies on the ``Efficiency`` default statistic option, which is not
    set here.
    """
    return asrootpy(
        Efficiency(tobject_numerator, tobject_denominator).CreateGraph())
def efficiency(tobject_numerator, tobject_denominator):
    """Wrap a numerator/denominator pair in an ``Efficiency`` object."""
    ratio = Efficiency(tobject_numerator, tobject_denominator)
    return ratio
def test_overall_efficiency():
    """Overall efficiency must not depend on the binning.

    For every statistic option 0..7 a 20-bin and a 1-bin efficiency over the
    same range are filled with identical random entries. The three values
    returned by ``overall_efficiency(overflow=True)`` must then agree.
    """
    for stat_op in range(0, 8):
        Eff = Efficiency(Hist(20, -3, 3), Hist(20, -3, 3))
        Eff_1bin = Efficiency(Hist(1, -3, 3), Hist(1, -3, 3))
        Eff.SetStatisticOption(stat_op)
        Eff_1bin.SetStatisticOption(stat_op)

        # ``range`` instead of the Python-2-only ``xrange`` so the test runs
        # on both Python 2 and Python 3.
        for _ in range(1000):
            x = gauss(0, 3.6)
            w = uniform(0, 1)
            passed = w > 0.5
            Eff.Fill(passed, x)
            Eff_1bin.Fill(passed, x)

        for index in range(3):
            assert_almost_equal(
                Eff.overall_efficiency(overflow=True)[index],
                Eff_1bin.overall_efficiency(overflow=True)[index])
# Project "nPVs" from eff_ntuple into the three "passed" histograms, one per
# trigger selection. Every selection requires recoPt > 40.
# NOTE(review): the histograms and total_vs_pu are created outside this
# excerpt -- confirm they share the same binning.

# UCT candidate: matched, with max(l1gPt, l1gRegionEt) above 25.
eff_ntuple.Draw("nPVs",
                "recoPt > 40 && l1gMatch && max(l1gPt, l1gRegionEt) > 25",
                hist=uct_pass_vs_pu)
# Same UCT selection plus the relative-isolation requirement
# l1gJetPt/max(l1gPt, l1gRegionEt) - 1 < 0.5 (adjacent string literals are
# concatenated into one selection string).
eff_ntuple.Draw("nPVs",
                "recoPt > 40 && l1gMatch && max(l1gPt, l1gRegionEt) > 25"
                " && l1gJetPt/max(l1gPt, l1gRegionEt) - 1 < 0.5",
                hist=uct_iso_pass_vs_pu)
# L1 candidate: matched, with l1Pt above 44.
eff_ntuple.Draw("nPVs",
                "recoPt > 40 && l1Match && l1Pt > 44",
                hist=l1_pass_vs_pu)

canvas = Canvas(800, 800)

# One efficiency per selection, all relative to total_vs_pu; they differ only
# in colour (L1 red, UCT blue, isolated UCT green).
l1_efficiency = Efficiency(l1_pass_vs_pu, total_vs_pu).decorate(
    linecolor='red', linewidth=2, markerstyle=20, markercolor='red')
uct_efficiency = Efficiency(uct_pass_vs_pu, total_vs_pu).decorate(
    linecolor='blue', linewidth=2, markerstyle=20, markercolor='blue')
uct_iso_efficiency = Efficiency(uct_iso_pass_vs_pu, total_vs_pu).decorate(
    linecolor='green', linewidth=2, markerstyle=20, markercolor='green')
def _styled_efficiency(numer, denom, name, color):
    """Build an ``Efficiency`` with the settings shared by every curve."""
    eff = Efficiency(numer, denom, name=name)
    eff.SetStatisticOption(0)  # kFCP
    eff.SetConfidenceLevel(0.682689492137)  # one sigma
    eff.linecolor = color
    eff.linewidth = 2
    eff.markercolor = color
    eff.markerstyle = 1
    return eff


def make_efficiency_plots_pls(names, outname, colors):
    """Overlay several efficiency curves that share one denominator.

    ``names[0]`` selects the denominator in the module-level ``histograms``;
    every further name is a numerator. The first curve also draws the frame
    (y range 0..1.2) and a horizontal line at 1.0. Every curve is drawn with
    ``"same p"`` in ``colors[i]``. After each curve the pad is printed to
    ``options.outdir + outname + ".png"``, so the file always shows the
    overlays drawn so far.

    Args:
        names: histogram names, denominator first; at least two entries.
        outname: name given to every efficiency and stem of the output file.
        colors: one colour per entry of ``names`` (``colors[0]`` is unused).

    Raises:
        ValueError: if fewer than two names are given, or if ``names`` and
            ``colors`` differ in length.
    """
    if len(names) < 2:
        raise ValueError("I need 2 or more histograms!")
    if len(names) != len(colors):
        raise ValueError("I need a color for each histogram!")

    hname = outname
    denom = histograms[names[0]]

    # ``range`` rather than the Python-2-only ``xrange``; index 0 is the
    # denominator, so the curves start at index 1.
    for i in range(1, len(names)):
        numer = histograms[names[i]]
        eff = _styled_efficiency(numer, denom, hname, colors[i])

        if i == 1:
            # The first curve provides the axes: an emptied copy of the total
            # histogram serves as the frame.
            frame = eff.GetCopyTotalHisto().Clone(hname + "_frame")
            frame.Reset()
            frame.SetMinimum(0)
            frame.SetMaximum(1.2)
            frame.GetYaxis().SetTitle("#varepsilon")
            frame.SetStats(0)
            frame.Draw()
            # Reference line at 100% efficiency across the full x range.
            tline = TLine()
            tline.SetLineColor(1)
            xmin, xmax = frame.GetXaxis().GetXmin(), frame.GetXaxis().GetXmax()
            tline.DrawLine(xmin, 1.0, xmax, 1.0)

        eff.Draw("same p")
        # Tie the efficiency's lifetime to the pad so it is not garbage
        # collected while still displayed.
        keepalive(gPad.func(), eff)
        gPad.Print(options.outdir + hname + ".png")
for in_q, q in zip(in_quantiles, quantiles): print "..", in_q, q h1a.linecolor = options.palette[i] h1a.linewidth = 2 h1a.Draw("hist") gPad.Print(options.outdir + hname + ".png") # Make efficiency vs gen pT k = "denom" hname = "eff_vs_genpt_l1pt20_%s" % k denom = histograms[hname] k = "numer" hname = "eff_vs_genpt_l1pt20_%s" % k numer = histograms[hname] hname = "eff_vs_genpt_l1pt20" eff = Efficiency(numer, denom, name=hname) eff.SetStatisticOption(0) # kFCP eff.SetConfidenceLevel(0.682689492137) # one sigma eff.linecolor = 'gray' eff.linewidth = 2 eff.markercolor = 'gray' eff.markerstyle = 1 # frame = eff.GetCopyTotalHisto().Clone(hname + "_frame") frame.Reset() frame.SetMinimum(0) frame.SetMaximum(1.2) frame.GetYaxis().SetTitle("#varepsilon") frame.SetStats(0) frame.Draw() tline = TLine()