Ejemplo n.º 1
0
def produce_pickle_file(hist_passed_data, hist_total_data, file_name):
    """Pickle the per-pT-bin data efficiencies to ``file_name``.

    An ``Efficiency`` is built from the passed/total histograms. For every
    pair of consecutive edges in the module-level ``jet_pt_bins`` the
    efficiency and its upper/lower errors are stored as::

        {'pt_<lower>_<upper>': {'data': {'efficiency': ...,
                                         'err_up': ...,
                                         'err_down': ...}}}

    Args:
        hist_passed_data: histogram of the entries passing the selection.
        hist_total_data: histogram of all entries.
        file_name: path of the pickle file to write (opened in binary mode).
    """
    data_efficiency = Efficiency(hist_passed_data, hist_total_data)

    dictionary = {}
    for i in range(len(jet_pt_bins) - 1):
        lower_edge_pt = jet_pt_bins[i]
        upper_edge_pt = jet_pt_bins[i + 1]
        pt_bin_range = 'pt_' + str(lower_edge_pt) + '_' + str(upper_edge_pt)

        # The efficiency is queried at i + 1, not i (presumably because
        # bin 0 is the underflow bin -- confirm against the histogram type).
        bin_number = i + 1
        dictionary[pt_bin_range] = {
            'data': {
                'efficiency': data_efficiency.GetEfficiency(bin_number),
                'err_up': data_efficiency.GetEfficiencyErrorUp(bin_number),
                'err_down': data_efficiency.GetEfficiencyErrorLow(bin_number),
            },
        }

    # Open the file only once the payload is complete, and close it
    # deterministically: a failure above no longer leaves a truncated file
    # behind and the handle is never leaked.
    with open(file_name, 'wb') as output_pickle:
        pickle.dump(dictionary, output_pickle)
Ejemplo n.º 2
0
def efficiencies_plot(ana, category, discri_var, plot_level, working_points):
    """Draw, per variable, the tau efficiency of every working point.

    For each working point two sets of histograms are requested from
    ``ana.get_hist_samples_array``: one without and one with the
    working-point cut. Their ``'tau'`` entries form the denominator and
    numerator of an ``Efficiency`` titled with the working-point name.

    Args:
        ana: object providing ``get_hist_samples_array``.
        category: forwarded to ``get_hist_samples_array`` and
            ``draw_efficiencies``.
        discri_var: name of the discriminant, used to build the cut string
            when a working point's ``cut`` is not already a string.
        plot_level: forwarded to ``get_hist_samples_array``; also used as the
            prefix of the name passed to ``draw_efficiencies``.
        working_points: iterable of objects exposing ``cut`` and ``name``.

    Returns:
        dict mapping each variable name to the result of
        ``draw_efficiencies`` for that variable.
    """
    plotted_names = ('pt', 'eta', 'good_npv', 'averageintpercrossing')
    plotted_vars = {name: VARIABLES[name] for name in plotted_names}

    efficiencies = {name: [] for name in plotted_vars}

    for wp in working_points:
        # A string cut is used verbatim; anything else is treated as a
        # threshold on the discriminant.
        if isinstance(wp.cut, str):
            cut = wp.cut
        else:
            cut = '{0} >= {1}'.format(discri_var, wp.cut)

        denominators = ana.get_hist_samples_array(plotted_vars,
                                                  plot_level,
                                                  category=category)
        numerators = ana.get_hist_samples_array(plotted_vars,
                                                plot_level,
                                                category=category,
                                                cuts=cut)
        for name in plotted_vars:
            wp_efficiency = Efficiency(numerators[name]['tau'],
                                       denominators[name]['tau'],
                                       title=wp.name)
            efficiencies[name].append(wp_efficiency)

    return {
        name: draw_efficiencies(effs, plot_level + '_' + name, category)
        for name, effs in efficiencies.items()
    }
Ejemplo n.º 3
0
    def get_efficiency(parentDirectory,
                       denomHistName,
                       numerHistName,
                       binsToMerge=None,
                       numerAdd=None):
        """Sum histograms over all sample directories and return their ratio.

        Args:
            parentDirectory: object whose ``keys()`` yield items with a
                ``name`` attribute; each name is looked up as an attribute
                holding that sample's histograms.
            denomHistName: attribute name of the denominator histogram.
            numerHistName: attribute name of the numerator histogram.
            binsToMerge: if truthy, passed to ``merge_bins`` on both summed
                histograms before the efficiency is built.
            numerAdd: if truthy, suffix appended to ``numerHistName``; the
                histogram under that name is added to the numerator as well.

        Returns:
            ``Efficiency(numerator, denominator)`` of the summed histograms.

        Raises:
            ValueError: if ``parentDirectory`` has no keys.
        """
        sampleNames = [k.name for k in parentDirectory.keys()]
        if not sampleNames:
            raise ValueError('No sample directory found!')

        def _cloned(sample, histName):
            # Work on clones so the stored histograms are never modified.
            return getattr(getattr(parentDirectory, sample), histName).clone()

        firstSample, otherSamples = sampleNames[0], sampleNames[1:]

        # The first sample seeds the running sums ...
        hDenom = _cloned(firstSample, denomHistName)
        hNumer = _cloned(firstSample, numerHistName)
        if numerAdd:
            hNumer.Add(_cloned(firstSample, numerHistName + numerAdd))

        # ... and every remaining sample is accumulated onto them.
        for sample in otherSamples:
            hDenom.Add(_cloned(sample, denomHistName))
            hNumer.Add(_cloned(sample, numerHistName))
            if numerAdd:
                hNumer.Add(_cloned(sample, numerHistName + numerAdd))

        if binsToMerge:
            hDenom = hDenom.merge_bins(binsToMerge)
            hNumer = hNumer.merge_bins(binsToMerge)
        return Efficiency(hNumer, hDenom)
Ejemplo n.º 4
0
def rejection_linear(eff):
    """Return the complement of ``eff``: (total - passed) over total.

    The total and passed histograms of ``eff`` are cloned, their difference
    is taken as the "not passed" histogram, and a new ``Efficiency`` named
    ``'Rejlin_<eff.name>'`` with the title of ``eff`` is built from it.
    """
    total = asrootpy(eff.GetTotalHistogram()).Clone()
    passed = asrootpy(eff.GetPassedHistogram()).Clone()

    failed = total - passed
    failed.Sumw2()

    return Efficiency(failed,
                      total,
                      name='Rejlin_{0}'.format(eff.name),
                      title=eff.title)
Ejemplo n.º 5
0
def get_efficiency_array():
    """Create one empty ``Efficiency`` per entry of ``VARIABLES['plotting']``.

    Returns:
        dict mapping each variable's ``'name'`` to an ``Efficiency`` built
        from two freshly created histograms with that variable's binning
        (``'bins'`` and ``'range'``), titled with ``get_label(var)``.
    """

    def _empty_hist(var):
        # Passed and total histograms share the variable's binning.
        return Hist(var['bins'], var['range'][0], var['range'][1])

    eff_array = {}
    for var in VARIABLES['plotting']:
        passed = _empty_hist(var)
        total = _empty_hist(var)
        eff_array[var['name']] = Efficiency(passed, total, title=get_label(var))
    return eff_array
Ejemplo n.º 6
0
def get_efficiency_graphs(d):
    """Return one efficiency graph per entry of the global ``sample_names``.

    For each sample, the ``lxyDpToEl__match`` and ``lxyDpToEl__total``
    histograms found under ``getattr(d, sample)`` are cloned and used as
    numerator and denominator of an ``Efficiency``; its ``graph`` is kept.

    Args:
        d: object exposing one attribute per sample name.

    Returns:
        list of graphs, in the order of ``sample_names``.
    """

    def _graph_for(sample):
        sample_dir = getattr(d, sample)
        matched = getattr(sample_dir, 'lxyDpToEl__match').clone()
        total = getattr(sample_dir, 'lxyDpToEl__total').clone()
        return Efficiency(matched, total).graph

    return [_graph_for(sample) for sample in sample_names]
Ejemplo n.º 7
0
def test_overall_efficiency():
    """Overall efficiency must not depend on the binning.

    For each statistic option 0..7, a 20-bin and a 1-bin ``Efficiency`` over
    the same range are filled with identical random entries; the three
    components returned by ``overall_efficiency(overflow=True)`` must agree.
    """
    for stat_op in range(8):
        fine = Efficiency(Hist(20, -3, 3), Hist(20, -3, 3))
        single = Efficiency(Hist(1, -3, 3), Hist(1, -3, 3))
        fine.SetStatisticOption(stat_op)
        single.SetStatisticOption(stat_op)

        for _ in range(1000):
            x = gauss(0, 3.6)
            w = uniform(0, 1)
            passed = w > 0.5
            fine.Fill(passed, x)
            single.Fill(passed, x)

        # Compare the three returned components one by one.
        for component in range(3):
            assert_almost_equal(
                fine.overall_efficiency(overflow=True)[component],
                single.overall_efficiency(overflow=True)[component])
Ejemplo n.º 8
0
def eff_curve(accept, total, var, weight_field=None, prefix='off'):
    """
    Build the efficiency curve of one variable from two record arrays
    (selection already applied so computation should be fast over several vars)

    Args:
        accept: record array of the entries passing the selection (numerator).
        total: record array of all entries (denominator).
        var: key into ``VARIABLES`` selecting binning, range, field name and
            optional ``'scale'``.
        weight_field: name of the weight field in both arrays, or ``None``
            to fill unweighted.
        prefix: prefix of the field name (``'<prefix>_<name>'``); ``None``
            means the bare ``VARIABLES[var]['name']`` is used.

    Returns:
        ``Efficiency`` named ``'eff_<var>'`` built from the filled numerator
        and denominator histograms.

    Raises:
        ValueError: if ``var`` is not a key of ``VARIABLES``.
    """

    if var not in VARIABLES:
        log.error('Wrong variable name (see variables.py)')
        raise ValueError('Wrong variable name (see variables.py)')

    var_info = VARIABLES[var]
    hnum = Hist(var_info['bins'], var_info['range'][0], var_info['range'][1])
    hden = Hist(var_info['bins'], var_info['range'][0], var_info['range'][1])

    log.info('filling')
    if prefix is not None:
        field = prefix + '_' + var_info['name']
    else:
        field = var_info['name']

    num = accept[field]
    den = total[field]

    if 'scale' in var_info:
        # Scale out of place: the previous in-place ``*=`` rescaled the
        # caller's arrays, so calling this twice applied the scale twice.
        num = num * var_info['scale']
        den = den * var_info['scale']

    # Only look up the weights when a weight field was actually requested;
    # indexing the arrays with the default ``None`` does not yield weights.
    # NOTE(review): assumes fill_hist accepts ``None`` for "no weights"
    # (true for root_numpy.fill_hist) -- confirm for the helper used here.
    num_weights = accept[weight_field] if weight_field is not None else None
    den_weights = total[weight_field] if weight_field is not None else None

    # Fill from the (scaled) values selected above. The previous code
    # re-derived the field as ``prefix + '_' + var``, which raised a
    # TypeError for ``prefix=None``.
    fill_hist(hnum, num, num_weights)
    fill_hist(hden, den, den_weights)
    eff = Efficiency(hnum,
                     hden,
                     name='eff_{0}'.format(var),
                     title=get_label(var_info))

    return eff
Ejemplo n.º 9
0
def test_efficiency():
    """Smoke-test ``Efficiency`` built from 1D and from 2D histograms."""
    fills = ((False, 0.1), (True, 0.8))

    # 1D: ten bins, plus underflow/overflow when requested.
    eff = Efficiency(Hist(10, 0, 1), Hist(10, 0, 1))
    for passed, x in fills:
        eff.Fill(passed, x)
    assert_equal(len(eff), len(eff.total))
    if ROOT_VERSION >= ROOTVersion(53417):
        assert eff.graph
    assert eff.painted_graph
    for per_bin in (eff.efficiencies, eff.errors):
        assert_equal(len(list(per_bin())), 10)
        assert_equal(len(list(per_bin(overflow=True))), 12)

    # 2D
    eff = Efficiency(Hist2D(10, 0, 1, 10, 0, 1), Hist2D(10, 0, 1, 10, 0, 1))
    for passed, x in fills:
        eff.Fill(passed, x)
    assert_equal(len(eff), len(eff.total))
    if ROOT_VERSION >= ROOTVersion(53417):
        assert eff.histogram
    assert eff.painted_histogram
Ejemplo n.º 10
0
    def efficiency_graph(tobject_numerator, tobject_denominator):
        """Return the graph of numerator / denominator as a rootpy object.

        An ``Efficiency`` is built from the two inputs and the result of its
        ``CreateGraph()`` is wrapped with ``asrootpy``.

        NOTE(review): the previous docstring promised Clopper-Pearson
        intervals; no statistic option is set here, so that relies on the
        default of ``Efficiency`` -- confirm.
        """
        ratio = Efficiency(tobject_numerator, tobject_denominator)
        graph = ratio.CreateGraph()
        return asrootpy(graph)
Ejemplo n.º 11
0
    def efficiency(tobject_numerator, tobject_denominator):
        """Return ``Efficiency(numerator, denominator)`` for the two inputs."""
        ratio = Efficiency(tobject_numerator, tobject_denominator)
        return ratio
Ejemplo n.º 12
0
def test_efficiency():
    """Check basic ``Efficiency`` behaviour for 1D and 2D histograms."""
    entries = ((False, 0.1), (True, 0.8))

    # 1D: expect 10 regular bins, 12 once under/overflow are included.
    eff = Efficiency(Hist(10, 0, 1), Hist(10, 0, 1))
    for passed, value in entries:
        eff.Fill(passed, value)
    assert_equal(len(eff), len(eff.total))
    if ROOT_VERSION >= ROOTVersion(53417):
        assert eff.graph
    assert eff.painted_graph
    for accessor in (eff.efficiencies, eff.errors):
        assert_equal(len(list(accessor())), 10)
        assert_equal(len(list(accessor(overflow=True))), 12)

    # 2D
    eff = Efficiency(Hist2D(10, 0, 1, 10, 0, 1), Hist2D(10, 0, 1, 10, 0, 1))
    for passed, value in entries:
        eff.Fill(passed, value)
    assert_equal(len(eff), len(eff.total))
    if ROOT_VERSION >= ROOTVersion(53417):
        assert eff.histogram
    assert eff.painted_histogram
Ejemplo n.º 13
0
def test_overall_efficiency():
    """Overall efficiency must be independent of the binning.

    For each statistic option 0..7, a 20-bin and a 1-bin ``Efficiency`` over
    the same range receive identical random fills; the three components
    returned by ``overall_efficiency(overflow=True)`` must then agree.
    """
    for stat_op in range(0, 8):
        Eff = Efficiency(Hist(20, -3, 3), Hist(20, -3, 3))
        Eff_1bin = Efficiency(Hist(1, -3, 3), Hist(1, -3, 3))
        Eff.SetStatisticOption(stat_op)
        Eff_1bin.SetStatisticOption(stat_op)

        # ``range`` instead of ``xrange``: the latter does not exist on
        # Python 3, and ``range`` works on both major versions.
        for _ in range(1000):
            x = gauss(0, 3.6)
            w = uniform(0, 1)
            passed = w > 0.5
            Eff.Fill(passed, x)
            Eff_1bin.Fill(passed, x)

        # Compare the three returned components one by one.
        for component in range(3):
            assert_almost_equal(
                Eff.overall_efficiency(overflow=True)[component],
                Eff_1bin.overall_efficiency(overflow=True)[component])
Ejemplo n.º 14
0
# Selections used to fill the "passed" histograms as a function of nPVs.
_UCT_CUT = "recoPt > 40 && l1gMatch && max(l1gPt, l1gRegionEt) > 25"
_UCT_ISO_CUT = _UCT_CUT + " && l1gJetPt/max(l1gPt, l1gRegionEt) - 1 < 0.5"
_L1_CUT = "recoPt > 40 && l1Match && l1Pt > 44"

eff_ntuple.Draw("nPVs", _UCT_CUT, hist=uct_pass_vs_pu)
eff_ntuple.Draw("nPVs", _UCT_ISO_CUT, hist=uct_iso_pass_vs_pu)
eff_ntuple.Draw("nPVs", _L1_CUT, hist=l1_pass_vs_pu)

canvas = Canvas(800, 800)


def _styled_efficiency(passed, color):
    """Efficiency of ``passed`` over ``total_vs_pu``, drawn in ``color``."""
    ratio = Efficiency(passed, total_vs_pu)
    return ratio.decorate(linecolor=color,
                          linewidth=2,
                          markerstyle=20,
                          markercolor=color)


# All three efficiencies share the same denominator and differ only in the
# numerator histogram and the drawing colour.
l1_efficiency = _styled_efficiency(l1_pass_vs_pu, 'red')

uct_efficiency = _styled_efficiency(uct_pass_vs_pu, 'blue')

uct_iso_efficiency = _styled_efficiency(uct_iso_pass_vs_pu, 'green')
Ejemplo n.º 15
0
def make_efficiency_plots_pls(names, outname, colors):
    if len(names) < 2:
        raise ValueError("I need 2 or more histograms!")

    if len(names) != len(colors):
        raise ValueError("I need a color for each histogram!")

    # First
    n = len(names)
    i = 1
    hname = names[0]
    denom = histograms[hname]
    hname = names[i]
    numer = histograms[hname]
    hname = outname
    eff = Efficiency(numer, denom, name=hname)
    eff.SetStatisticOption(0)  # kFCP
    eff.SetConfidenceLevel(0.682689492137)  # one sigma
    eff.linecolor = colors[i]
    eff.linewidth = 2
    eff.markercolor = colors[i]
    eff.markerstyle = 1
    #
    frame = eff.GetCopyTotalHisto().Clone(hname + "_frame")
    frame.Reset()
    frame.SetMinimum(0)
    frame.SetMaximum(1.2)
    frame.GetYaxis().SetTitle("#varepsilon")
    frame.SetStats(0)
    frame.Draw()
    tline = TLine()
    tline.SetLineColor(1)
    xmin, xmax = frame.GetXaxis().GetXmin(), frame.GetXaxis().GetXmax()
    tline.DrawLine(xmin, 1.0, xmax, 1.0)
    #
    eff.Draw("same p")
    keepalive(gPad.func(), eff)
    gPad.Print(options.outdir + hname + ".png")

    # Second and on
    for i in xrange(2, n):
        hname = names[0]
        denom = histograms[hname]
        hname = names[i]
        numer = histograms[hname]
        hname = outname
        eff = Efficiency(numer, denom, name=hname)
        eff.SetStatisticOption(0)  # kFCP
        eff.SetConfidenceLevel(0.682689492137)  # one sigma
        eff.linecolor = colors[i]
        eff.linewidth = 2
        eff.markercolor = colors[i]
        eff.markerstyle = 1
        eff.Draw("same p")
        keepalive(gPad.func(), eff)
        gPad.Print(options.outdir + hname + ".png")
    return
    for in_q, q in zip(in_quantiles, quantiles):
        print "..", in_q, q
    h1a.linecolor = options.palette[i]
    h1a.linewidth = 2
    h1a.Draw("hist")
    gPad.Print(options.outdir + hname + ".png")

    # Make efficiency vs gen pT
    k = "denom"
    hname = "eff_vs_genpt_l1pt20_%s" % k
    denom = histograms[hname]
    k = "numer"
    hname = "eff_vs_genpt_l1pt20_%s" % k
    numer = histograms[hname]
    hname = "eff_vs_genpt_l1pt20"
    eff = Efficiency(numer, denom, name=hname)
    eff.SetStatisticOption(0)  # kFCP
    eff.SetConfidenceLevel(0.682689492137)  # one sigma
    eff.linecolor = 'gray'
    eff.linewidth = 2
    eff.markercolor = 'gray'
    eff.markerstyle = 1
    #
    frame = eff.GetCopyTotalHisto().Clone(hname + "_frame")
    frame.Reset()
    frame.SetMinimum(0)
    frame.SetMaximum(1.2)
    frame.GetYaxis().SetTitle("#varepsilon")
    frame.SetStats(0)
    frame.Draw()
    tline = TLine()