Example #1
def GetTGraphAsymmErrors(histo, offset):
    """Return a TGraphAsymmErrors built from histo with its points shifted
    horizontally by offset (the error bars keep their original extent)."""
    graph_in = TGraphAsymmErrors(histo)
    graph_in.SetName(histo.GetName() + '_tmp')
    graph_out = TGraphAsymmErrors(histo)
    graph_out.SetName(histo.GetName() + '_offset')

    # Shift each point by the offset and adjust the asymmetric x errors so
    # that the error bars still span the original bins.
    for i in range(graph_in.GetN()):
        graph_out.GetX()[i] = graph_in.GetX()[i] + offset
        graph_out.GetEXlow()[i] = graph_in.GetEXlow()[i] + offset
        graph_out.GetEXhigh()[i] = graph_in.GetEXhigh()[i] - offset

    # Recompute the x-axis limits from the shifted points.
    n = graph_out.GetN()
    x_min = graph_out.GetX()[0] - graph_out.GetEXlow()[0]
    if x_min > 0:
        x_min = 0
    x_max = graph_out.GetX()[n - 1] + graph_out.GetEXhigh()[n - 1]
    graph_out.GetXaxis().SetLimits(x_min, x_max)

    return graph_out
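A minimal usage sketch, assuming PyROOT is available; the histogram, canvas and output-file names below are illustrative and not part of the original snippet:

# Hypothetical usage: shift a graph's points by 0.2 in x so that it can be
# drawn next to another graph with the same binning without overlapping.
from ROOT import TH1F, TCanvas

h = TH1F('h', 'example;x;entries', 10, 0.0, 10.0)
h.FillRandom('gaus', 1000)

g_shifted = GetTGraphAsymmErrors(h, 0.2)

c = TCanvas('c', 'c')
g_shifted.Draw('AP')
c.Print('offset_graph.png')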
Example #2
def uncertainty_band(process, region, calculation, uncertainty, estimation):
    """Calculates an uncertainty band (TGraphAsymmErrors) for a specific
    uncertainty.

    Any uncertainties (except statistical) that have a shape component will
    have the shape component converted to an overall component (considered in
    addition to any existing overall component).

    NOTE: CURRENTLY ROOT'S STATISTICAL ERRORS ARE USED AND THIS METHOD IS
    DISABLED
    NOTE: The statistical error calculated is the statistical error on the
    estimate given the poisson statistics of the unweighted samples. This is
    calculated per-bin as:

        uncertainty = sqrt(unweighted) * weighted / unweighted
                    = weighted / sqrt(unweighted)

    For samples such as data, where there is no weighting applied, this just
    reduces to sqrt(weighted) = sqrt(unweighted), which is the usual
    uncertainty applied by ROOT.

    NOTE: The y-component of the uncertainty band will NOT be set to bin
    content because this leads to errors in usage when combining the bands
    and is only really necessary when computing the final combined band, so
    instead simply pass a base for the error band when calling
    combined_uncertainty_band.

    Args:
        process: The process to consider
        region: The region to consider
        calculation: The calculation, which should return a histogram
        uncertainty: The Uncertainty subclass to consider, or None to compute
            statistical uncertainty of Monte Carlo samples
        estimation: The Estimation subclass to consider

    Returns:
        A TGraphAsymmErrors representing the error band.
    """
    # Compute the nominal histogram
    nominal = estimation(calculation)(process, region)

    # Compute and unpack variations
    if uncertainty is not None:
        # Perform the uncertainty estimation
        variations = estimation(uncertainty(calculation))(process, region)

        # Unpack variations
        overall_up, overall_down, shape_up, shape_down = variations

        # Convert any shape variations to overall
        if shape_up is None:
            shape_overall_up = None
        else:
            shape_overall_up = to_overall(shape_up, nominal)
            shape_up = None
        if shape_down is None:
            shape_overall_down = None
        else:
            shape_overall_down = to_overall(shape_down, nominal)
            shape_down = None
    else:
        # We're computing statistical variation, so we don't need these
        overall_up = overall_down = None
        shape_up = shape_down = None
        shape_overall_up = shape_overall_down = None

        # TODO: What's the idea here? This doesn't work as it is.
        # For computing the uncertainty of weighted MC samples, we need the
        # unweighted histogram
        #unweighted = estimation(calculation)(
        #process,
        #region.weighted(False),
        #weighted_combination = False
        #)

    # Create the error band.  We pass it the nominal histogram just to get
    # the binning correct.  The graph will also extract values and errors
    # from the histogram, but that's okay because we'll overwrite them
    # below.
    band = TGraphAsymmErrors(nominal)

    # Get the number of bins in the histogram
    bins = nominal.GetNbinsX()

    # Go through each point in the graph and 0-out the Y-value and Y-error.
    # Unfortunately we can't set the Y-value individually (which would have
    # been great since the X-values would already be at bin centers).
    # Anyway, no big deal, X-values are easy to set.  The X-errors will
    # already have been set to the bin widths.
    for bin in xrange(0, bins):
        band.SetPoint(bin, band.GetX()[bin], 0)
        band.SetPointEYhigh(bin, 0.0)
        band.SetPointEYlow(bin, 0.0)

    # Loop over all bins and compute uncertainties
    for bin, point in zip(range(1, bins + 1), range(0, bins)):
        # Get the bin content
        content = nominal.GetBinContent(bin)

        # If the content is 0, there are no uncertainties, because we only
        # consider overall and statistical uncertainties
        if content == 0.0:
            band.SetPointEYhigh(point, 0.0)
            band.SetPointEYlow(point, 0.0)
            continue

        # Create a list of fractional variations for this bin.  These lists
        # will hold FRACTIONAL variations, i.e. variations normalized to bin
        # content, and will be converted to absolute variations below when they
        # are set as errors.
        up_variations = []
        down_variations = []

        # Add any overall variations
        if overall_up is not None:
            up_variations.append(overall_up - 1.0)
        if overall_down is not None:
            down_variations.append(1.0 - overall_down)
        if shape_overall_up is not None:
            up_variations.append(shape_overall_up - 1.0)
        if shape_overall_down is not None:
            down_variations.append(1.0 - shape_overall_down)

        # TODO: What's the point of using the unweighted distribution? It
        # makes no sense to me.
        # Add the statistical variation if uncertainty is None.  Note that we
        # compute this for the statistics of the unweighted Monte Carlo and not
        # the weighted bin count.
        #if uncertainty is None:
        ## Get the unweighted content
        #unweighted_content = unweighted.GetBinContent(bin)

        ## Calculate error if possible
        #if content > 0.0 and unweighted_content > 0.0:
        ## The extra factor of 1/content is just because we normalize
        ## everything to content for combining together.  It has nothing
        ## to do with the derivation of the uncertainty, and it is
        ## multiplied out below.
        #statistical_variation = (
        #content / sqrt(unweighted_content)
        #) / content
        #up_variations.append(statistical_variation)
        #down_variations.append(statistical_variation)

        # Statistical error; use the error from the distribution directly
        if uncertainty is None:
            error = nominal.GetBinError(bin)
            band.SetPointEYhigh(point, error)
            band.SetPointEYlow(point, error)
        else:
            # Set the point and error.  Note that, since we sum things in
            # quadrature, it really doesn't matter how we compute the
            # differences above.
            band.SetPointEYhigh(point, sum_quadrature(up_variations) * content)
            band.SetPointEYlow(point,
                               sum_quadrature(down_variations) * content)

    # All done
    return band
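The docstring above describes two pieces of arithmetic: the per-bin statistical error on a weighted Monte Carlo sample, weighted / sqrt(unweighted), and the conversion of fractional variations into an absolute error by summing them in quadrature and scaling by the bin content. A small standalone sketch of both follows; the helper names are illustrative, and sum_quadrature here is only a stand-in for the one imported by the original module:

# Standalone sketch of the arithmetic described in the docstring above
# (hypothetical helpers, not the original module's implementations).
from math import sqrt

def sum_quadrature(values):
    # Combine independent fractional variations in quadrature.
    return sqrt(sum(v ** 2 for v in values))

def mc_statistical_error(weighted, unweighted):
    # Poisson error on a weighted MC bin: sqrt(unweighted) * weighted / unweighted.
    return weighted / sqrt(unweighted) if unweighted > 0 else 0.0

# Example: a bin with 400 raw MC events carrying a total weight of 50.0.
content = 50.0
print(mc_statistical_error(content, 400))       # 2.5

# Example: a +4% overall variation and a shape variation converted to a +3%
# overall variation, combined in quadrature and scaled back to an absolute
# error on the bin content.
up_variations = [0.04, 0.03]
print(sum_quadrature(up_variations) * content)  # 2.5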
Example #3
def plotDataOverMCEff(hist_mc_tight,
                      hist_mc_loose,
                      hist_data_tight,
                      hist_data_loose,
                      plot_name='fakerate.pdf'):

    g = TGraphAsymmErrors(hist_mc_tight)
    g.Divide(hist_mc_tight, hist_mc_loose)
    g.GetYaxis().SetTitle('Fake rate')
    g.GetXaxis().SetTitle(hist_mc_tight.GetXaxis().GetTitle())
    g.GetYaxis().SetTitleOffset(1.2)
    g.GetYaxis().SetTitleOffset(1.3)

    g.SetLineColor(2)
    g.SetMarkerColor(2)

    g_data = TGraphAsymmErrors(hist_data_tight)
    g_data.Divide(hist_data_tight, hist_data_loose)
    g_data.GetYaxis().SetTitle('Fake rate')
    g_data.GetXaxis().SetTitle(hist_data_tight.GetXaxis().GetTitle())
    g_data.GetYaxis().SetTitleOffset(1.2)
    g_data.GetYaxis().SetTitleOffset(1.3)
    g_data.SetMarkerColor(1)

    g_vals = g.GetY()
    g_data_vals = g_data.GetY()

    g_ratio = g_data.Clone('ratio')

    for i in xrange(g_data.GetN()):
        ratio = g_data_vals[i] / g_vals[i] if g_vals[i] else 0.
        g_ratio.SetPoint(i, g.GetX()[i], ratio)

        rel_y_low = math.sqrt(
            (g_data.GetErrorYlow(i) / g_data_vals[i])**2 +
            (g.GetErrorYlow(i) / g_vals[i])**2
        ) if g_data_vals[i] > 0. and g_vals[i] > 0. else 0.

        g_ratio.SetPointEYlow(i, rel_y_low * ratio)

        rel_y_high = math.sqrt(
            (g_data.GetErrorYhigh(i) / g_data_vals[i])**2 +
            (g.GetErrorYhigh(i) / g_vals[i])**2
        ) if g_data_vals[i] > 0. and g_vals[i] > 0. else 0.

        g_ratio.SetPointEYhigh(i, rel_y_high * ratio)

    # Gymnastics to get same label sizes etc in ratio and main plot
    ytp_ratio = 2.
    xtp_ratio = 2.

    # hr.GetYaxis().SetNdivisions(4)

    g_ratio.GetYaxis().SetTitleSize(g.GetYaxis().GetTitleSize() * xtp_ratio)
    g_ratio.GetXaxis().SetTitleSize(g.GetXaxis().GetTitleSize() * ytp_ratio)

    g_ratio.GetYaxis().SetTitleOffset(g.GetYaxis().GetTitleOffset() /
                                      xtp_ratio)
    g_ratio.GetXaxis().SetTitleOffset(
        g.GetXaxis().GetTitleOffset())  # / ytp_ratio)

    g_ratio.GetYaxis().SetLabelSize(g.GetYaxis().GetLabelSize() * xtp_ratio)
    g_ratio.GetXaxis().SetLabelSize(g.GetXaxis().GetLabelSize() * ytp_ratio)

    g_data.GetXaxis().SetLabelColor(0)
    g_data.GetXaxis().SetLabelSize(0)
    g.GetXaxis().SetLabelColor(0)
    g.GetXaxis().SetLabelSize(0)

    g_ratio.GetXaxis().SetTitle(g.GetXaxis().GetTitle())

    # maxy = 1.1 * min(g.GetMaximum(), g_data.GetMaximum(), 0.2)
    g.GetYaxis().SetRangeUser(0.001, 0.2)

    cv, pad, padr = HistDrawer.buildCanvas()

    pad.cd()

    g.Draw('AP')
    g_data.Draw('P')

    legend = TLegend(0.23, 0.73, 0.43, 0.91)
    legend.SetFillColor(0)
    legend.SetFillStyle(0)
    legend.SetLineColor(0)
    legend.SetLineWidth(0)

    legend.AddEntry(g.GetName(), 'MC', 'lep')
    legend.AddEntry(g_data.GetName(), 'Observed', 'lep')

    legend.Draw()

    padr.cd()
    g_ratio.GetYaxis().SetRangeUser(0.51, 1.49)
    g_ratio.GetYaxis().SetTitle('Obs/MC')
    g_ratio.Draw('AP')

    drawRatioLines(g_ratio)

    cv.Print(plot_name)
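The loop above propagates the relative errors of the data and MC efficiency graphs into the data/MC ratio in quadrature, sigma_r / r = sqrt((sigma_a / a)^2 + (sigma_b / b)^2). A standalone sketch of that propagation with hypothetical numbers:

# Sketch of the ratio error propagation used in the loop above
# (values are illustrative only).
import math

data_val, data_err = 0.12, 0.015    # data fake rate and its one-sided error
mc_val, mc_err = 0.10, 0.008        # MC fake rate and its one-sided error

ratio = data_val / mc_val
rel_err = math.sqrt((data_err / data_val) ** 2 + (mc_err / mc_val) ** 2)
abs_err = rel_err * ratio

print(ratio, abs_err)               # 1.2 and roughly 0.18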
Example #4
def plotDataOverMCEff(hist_mc_tight, hist_mc_loose, hist_data_tight, hist_data_loose, plot_name='fakerate.pdf', mc_leg='MC', obs_leg='Observed', ratio_leg='Obs/MC'):

    g = TGraphAsymmErrors(hist_mc_tight)
    g.Divide(hist_mc_tight, hist_mc_loose)
    g.GetYaxis().SetTitle('Misidentification rate')
    g.GetXaxis().SetTitle(hist_mc_tight.GetXaxis().GetTitle())
    g.GetYaxis().SetTitleOffset(1.2)
    g.GetYaxis().SetTitleOffset(1.3)

    g.SetLineColor(2)
    g.SetMarkerColor(2)

    g_data = TGraphAsymmErrors(hist_data_tight)
    g_data.Divide(hist_data_tight, hist_data_loose)

    # if g_data.GetN() != hist_data_tight.GetNbinsX():
    #     import pdb; pdb.set_trace()

    g_data.GetYaxis().SetTitle('Misidentification rate')
    g_data.GetXaxis().SetTitle(hist_data_tight.GetXaxis().GetTitle())
    g_data.GetYaxis().SetTitleOffset(1.2)
    g_data.GetYaxis().SetTitleOffset(1.3)
    g_data.SetMarkerColor(1)

    g_vals = g.GetY()
    g_data_vals = g_data.GetY()

    g_ratio = g_data.Clone('ratio')

    for i in xrange(g_data.GetN()):
        ratio = g_data_vals[i]/g_vals[i] if g_vals[i] else 0.
        g_ratio.SetPoint(i, g.GetX()[i], ratio)

        rel_y_low = math.sqrt((g_data.GetErrorYlow(i)/g_data_vals[i])**2 + (g.GetErrorYlow(i)/g_vals[i])**2) if g_data_vals[i] > 0. and g_vals[i] > 0. else 0.

        g_ratio.SetPointEYlow(i, rel_y_low * ratio)

        rel_y_high = math.sqrt((g_data.GetErrorYhigh(i)/g_data_vals[i])**2 + (g.GetErrorYhigh(i)/g_vals[i])**2) if g_data_vals[i] > 0. and g_vals[i] > 0. else 0.

        g_ratio.SetPointEYhigh(i, rel_y_high * ratio)

    # Gymnastics to get same label sizes etc in ratio and main plot
    ytp_ratio = 2.
    xtp_ratio = 2.

    # hr.GetYaxis().SetNdivisions(4)

    g_ratio.GetYaxis().SetTitleSize(g.GetYaxis().GetTitleSize() * xtp_ratio)
    g_ratio.GetXaxis().SetTitleSize(g.GetXaxis().GetTitleSize() * ytp_ratio)

    g_ratio.GetYaxis().SetTitleOffset(g.GetYaxis().GetTitleOffset() / xtp_ratio)
    g_ratio.GetXaxis().SetTitleOffset(g.GetXaxis().GetTitleOffset())  # / ytp_ratio)

    g_ratio.GetYaxis().SetLabelSize(g.GetYaxis().GetLabelSize() * xtp_ratio)
    g_ratio.GetXaxis().SetLabelSize(g.GetXaxis().GetLabelSize() * ytp_ratio)

    g_data.GetXaxis().SetLabelColor(0)
    g_data.GetXaxis().SetLabelSize(0)
    g.GetXaxis().SetLabelColor(0)
    g.GetXaxis().SetLabelSize(0)

    g_ratio.GetXaxis().SetTitle(g.GetXaxis().GetTitle())

    maxy = 1.3 * max(g.GetMaximum(), g_data.GetMaximum(), 0.05)
    g.GetYaxis().SetRangeUser(0.0011, maxy)

    cv, pad, padr = HistDrawer.buildCanvas()

    pad.cd()

    g.Draw('AP')
    g_data.Draw('P')

    legend = TLegend(0.23, 0.73, 0.43, 0.91)
    legend.SetFillColor(0)
    legend.SetFillStyle(0)
    legend.SetLineColor(0)
    legend.SetLineWidth(0)

    legend.AddEntry(g.GetName(), mc_leg, 'lep')
    legend.AddEntry(g_data.GetName(), obs_leg, 'lep')

    legend.Draw()

    padr.cd()
    g_ratio.GetYaxis().SetRangeUser(0.01, 1.99)
    g_ratio.GetYaxis().SetTitle(ratio_leg)
    g_ratio.Draw('AP')

    drawRatioLines(g_ratio)

    cv.Print(plot_name)

    g.GetYaxis().SetRangeUser(0.0001, 1)
    pad.SetLogy(True)
    cv.Print(plot_name.replace('.', '_log.'))
    f = ROOT.TFile(plot_name.replace('.', '_log.').replace('.pdf', '.root'), 'RECREATE')
    g.Write()
    g_data.Write()
    cv.Write()
    f.Close()
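A minimal call sketch for the function above, assuming four tight/loose histograms with identical binning. The histogram names, binning and legend labels are illustrative, and HistDrawer and drawRatioLines must be importable from the surrounding module:

# Hypothetical usage: build tight/loose histograms and draw the
# misidentification-rate comparison.  Names and binning are illustrative only.
from ROOT import TH1F

bins, lo, hi = 8, 20.0, 100.0
hist_mc_tight = TH1F('mc_tight', ';p_{T} [GeV];events', bins, lo, hi)
hist_mc_loose = TH1F('mc_loose', ';p_{T} [GeV];events', bins, lo, hi)
hist_data_tight = TH1F('data_tight', ';p_{T} [GeV];events', bins, lo, hi)
hist_data_loose = TH1F('data_loose', ';p_{T} [GeV];events', bins, lo, hi)

# ... fill the four histograms from the analysis trees ...

plotDataOverMCEff(hist_mc_tight, hist_mc_loose,
                  hist_data_tight, hist_data_loose,
                  plot_name='fakerate_pt.pdf',
                  mc_leg='Simulation', obs_leg='Data', ratio_leg='Data/MC')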