Beispiel #1
0
def ratio_root(num, num_err_sq, denom, denom_err_sq):
    """Compute the bin-wise ratio ``num / denom`` with asymmetric errors.

    Two temporary rootpy histograms are filled from the inputs and divided
    with ``rootpy.plotting.Graph.divide`` using the ``"e0 midp pois"``
    option string; the resulting graph points are then scattered back into
    arrays with one entry per input bin.

    Args:
        num: Sequence of numerator bin contents.
        num_err_sq: Per-bin numerator uncertainties.  They are stored as the
            bin *error* of the temporary histogram exactly as passed in
            (NOTE(review): the name suggests squared errors -- confirm what
            callers actually provide).
        denom: Sequence of denominator bin contents.
        denom_err_sq: Per-bin denominator uncertainties, handled like
            ``num_err_sq``.

    Returns:
        Tuple ``(ratios, low, upper)`` of float arrays shaped like ``num``.
        Bins for which the division produced no graph point stay at zero.
    """
    import logging

    import rootpy.plotting as rp

    # rootpy seems to switch this on, flooding the terminal with debugging output
    logging.getLogger("matplotlib.font_manager").setLevel(logging.INFO)

    # Set up the histograms; ROOT bin numbering starts at 1, hence ``i + 1``.
    n_bins = len(num)
    top = rp.Hist(n_bins, 0, 1)
    bottom = rp.Hist(n_bins, 0, 1)
    for i, (d, n, d_err,
            n_err) in enumerate(zip(denom, num, denom_err_sq, num_err_sq)):
        bottom[i + 1] = (d, d_err)
        top[i + 1] = (n, n_err)

    # Do the actual division
    div = rp.Graph.divide(top, bottom, "e0 midp pois")

    # Convert this back to the array of points for the ratio plots.
    # The dtype is forced to float: ``zeros_like(num)`` alone would inherit an
    # integer dtype from integer counts and silently truncate the ratios.
    ratios = np.zeros_like(num, dtype=float)
    low = np.zeros_like(num, dtype=float)
    upper = np.zeros_like(num, dtype=float)
    # Map every graph point back to the 0-based index of the bin it sits in.
    filled_indices = [top.FindBin(point.x.value) - 1 for point in div]
    ratios[filled_indices] = [point.y.value for point in div]
    low[filled_indices] = [point.y.error_low for point in div]
    upper[filled_indices] = [point.y.error_hi for point in div]
    return ratios, low, upper
def set_dyn_binning(va, lo, up, n, err=0.15):
    """Derive a variable binning by merging sparsely populated bins.

    Starts from ``n`` equally spaced edges between ``lo`` and ``up`` and
    walks from bin ``n - 1`` down to bin 2.  Whenever the inspected bin is
    empty, or its relative error exceeds ``err``, the bin's lower edge is
    removed (merging it with its lower neighbour) and the histogram is
    rebuilt from ``va`` with the new edges.

    Args:
        va: Iterable of values used to fill the histogram.  It is iterated
            once per rebuild, so it must be re-iterable (list, array, ...).
        lo: Lower edge of the first bin.
        up: Upper edge of the last bin.
        n: Number of initial bin edges.
        err: Maximum tolerated relative bin error (bin error / bin content).

    Returns:
        numpy array with the remaining bin edges.
    """

    def _normalised_hist(edges):
        # Build, fill and unit-normalise a histogram for the given edges.
        hist = rplot.Hist(edges)
        # Explicit loop instead of ``map(hist.Fill, va)``: on Python 3 ``map``
        # is lazy, so the histogram would never actually be filled.
        for value in va:
            hist.Fill(value)
        total = hist.Integral(0, hist.GetNbinsX() + 1)
        # An empty input has a zero integral; skip the scaling rather than
        # dividing by zero (every bin is empty and gets merged below).
        if total != 0:
            hist.Scale(1 / total)
        return hist

    binning = np.linspace(lo, up, n)
    h1 = _normalised_hist(binning)

    for i in range(n - 1, 1, -1):
        content = h1.GetBinContent(i)
        # The emptiness check comes first so the relative error is never
        # computed with a zero denominator.
        if content == 0 or (h1.GetBinError(i) / content) > err:
            binning = np.delete(binning, i - 1)
            h1 = _normalised_hist(binning)

    return binning
Beispiel #3
0
 def fit_shape(self, histo, model, x_range, fitopt='IRMENS'):
     '''Fit ``histo`` with a ROOT function and draw it with its error band.

     ``model`` is a tuple describing the function; it is unpacked into
     ``self.parse_formula`` which returns the function object to fit.
     ``x_range`` is unpacked both into the function range and into the
     axis range of the histogram that receives the confidence intervals.

     Notes on some characters accepted in ``fitopt``:
       "WL" Use Loglikelihood method and bin contents are not integer,
            i.e. histogram is weighted (must have Sumw2() set)
       "Q"  Quiet mode (minimum printing)
       "E"  Perform better Errors estimation using Minos technique
       "M"  More. Improve fit results.  It uses the IMPROVE command of
            TMinuit (see TMinuit::mnimpr).  This algorithm attempts to
            improve the found local minimum by searching for a better one.
       "R"  Use the Range specified in the function range
       "N"  Do not store the graphics function, do not draw
       "S"  The result of the fit is returned in the TFitResultPtr

     Returns the fitted function.  The function and the band histogram are
     also appended to ``self.keep``.
     '''
     fit_func = self.parse_formula(*model)
     fit_func.SetRange(*x_range)
     fit_func.SetLineWidth(3)
     fit_func.SetLineColor(ROOT.EColor.kAzure)

     # The return value is not used further but stays bound for the rest of
     # the method, as before.
     fit_result = histo.Fit(fit_func, fitopt)

     # One band bin per point in which the function is evaluated.
     n_eval_points = fit_func.GetNpx()
     band = plotting.Hist(n_eval_points, *x_range)
     fitter = ROOT.TVirtualFitter.GetFitter()
     fitter.GetConfidenceIntervals(band)

     # Show the band as a hatched area without markers or outline.
     band.markersize = 0
     band.linewidth = 0
     band.fillstyle = 3013
     band.fillcolor = ROOT.EColor.kAzure - 9

     band.Draw('same e3')
     fit_func.Draw('same')
     self.keep.extend([fit_func, band])
     return fit_func
Beispiel #4
0
def fill_hist(vals):
   """Book a 50-bin histogram spanning ``vals`` with some margin and fill it.

   The axis range is the minimum and maximum of ``vals``, each pushed
   outwards by 20% (the scale factor depends on the sign of the bound).
   If both bounds end up equal the range falls back to [-0.1, 0.1].
   Returns the filled histogram.
   """
   smallest = min(vals)
   largest = max(vals)

   # Widen the window: shrink a positive lower bound, grow a negative one ...
   if smallest > 0:
      lower = smallest * 0.8
   else:
      lower = smallest * 1.2
   # ... and the other way round for the upper bound.
   if largest > 0:
      upper = largest * 1.2
   else:
      upper = largest * 0.8

   # Degenerate window (0 == 0): use a small symmetric range instead.
   if lower == upper:
      lower = -0.1
      upper = 0.1

   filled = plotting.Hist(50, lower, upper, title='')
   for value in vals:
      filled.Fill(value)
   return filled
def bins_projectionsX(histo2D):
    """Slice a 2D histogram into one 1D histogram per Y bin.

    Every returned histogram uses the X bin edges of ``histo2D`` and holds
    the contents and errors of one Y row (under/overflow bins excluded).
    Marker styles are assigned as ``19 + <y bin number>``.

    Args:
        histo2D: 2D histogram exposing the ROOT ``TH2`` accessors used
            below (``GetXaxis``, ``GetNbinsX``, ``GetBinContent``, ...).

    Returns:
        List of 1D histograms, ordered by increasing Y bin number.
    """
    xaxis = histo2D.GetXaxis()
    nbins_x = histo2D.GetNbinsX()

    # X edges: low edge of the first bin followed by every bin's upper edge.
    # ``range`` replaces ``xrange`` so the function also runs on Python 3.
    edges = [float(xaxis.GetBinLowEdge(1))]
    edges.extend(float(xaxis.GetBinUpEdge(x)) for x in range(1, nbins_x + 1))

    projections = []
    for i in range(1, histo2D.GetNbinsY() + 1):
        projection = plotting.Hist(edges)
        projection.markerstyle = 19 + i
        for j in range(1, nbins_x + 1):
            projection.SetBinContent(j, histo2D.GetBinContent(j, i))
            projection.SetBinError(j, histo2D.GetBinError(j, i))
        projections.append(projection)
    return projections
Beispiel #6
0
    def apply_view(self, histo):
        """Return a copy of ``histo`` with one extra bin at each axis end.

        For 1D and 2D histograms a new histogram is booked whose axes carry
        one additional bin below the first and above the last original bin
        (as wide as the respective edge bin); titles, decorators and bin
        values/errors are copied over.  Histograms of any other dimension
        are returned unchanged.
        """

        def padded_edges(axis, nbins):
            # Original edges plus one extra edge on either side.
            inner = [axis.get_bin_low_edge(i) for i in range(1, nbins + 2)]
            below = inner[0] - axis.get_bin_width(1)
            above = inner[-1] + axis.get_bin_width(nbins)
            return [below] + inner + [above]

        ndim = histo.get_dimension()

        if ndim == 1:
            nx = histo.get_nbins_x()
            view = plt.Hist(padded_edges(histo.xaxis, nx))
            view.title = histo.title
            view.decorate(**histo.decorators)
            view.xaxis.title = histo.xaxis.title
            view.yaxis.title = histo.yaxis.title
            # Pair the bins of the new histogram, minus its first and last
            # entry, with the bins yielded by iterating the original.
            for new_bin, old_bin in zip(view[1:-1], histo):
                new_bin.value = old_bin.value
                new_bin.error = old_bin.error
            return view

        if ndim == 2:
            nx = histo.get_nbins_x()
            x_edges = padded_edges(histo.xaxis, nx)
            ny = histo.get_nbins_y()
            y_edges = padded_edges(histo.yaxis, ny)

            view = plt.Hist2D(x_edges,
                              y_edges,
                              title=histo.title,
                              **histo.decorators)
            view.xaxis.title = histo.xaxis.title
            view.yaxis.title = histo.yaxis.title
            # Every original bin moves up by one index along both axes.
            for ix in range(1, nx + 1):
                for iy in range(1, ny + 1):
                    source = histo[ix, iy]
                    view[ix + 1, iy + 1].value = source.value
                    view[ix + 1, iy + 1].error = histo[ix, iy].error
            return view

        # Any other dimensionality is passed through untouched.
        return histo
Beispiel #7
0
 def linearize(histo, overflow=False):
     '''Unroll a 2D histogram into a 1D histogram, one bin per 2D cell.

     Cells are visited with X as the outer and Y as the inner index and
     written to consecutive bins of a new histogram spanning [0, nbins].

     Args:
         histo: 2D histogram (``DIM == 2``) supporting ``histo[x, y]``.
         overflow: when True the under/overflow cells of both axes are
             unrolled as well.

     Returns:
         The 1D histogram; its ``entries`` are copied from ``histo``.

     Raises:
         RuntimeError: if ``histo`` is not two-dimensional.
     '''
     if histo.DIM != 2:
         # Report the dimension of the argument itself (the message used to
         # reference an undefined name and failed with NameError instead).
         raise RuntimeError(
             'the histogram I got has dimension %d, which is not supported'
             % histo.DIM)
     bx = histo.GetNbinsX()
     by = histo.GetNbinsY()
     if overflow:
         xran = range(0, bx + 2)
         yran = range(0, by + 2)
     else:
         xran = range(1, bx + 1)
         yran = range(1, by + 1)
     # One output bin per visited cell; with overflow this is
     # (bx + 2) * (by + 2).
     nbins = len(xran) * len(yran)
     ret = plotting.Hist(nbins, 0, nbins)
     for idx, (x, y) in enumerate(product(xran, yran)):
         ret[idx + 1].value = histo[x, y].value
         ret[idx + 1].error = histo[x, y].error
     ret.entries = histo.entries
     return ret
def run_unfolder(itoy = 0, outdir = opts.dir, tau = opts.tau):
    """Unfold one measured distribution and store plots and results.

    The function reads the response information for ``opts.var`` from the
    module-level ``resp_file`` and the measured distribution from
    ``data_file`` (or from the MC reco distribution when
    ``opts.use_reco_truth`` is set), configures a ``URUnfolding`` instance,
    determines one or more regularisation strengths and, for each of them,
    produces comparison plots and writes the unfolded/refolded histograms
    to ``<outdir>/<opts.out>``.

    Args:
        itoy: Toy index; only used to build the input directory name when
            ``opts.fit_file`` contains ``"toy"``.
        outdir: Directory receiving plots and the output ROOT file.
        tau: Regularisation strength.  A value ``>= 0`` is used as is
            (stored under the key ``'External'``); a negative value triggers
            the L-curve and tau scans and additionally runs the unfolding
            without regularisation (``'NoReg'``).

    Note:
        The defaults of ``outdir`` and ``tau`` are read from ``opts`` once,
        when the function is defined.
    """

    styles = {
        'scan_overlay' : {
            'markerstyle':[0, 29], 'linecolor':[1,1], 
            'markercolor':[1,2], 'drawstyle':['ALP', 'P'],
            'markersize':[0,3]
            },
        'data_overlay' : {
            'linestyle' : [1,0], 'markerstyle':[0,21], 
            'linecolor' : [2,1], 'markercolor':[2,1],
            'drawstyle' : ['hist', 'p'], 'legendstyle' : ['l', 'p']
            },
        'dots' : {
            'markerstyle' : 20, 'markersize' : 2,
            'linestyle' : 0, 'drawstyle' : 'P'
            },
        'line' : {
            'linestyle':1, 'markerstyle':0
            },
        }
    plotter = BasePlotter(
        outdir, defaults = {
            'clone' : False,
            'show_title' : True,
            }
        )

    # Toy inputs live in a per-toy sub-directory of the data file.
    if "toy" in opts.fit_file:
        data_file_basedir = 'toy_' + str(itoy)
        data_file_dir = data_file_basedir + '/' + opts.var
    else:
        data_file_dir = opts.var
    xaxislabel = set_pretty_label(opts.var)
    if opts.no_area_constraint:
        area_constraint='None'
    else:
        area_constraint='Area'
    myunfolding = URUnfolding(regmode = opts.reg_mode, constraint = area_constraint)

    ## Migration matrix preprocessing
    ## remove oflow bins
    var_dir = getattr(resp_file, opts.var)
    migration_matrix = var_dir.migration_matrix
    for matrix_bin in migration_matrix:
        if matrix_bin.overflow:
            matrix_bin.value = 0
            matrix_bin.error = 0
    myunfolding.matrix = migration_matrix
    thruth_unscaled = var_dir.thruth_unscaled
    reco_unscaled = var_dir.reco_unscaled
    project_reco = 'X' if myunfolding.orientation == 'Vertical' else 'Y'
    project_gen = 'Y' if myunfolding.orientation == 'Vertical' else 'X'
    reco_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_reco)()
        )
    gen_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_gen)()
        )

    # Efficiency: visible generator-level part over the full truth.
    if gen_project.Integral() < thruth_unscaled.Integral():
        eff_correction = ROOT.TGraphAsymmErrors(gen_project, thruth_unscaled)
    elif gen_project.Integral() == thruth_unscaled.Integral():
        eff_correction = None
    else:
        # Report the generator-level integrals that were actually compared
        # (the message used to print the reco ones).
        log.warning(
            'Efficiency correction: The visible part of the migration matrix'
            ' has a larger integral than the full one! (%.3f vs. %.3f).\n'
            'It might be a rounding error, but please check!'\
                % (gen_project.Integral(), thruth_unscaled.Integral())
            )
        eff_correction = None

    # Purity: visible reco-level part over the full reco distribution.
    if reco_project.Integral() < reco_unscaled.Integral():
        purity_correction = ROOT.TGraphAsymmErrors(reco_project, reco_unscaled)
    elif reco_project.Integral() == reco_unscaled.Integral():
        purity_correction = None
    else:
        log.warning(
            'Purity correction: The visible part of the migration matrix'
            ' has a larger integral than the full one! (%.3f vs. %.3f).\n'
            'It might be a rounding error, but please check!'\
                % (reco_project.Integral(), reco_unscaled.Integral())
            )
        purity_correction = None


    #flush graphs into histograms (easier to handle)
    eff_hist = gen_project.Clone()
    eff_hist.reset()
    eff_hist.name = 'eff_hist'
    if eff_correction:
        for idx in range(eff_correction.GetN()):
            eff_hist[idx+1].value = eff_correction.GetY()[idx]
            eff_hist[idx+1].error = max(
                eff_correction.GetEYhigh()[idx],
                eff_correction.GetEYlow()[idx]
                )
    else:
        for hist_bin in eff_hist:
            hist_bin.value = 1.
            hist_bin.error = 0.

    purity_hist = reco_project.Clone()
    purity_hist.reset()
    purity_hist.name = 'purity_hist'
    if purity_correction:
        # Write into purity_hist itself, mirroring the efficiency case; the
        # previous code assigned to a stale loop variable left over from the
        # migration-matrix loop, so purity_hist stayed empty.
        for idx in range(purity_correction.GetN()):
            purity_hist[idx+1].value = purity_correction.GetY()[idx]
            purity_hist[idx+1].error = max(
                purity_correction.GetEYhigh()[idx],
                purity_correction.GetEYlow()[idx]
                )
    else:
        for hist_bin in purity_hist:
            hist_bin.value = 1.
            hist_bin.error = 0.

    #Get measured histogram
    measured = None
    if opts.use_reco_truth:
        log.warning("Using the MC reco distribution for the unfolding!")
        measured = getattr(resp_file, opts.var).reco_distribution
    else:
        measured = getattr(data_file, data_file_dir).tt_right

    measured_no_correction = measured.Clone()
    measured_no_correction.name = 'measured_no_correction'
    measured.name = 'measured'
    measured.multiply(purity_hist)
    myunfolding.measured = measured

    #get gen-level distribution
    gen_distro = getattr(resp_file, opts.var).true_distribution.Clone()
    full_true  = gen_distro.Clone()
    full_true.name = 'complete_true_distro'
    gen_distro.multiply(eff_hist)
    gen_distro.name = 'true_distribution'    
    myunfolding.truth = gen_distro

    use_cov_matrix = opts.cov_matrix != 'none'
    if use_cov_matrix:
        if 'toy' in opts.fit_file:
            input_cov_matrix = make_cov_matrix(
                getattr(data_file, data_file_basedir).correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
            input_corr_matrix = make_corr_matrix(
                getattr(data_file, data_file_basedir).correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
        else:
            input_cov_matrix = make_cov_matrix(
                data_file.correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
            input_corr_matrix = make_corr_matrix(
                data_file.correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
        input_cov_matrix.name = 'input_cov_matrix'
        input_corr_matrix.name = 'input_corr_matrix'
        myunfolding.cov_matrix = input_cov_matrix
    myunfolding.InitUnfolder()

    #plot covariance matrix (only exists when a covariance input was requested)
    if use_cov_matrix:
        plotter.pad.cd()
        input_corr_matrix.SetStats(False)
        input_corr_matrix.Draw('colz')
        plotter.pad.SetLogz(True)
        plotter.save('correlation_matrix.png')

    #optimize
    best_taus = {}
    # Remember whether the scans ran: their graphs are saved at the end.
    ran_scans = False
    if tau >= 0:
        best_taus['External'] = tau
    else:
        ran_scans = True
        # NOTE(review): eval() executes whatever opts.tau_range contains;
        # acceptable only while the option comes from a trusted command line.
        t_min, t_max = eval(opts.tau_range)
        best_l, l_curve, graph_x, graph_y  = myunfolding.DoScanLcurve(100, t_min, t_max)
        best_taus['L_curve'] = best_l
        l_curve.SetName('lcurve')
        l_curve.name = 'lcurve'
        graph_x.name = 'l_scan_x'
        graph_y.name = 'l_scan_y'
        l_tau = math.log10(best_l)
        points = [(graph_x.GetX()[i], graph_x.GetY()[i], graph_y.GetY()[i]) 
                  for i in range(graph_x.GetN())]
        # Take the scan point closest to the chosen tau.  An exact float
        # comparison may match nothing; when an exact match exists it is
        # still the one selected (distance zero, first occurrence).
        _, best_x, best_y = min(points, key=lambda pt: abs(pt[0] - l_tau))
        graph_best = plotting.Graph(1)
        graph_best.SetPoint(0, best_x, best_y)
        plotter.reset()
        plotter.overlay(
            [l_curve, graph_best], **styles['scan_overlay']
            )
        plotter.canvas.name = 'L_curve'

        info = plotter.make_text_box('#tau = %.5f' % best_l, 'NE')
        info.Draw()
        # The canvas belongs to the plotter; there is no local ``canvas``.
        plotter.canvas.Update()
        plotter.set_subdir('L_curve')
        plotter.save()

        modes = ['RhoMax', 'RhoSquareAvg', 'RhoAvg']
        for mode in modes:
            plotter.set_subdir(mode)
            best_tau, tau_curve, index_best = myunfolding.DoScanTau(100, t_min, t_max, mode)
            best_taus[mode] = best_tau
            tau_curve.SetName('%s_scan' % mode)
            tau_curve.SetMarkerStyle(1)
            points = [(tau_curve.GetX()[i], tau_curve.GetY()[i])
                      for i in range(tau_curve.GetN())]

            graph_best = plotting.Graph(1)
            graph_best.SetPoint(0, *points[index_best])
            plotter.overlay(
                [tau_curve, graph_best], **styles['scan_overlay']
                )
            plotter.canvas.name = 'c'+tau_curve.GetName()

            info = plotter.make_text_box('#tau = %.5f' % best_tau, 'NE') 
            info.Draw()
            plotter.save('Tau_curve')

        #force running without regularization
        best_taus['NoReg'] = 0
        for name, best_tau in best_taus.items():
            log.info('best tau option for %s: %.3f' % (name, best_tau))

        if opts.runHandmade:
            #hand-made tau scan
            plotter.set_subdir('Handmade')
            unc_scan, bias_scan = myunfolding.scan_tau(
                200, 10**-6, 50, os.path.join(outdir, 'Handmade', 'scan_info.root'))

            bias_scan.name = 'Handmade'
            bias_scan.title = 'Avg. Bias - Handmade'

            plotter.plot(bias_scan, logx=True, logy=True, **styles['dots'])
            plotter.save('bias_scan')

            unc_scan.name = 'Handmade'
            unc_scan.title = 'Avg. Unc. - Handmade'
            plotter.plot(unc_scan, logx=True, logy=True, **styles['dots'])
            plotter.save('unc_scan')

            bias_points = [(bias_scan.GetX()[i], bias_scan.GetY()[i])
                           for i in range(bias_scan.GetN())]
            unc_points = [(unc_scan.GetX()[i], unc_scan.GetY()[i])
                           for i in range(unc_scan.GetN())]
            fom_scan = plotting.Graph(unc_scan.GetN())
            # Figure of merit per scan point: quad(bias, unc).  The scan tau
            # gets its own name so the ``tau`` argument is not overwritten.
            for idx, (binfo, uinfo) in enumerate(zip(bias_points, unc_points)):
                scan_tau, bias = binfo
                _, unc = uinfo
                fom_scan.SetPoint(idx, scan_tau, quad(bias, unc))
            fom_scan.name = 'Handmade'
            fom_scan.title = 'Figure of merit - Handmade'
            plotter.plot(fom_scan, logx=True, logy=True, **styles['dots'])
            plotter.save('fom_scan')

    to_save = []
    outfile = rootpy.io.root_open(os.path.join(outdir, opts.out),'recreate')
    # One sub-directory (plots and ROOT output) per regularisation choice.
    for name, best_tau in best_taus.items():
        plotter.set_subdir(name)
        method_dir = outfile.mkdir(name)
        myunfolding.tau = best_tau

        hdata_unfolded = myunfolding.unfolded
        #apply phase space efficiency corrections
        hdata_unfolded_ps_corrected = hdata_unfolded.Clone()
        hdata_unfolded_ps_corrected.Divide(eff_hist)

        hdata_refolded = myunfolding.refolded
        # NOTE(review): despite its name this is a plain clone; no purity
        # correction is applied to it anywhere in this function.
        hdata_refolded_wpurity = hdata_refolded.Clone()

        error_matrix = myunfolding.ematrix_total

        # Result unused here; the attribute access is kept in case it is a
        # computing property -- TODO confirm and drop.
        hcorrelations = myunfolding.rhoI_total
        hbias = myunfolding.bias
        myunfolding.truth.xaxis.title = xaxislabel
        hdata_unfolded.xaxis.title = xaxislabel

        # Count the unfolded bins that came out negative.
        n_neg_bins = 0
        for ibin in range(1,hdata_unfolded.GetNbinsX()+1):
            if hdata_unfolded.GetBinContent(ibin) < 0:
                n_neg_bins = n_neg_bins + 1
        hn_neg_bins = plotting.Hist(
            2,-1, 1, name = 'nneg_bins', 
            title = 'Negative bins in ' + hdata_unfolded.GetName()+ ';Bin sign; N_{bins}'
            )
        hn_neg_bins.SetBinContent(1,n_neg_bins)
        hn_neg_bins.SetBinContent(2,hdata_unfolded.GetNbinsX()-n_neg_bins)
        plotter.plot(
            hn_neg_bins, writeTo='unfolding_bins_sign', **styles['line']
            )

        leg = LegendDefinition(
            title=name,
            labels=['Truth','Unfolded'],
            position='ne'
            )

        # Average pull and average ratio of the unfolded result w.r.t. truth.
        sumofpulls = 0
        sumofratios = 0
        for ibin in range(1,myunfolding.truth.GetNbinsX()+1):
            binContent1 = myunfolding.truth.GetBinContent(ibin)
            binContent2 = hdata_unfolded.GetBinContent(ibin)
            binError1 = myunfolding.truth.GetBinError(ibin)
            binError2 = hdata_unfolded.GetBinError(ibin)
            error = sqrt(binError1*binError1 + binError2*binError2)
            if error != 0:
                pull = (binContent2-binContent1)/error
            else:
                pull = 9999
            if binContent1 != 0:
                ratio = binContent2/binContent1
            else:
                # Undefined ratio: contribute nothing instead of re-using the
                # previous bin's value (or failing on the very first bin).
                ratio = 0.
            sumofpulls = sumofpulls + pull
            sumofratios = sumofratios + ratio
        sumofpulls = sumofpulls / myunfolding.truth.GetNbinsX()
        sumofratios = sumofratios / myunfolding.truth.GetNbinsX()

        hsum_of_pulls = plotting.Hist(
            1, 0, 1, name = 'sum_of_pulls_' + hdata_unfolded.GetName(), 
            title = 'Sum of pulls wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(pulls) / N_{bins}'
            )
        hsum_of_pulls[1].value = sumofpulls
        plotter.plot(hsum_of_pulls, writeTo='unfolding_sum_of_pulls', **styles['line'])

        hsum_of_ratios = plotting.Hist(
            1, 0, 1, name = 'sum_of_ratios_' + hdata_unfolded.GetName(), 
            title = 'Sum of ratios wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(ratios) / N_{bins}'
            )
        hsum_of_ratios[1].value = sumofratios
        plotter.plot(hsum_of_ratios, writeTo='unfolding_sum_of_ratios', **styles['line'])


        plotter.overlay_and_compare(
            [myunfolding.truth], hdata_unfolded, 
            legend_def=leg,
            writeTo='unfolding_pull', **styles['data_overlay']
            )
        plotter.overlay_and_compare(
            [myunfolding.truth], hdata_unfolded, 
            legend_def=leg, method='ratio',
            writeTo='unfolding_ratio', **styles['data_overlay']
            )

        # NOTE(review): these two plots use the same writeTo names as the
        # two above -- confirm whether they are meant to replace them.
        plotter.overlay_and_compare(
            [full_true], hdata_unfolded_ps_corrected, 
            legend_def=leg,
            writeTo='unfolding_pull', **styles['data_overlay']
            )
        plotter.overlay_and_compare(
            [full_true], hdata_unfolded_ps_corrected, 
            legend_def=leg, method='ratio',
            writeTo='unfolding_ratio', **styles['data_overlay']
            )

        input_distro = getattr(resp_file, opts.var).prefit_distribution
        leg = LegendDefinition(title=name, position='ne')
        myunfolding.measured.xaxis.title = xaxislabel
        hdata_refolded.xaxis.title = xaxislabel
        myunfolding.measured.drawstyle = 'e1'

        style = {'linestyle':[1, 0], 'markerstyle':[20, 20],
                 'markercolor':[2,4], 'linecolor':[2,4],
                 'drawstyle' : ['hist', 'e1'], 'legendstyle' : ['l', 'p'],
                 'title' : ['Refolded', 'Reco']
                 }
        plotter.overlay_and_compare(
            [hdata_refolded], myunfolding.measured,
            legend_def=leg,
            writeTo='refolded_pull', **style
            )
        plotter.overlay_and_compare(
            [hdata_refolded], myunfolding.measured, 
            legend_def=leg, method='ratio',
            writeTo='refolded_ratio', **style
            )

        style = {'linestyle':[1,0,0], 'markerstyle':[20,21,21],
                 'markercolor':[2,4,1], 'linecolor':[2,4,1],
                 'drawstyle' : ['hist', 'e1', 'e1'], 'legendstyle' : ['l', 'p', 'p'],
                 'title' : ['Refolded', 'Reco', 'Input']
                 }
        measured_no_correction.drawstyle = 'e1'
        plotter.overlay_and_compare(
            [hdata_refolded_wpurity, measured_no_correction], input_distro, 
            legend_def=leg,
            writeTo='refolded_wpurity_pull', **style
            )
        plotter.overlay_and_compare(
            [hdata_refolded_wpurity, measured_no_correction], input_distro, 
            legend_def=leg, method='ratio',
            writeTo='refolded_wpurity_ratio', **style
            )

        method_dir.WriteTObject(hdata_unfolded, 'hdata_unfolded')
        method_dir.WriteTObject(hdata_unfolded_ps_corrected, 'hdata_unfolded_ps_corrected')
        method_dir.WriteTObject(hdata_refolded, 'hdata_refolded')
        method_dir.WriteTObject(hdata_refolded_wpurity, 'hdata_refolded_wpurity')
        method_dir.WriteTObject(error_matrix, 'error_matrix')
        method_dir.WriteTObject(hbias, 'bias')
        method_dir.WriteTObject(hn_neg_bins, 'hn_neg_bins')
        method_dir.WriteTObject(hsum_of_pulls, 'hsum_of_pulls')
        method_dir.WriteTObject(hsum_of_ratios, 'hsum_of_ratios')


    # Inputs and scan graphs go to the top level of the output file.
    outfile.cd()
    to_save.extend([
        measured_no_correction,
        eff_hist,
        purity_hist,
        full_true,
        myunfolding.truth,
        myunfolding.measured,
        myunfolding.matrix,])
    # The scan graphs only exist when the scans ran for *this* call, which
    # is decided by the ``tau`` argument rather than by ``opts.tau``.
    if ran_scans:
        to_save.extend([
                l_curve,
                tau_curve,   # curve of the last scanned mode
                graph_x,
                graph_y
                ])

    if use_cov_matrix:
       to_save.extend([input_cov_matrix])
       to_save.extend([input_corr_matrix])

    for obj in to_save:
        log.debug('Saving %s as %s' % (obj.name, obj.GetName()))
        obj.Write()
    getattr(resp_file, opts.var).reco_distribution.Write()
    getattr(resp_file, opts.var).prefit_distribution.Write()
    # Store the chosen tau values as JSON text inside the ROOT file.
    taus_text = ROOT.TText(0., 0., prettyjson.dumps(best_taus))
    outfile.WriteTObject(taus_text, 'best_taus')
    myunfolding.write_to(outfile, 'urunfolder')
    outfile.Close()
Beispiel #9
0
            # same was predicted for wiggle
            unchanged_predictions[pred_class].append(max_val)
        else:
            # other was predicted for wiggle
            changed_predictions[pred_class].append(max_val)
    unchangeds.append(unchanged_predictions)
    changeds.append(changed_predictions)

# Build, for every output node, one "unchanged" and one "changed" prediction
# histogram per sample from the values collected in `unchangeds` / `changeds`.
# NOTE(review): this excerpt appears to end part-way through the inner loop
# body; the changed histogram is styled here but not filled or stored.
for i_node in range(len(event_classes)):
    # loop over samples
    unchanged_hists = []
    changed_hists = []
    for i_sample in range(n_samples):
        # generate unchanged histogram
        values = unchangeds[i_sample][i_node]
        unc_h = rp.Hist(bins, *bin_range, title="unchanged prediction")
        # filled green area with black outline, no markers
        unc_h.markersize = 0
        unc_h.legendstyle = "F"
        unc_h.fillstyle = "solid"
        unc_h.fillcolor = "green"
        unc_h.linecolor = "black"
        unc_h.fill_array(values)
        unchanged_hists.append(unc_h)

        # generate changed histogram
        values = changeds[i_sample][i_node]
        ch_h = rp.Hist(bins, *bin_range, title="changed prediction")
        # filled dark-red area, no markers
        ch_h.markersize = 0
        ch_h.legendstyle = "F"
        ch_h.fillstyle = "solid"
        ch_h.fillcolor = "darkred"
Beispiel #10
0
    def plot_class_differences(self, log=False):
        """Plot, per output node, how far the other nodes' outputs lie below it.

        For every node only the events predicted as that node's class are
        used.  For each *other* node the relative difference
        ``(node_value - other_value) / node_value`` is histogrammed with the
        luminosity weights, and all histograms of one node are drawn on one
        canvas saved as ``<save_path>/node_differences_<node>.pdf``.

        Args:
            log: draw the y axis logarithmically when True.
        """
        pltstyle.init_plot_style()

        nbins = 20
        bin_range = [0., 1.]

        # Fetched once instead of once per event.
        # assumes get_lumi_weights() returns the same data on every call
        lumi_weights = self.data.get_lumi_weights()

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            node_index = self.data.class_translation[node_cls]

            # get outputs of node
            node_values = self.mainnet_predicted_vector[:, i]

            # Events predicted as this node; the same selection is reused
            # for the weights and for every other node's outputs.
            selected = [k for k in range(len(node_values))
                        if self.predicted_classes[k] == node_index]
            filtered_node_values = np.array([node_values[k] for k in selected])
            filtered_weights = [lumi_weights[k] for k in selected]

            diff_hists = []
            max_val = 0
            # loop over other nodes and get those predictions
            for j, other_cls in enumerate(self.event_classes):
                if i == j:
                    continue

                other_values = self.mainnet_predicted_vector[:, j]
                filtered_other_values = np.array(
                    [other_values[k] for k in selected])

                # get difference of predicted node value and other value
                diff_values = (filtered_node_values -
                               filtered_other_values) / filtered_node_values

                hist = rp.Hist(nbins,
                               *bin_range,
                               title=str(other_cls) + " node",
                               drawstyle="HIST E1 X0")
                pltstyle.set_sig_hist_style(hist, other_cls)
                hist.fill_array(diff_values, filtered_weights)
                max_val = max(max_val, hist.GetMaximum())
                diff_hists.append(hist)

            if not diff_hists:
                # Only one event class: there is no other node to compare to
                # (this case used to fail on an undefined ``stack``).
                continue

            # The first histogram is drawn through a stack so the y range can
            # be set on it; the remaining ones are overlaid.
            first_hist = diff_hists[0]
            histograms = diff_hists[1:]
            stack = rp.HistStack([first_hist], stacked=True)

            # create canvas
            canvas = pltstyle.init_canvas()
            # drawing hists
            stack.SetMaximum(max_val * 1.3)
            rp.utils.draw([stack] + histograms,
                          pad=canvas,
                          xtitle="relative difference (" + str(node_cls) +
                          " - X_node)/" + str(node_cls),
                          ytitle="Events")
            if log: canvas.cd().SetLogy()

            # legend; the reference is kept until the canvas is saved
            # (presumably required so it is not garbage collected -- verify)
            legend = pltstyle.init_legend([first_hist] + histograms)
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # save
            out_path = self.save_path + "/node_differences_{}.pdf".format(
                node_cls)
            pltstyle.save_canvas(canvas, out_path)
Beispiel #11
0
    def plot_classification(self, log=False):
        ''' plot all events classified as one category

        For every output node the events predicted as that node's class are
        split by true class: backgrounds are stacked, the ttHbb signal is
        overlaid after scaling its weights to the background integral.  One
        canvas per node is saved as ``<save_path>/predictions_<node>.pdf``
        and the unscaled S/B of the node is printed.

        Args:
            log: draw the y axis logarithmically when True.
        '''

        pltstyle.init_plot_style()
        nbins = 20
        bin_range = [0., 1.]

        ttH_index = self.data.class_translation["ttHbb"]

        # Fetched once instead of once per event inside the comprehensions.
        # assumes both getters return the same data on every call
        test_labels = self.data.get_test_labels(as_categorical = False)
        lumi_weights = self.data.get_lumi_weights()

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            node_index = self.data.class_translation[node_cls]

            # get outputs of node
            out_values = self.mainnet_predicted_vector[:, i]

            # fill lists according to class
            bkg_hists = []
            weight_integral = 0

            # loop over all classes to fill hist according to predicted class
            for j, truth_cls in enumerate(self.event_classes):
                class_index = self.data.class_translation[truth_cls]

                # events of this true class that were predicted as this node
                selected = [k for k in range(len(out_values))
                            if test_labels[k] == class_index
                            and self.predicted_classes[k] == node_index]
                filtered_values = [out_values[k] for k in selected]
                filtered_weights = [lumi_weights[k] for k in selected]

                # NOTE(review): this compares the enumeration position ``j``
                # with a class_translation value -- confirm both use the same
                # numbering.
                if j == ttH_index:
                    # signal in this node
                    sig_values = filtered_values
                    sig_label = str(truth_cls)
                    sig_weights = filtered_weights
                else:
                    # background in this node
                    weight_integral += sum(filtered_weights)
                    hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                    pltstyle.set_bkg_hist_style(hist, truth_cls)
                    hist.fill_array(filtered_values, filtered_weights)
                    bkg_hists.append(hist)

            # stack backgrounds
            bkg_stack = rp.HistStack(bkg_hists,
                                     stacked=True,
                                     drawstyle="HIST E1 X0")
            bkg_stack.SetMinimum(1e-4)
            max_val = bkg_stack.GetMaximum() * 1.3
            bkg_stack.SetMaximum(max_val)

            # plot signal, scaled so its weight sum equals the background's
            weight_sum = sum(sig_weights)
            scale_factor = 1. * weight_integral / weight_sum
            sig_weights = [w * scale_factor for w in sig_weights]

            sig_title = sig_label + "*{:.3f}".format(scale_factor)
            sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            # create canvas
            canvas = pltstyle.init_canvas()

            # drawing hists
            rp.utils.draw([bkg_stack, sig_hist],
                          xtitle="Events predicted as " + node_cls,
                          ytitle="Events",
                          pad=canvas)
            if log: canvas.cd().SetLogy()

            # legend; the reference is kept until the canvas is saved
            # (presumably required so it is not garbage collected -- verify)
            legend = pltstyle.init_legend(bkg_hists + [sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)
            # weight_sum is the signal weight sum before scaling
            print("S/B = {}".format(weight_sum / weight_integral))
            # save
            out_path = self.save_path + "/predictions_{}.pdf".format(node_cls)

            pltstyle.save_canvas(canvas, out_path)
Beispiel #12
0
    def plot_discriminators(self, log=False, cut_on_variable=None):
        ''' plot discriminators for output classes

        For every entry of self.event_classes one canvas is drawn: the output
        values of that node, split by truth class. All classes except the
        signal one are stacked as backgrounds; the signal histogram is
        overlaid after its weights were rescaled so that their sum equals the
        summed background weights. The node ROC value is added to the canvas,
        which is saved as self.save_path + "/discriminator_<node>.pdf".

        log:             if true, the y axis of each canvas is set to log scale
        cut_on_variable: optional dict with the keys "class" and "val"; when
                         given, only events whose predicted value for the
                         node "class" is <= "val" are filled
        '''
        pltstyle.init_plot_style()

        nbins = 50
        bin_range = [0., 1.]

        # get some ttH specific info for plotting
        ttH_index = self.data.class_translation["ttHbb"]
        ttH_true_labels = self.data.get_ttH_flag()

        # fetch the per-event truth labels and weights once instead of once
        # per event inside the selection below
        # NOTE(review): assumes both accessors are side-effect free and return
        # the same object on every call -- confirm against the data class
        test_labels = self.data.get_test_labels(as_categorical = False)
        lumi_weights = self.data.get_lumi_weights()

        # apply cut to output node value if wanted
        cut_prediction = None
        cut_value = None
        if cut_on_variable:
            cut_class = cut_on_variable["class"]
            cut_value = cut_on_variable["val"]

            cut_index = self.data.class_translation[cut_class]
            cut_prediction = self.mainnet_predicted_vector[:, cut_index]

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            # get outputs of node
            out_values = self.mainnet_predicted_vector[:, i]

            # calculate node specific ROC value
            node_ROC = roc_auc_score(ttH_true_labels, out_values)

            # fill lists according to class
            bkg_hists = []
            weight_integral = 0

            # loop over all classes to fill hist according to predicted class
            for j, truth_cls in enumerate(self.event_classes):
                class_index = self.data.class_translation[truth_cls]

                # indices of the events of this truth class (that also pass
                # the optional cut); built once and shared by values/weights
                selected = [ k for k in range(len(out_values)) \
                    if test_labels[k] == class_index \
                    and (cut_prediction is None or cut_prediction[k] <= cut_value) ]
                filtered_values = [ out_values[k] for k in selected ]
                filtered_weights = [ lumi_weights[k] for k in selected ]

                # NOTE(review): j is the position in self.event_classes while
                # ttH_index comes from class_translation; this presumably
                # relies on both orderings agreeing -- confirm
                if j == ttH_index:
                    # ttH signal
                    sig_values = filtered_values
                    sig_label = str(truth_cls)
                    sig_weights = filtered_weights
                else:
                    # background in this node
                    weight_integral += sum(filtered_weights)
                    hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                    pltstyle.set_bkg_hist_style(hist, truth_cls)
                    hist.fill_array(filtered_values, filtered_weights)
                    bkg_hists.append(hist)

            # stack backgrounds
            bkg_stack = rp.HistStack(bkg_hists,
                                     stacked=True,
                                     drawstyle="HIST E1 X0")
            bkg_stack.SetMinimum(1e-4)
            max_val = bkg_stack.GetMaximum() * 1.3
            bkg_stack.SetMaximum(max_val)

            # plot signal, scaled to the summed background weights so that
            # both shapes are comparable on the same axis
            weight_sum = sum(sig_weights)
            scale_factor = 1. * weight_integral / weight_sum
            sig_weights = [w * scale_factor for w in sig_weights]

            sig_title = sig_label + "*{:.3f}".format(scale_factor)
            sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            # creating canvas
            canvas = pltstyle.init_canvas()

            # drawing histograms
            rp.utils.draw([bkg_stack, sig_hist],
                          xtitle=node_cls + " Discriminator",
                          ytitle="Events",
                          pad=canvas)
            if log: canvas.cd().SetLogy()

            # creating legend (the returned object is kept in a local until
            # the canvas has been saved)
            legend = pltstyle.init_legend(bkg_hists + [sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # add ROC value to plot
            pltstyle.add_ROC_value(canvas, node_ROC)

            # save canvas
            out_path = self.save_path + "/discriminator_{}.pdf".format(
                node_cls)
            pltstyle.save_canvas(canvas, out_path)
Beispiel #13
0
def run_module(**kwargs):
   '''Summarise a set of toy fits: fit statuses, parameters, pulls, shapes.

   The keyword arguments are wrapped in a Struct and read as attributes:
   out, mlfit, harvested, variable, biasFile, oneshot, useprefit, nopars,
   postpulls, noshapes, skipFit, pars_regex, pars_out_regex, sample_regex
   and sample_out_regex.

   Everything is written below args.out:
     - output.root with the TDirectories floating_pars and postfit_pulls
     - info.txt and fit_status.png with the number / status of failed fits
     - floating_parameters/ (unless args.nopars)
     - postfit_pulls/       (unless args.postpulls)
     - shapes/              (unless args.noshapes)

   A toy is treated as failed when it lacks the 'norm_fit_s' or 'fit_s' key,
   when its fit status is non-zero, or when any floating parameter has zero
   error; failed toys are left out of all distributions.
   '''
   args = Struct(**kwargs)
   mkdir(args.out)
   canvas = plotting.Canvas()

   # *_regex select parameters/samples to keep, *_out_regex the ones to drop
   pars_regex = None
   if args.pars_regex:
      pars_regex = re.compile(args.pars_regex)

   sample_regex = None
   if args.sample_regex:
      sample_regex = re.compile(args.sample_regex)

   pars_out_regex = None
   if args.pars_out_regex:
      pars_out_regex = re.compile(args.pars_out_regex)

   sample_out_regex = None
   if args.sample_out_regex:
      sample_out_regex = re.compile(args.sample_out_regex)

   output_file = io.root_open('%s/output.root' % args.out, 'recreate')
   fpars_tdir = output_file.mkdir('floating_pars')
   pulls_tdir = output_file.mkdir('postfit_pulls')

   failed_fits = set()
   fit_statuses = plotting.Hist(10, -1.5, 8.5)
   with io.root_open(args.mlfit) as mlfit:
      failed_results = []
      passes_results = []
      pars = {}
      yields = {}
      first = True
      # in oneshot mode the file itself plays the role of the single toy
      toys = [i.GetName() for i in mlfit.keys() if i.GetName().startswith('toy_')] if not args.oneshot else [None]
      log.info('examining %i toys' % len(toys))
      prefit_nuis = None
      if args.useprefit:
         prefit_nuis = ArgSet(mlfit.nuisances_prefit)

      nfailed = 0
      for toy in toys:
         toy_dir = mlfit.Get(toy) if not args.oneshot else mlfit
         keys = set([i.GetName() for i in toy_dir.GetListOfKeys()])
         if 'norm_fit_s' not in keys or 'fit_s' not in keys:
            log.error('Fit %s failed to produce output!' % toy)
            failed_fits.add(toy)
            continue
         norms = ArgSet(
            toy_dir.Get(
               'norm_fit_s'
               )
            )
         norms = [i for i in norms]

         fit_result = toy_dir.Get(
            'fit_s'
            )
         fit_pars = ArgList(fit_result.floatParsFinal())

         if first:
            # the first toy with output defines which parameters and
            # yields are tracked for all toys
            first = False
            for i in fit_pars:
               if pars_regex and not pars_regex.match(i.GetName()): continue
               if pars_out_regex and pars_out_regex.match(i.GetName()): continue
               pars[i.GetName()] = []

            for i in norms:
               if sample_regex and not sample_regex.match(i.GetName()): continue
               if sample_out_regex and sample_out_regex.match(i.GetName()): continue
               yields[i.GetName()] = []

         fit_statuses.Fill(fit_result.status())
         fit_failed = any(i.getError() == 0 for i in fit_pars) or fit_result.status() != 0
         if fit_failed:
            log.error('Fit %s failed to converge properly. It has status %i!' % (toy, fit_result.status()))
            nfailed+=1
            failed_fits.add(toy)
            failed_results.append(fit_result)
            continue

         passes_results.append(fit_result)

         for i in norms:
            if i.GetName() in yields:
               yields[i.GetName()].append(i)

         for i in fit_pars:
            if i.GetName() in pars:
               pars[i.GetName()].append(i)

      if nfailed:
         log.error('There were %i fit failed!' % nfailed)
      with open('%s/info.txt' % args.out, 'w') as info:
         info.write('There were %i fit failed!\n' % nfailed)
      fit_statuses.Draw()
      canvas.SaveAs('%s/fit_status.png' % args.out)

      if not args.nopars:
         #Plots the post-fit distribution of the POI and nuisances
         out = os.path.join(args.out, 'floating_parameters')
         mkdir(out)
         for i, j in yields.iteritems():
            make_hist(i, j, out, prefix='yield_')

         for i, j in pars.iteritems():
            make_hist(i, j, out, prefix='par_')

      if not args.postpulls:
         #Plots the post-fit pulls (nuisance(post) - nuisance(pre))/unc(post)
         pulls_dir = os.path.join(args.out, 'postfit_pulls')
         mkdir(pulls_dir)

         ROOT.gStyle.SetOptFit(11111)
         singlenames=set()
         for name,value in pars.iteritems():
            if pars_regex and not pars_regex.match(name): continue
            # match on the parameter name of this iteration, not on a
            # variable left over from one of the loops above
            if pars_out_regex and pars_out_regex.match(name): continue
            singlenames.add(get_key(name))

         pulls_mean_summary={}
         pulls_sigma_summary={}
         deltas_mean_summary={}
         deltas_sigma_summary={}
         for name in singlenames:
            # one bin per tracked parameter whose full name contains the key
            nbins = 0
            for fullname in pars:
               if name in fullname:
                  nbins = nbins + 1
            #print name, nbins
            try:
               hist = plotting.Hist(nbins, 0.5,nbins+0.5, name = "%s_pull_mean_summary" %name)
               pulls_mean_summary[name] = hist
               hist = plotting.Hist(nbins, 0.5,nbins+0.5, name = "%s_pull_sigma_summary" %name)
               pulls_sigma_summary[name] = hist
               hist = plotting.Hist(nbins, 0.5,nbins+0.5, name = "%s_delta_mean_summary" %name)
               deltas_mean_summary[name] = hist
               hist = plotting.Hist(nbins, 0.5,nbins+0.5, name = "%s_delta_sigma_summary" %name)
               deltas_sigma_summary[name] = hist
            except:
               # any failure while booking hands control to set_trace()
               set_trace()

         pulls_mean_summary[  'all'] = plotting.Hist(len(pars), 0.5, len(pars)+0.5, name = "all_pull_mean_summary"  )
         pulls_sigma_summary[ 'all'] = plotting.Hist(len(pars), 0.5, len(pars)+0.5, name = "all_pull_sigma_summary" )
         deltas_mean_summary[ 'all'] = plotting.Hist(len(pars), 0.5, len(pars)+0.5, name = "all_delta_mean_summary" )
         deltas_sigma_summary['all'] = plotting.Hist(len(pars), 0.5, len(pars)+0.5, name = "all_delta_sigma_summary")


         for i, j in pars.iteritems():
            make_post_distributions(i, j, pulls_dir, pulls_mean_summary, pulls_sigma_summary, prefix='pull_',
                                    dist='pull', prefit=prefit_nuis, tdir=pulls_tdir, skipFit=args.skipFit)
            make_post_distributions(i, j, pulls_dir, deltas_mean_summary, deltas_sigma_summary, prefix='delta_',
                                    dist='delta', prefit=prefit_nuis, tdir=pulls_tdir, skipFit=args.skipFit)

         # summary plots: a red reference line is drawn at 0 for the means
         # and at 1 for the pull sigmas
         for name,histo in pulls_mean_summary.iteritems():
            canvas = plotting.Canvas()
            histo.Draw()
            canvas.Update()
            line = ROOT.TLine(histo.GetBinLowEdge(1),0,histo.GetBinLowEdge(histo.GetNbinsX()+1),0)
            line.SetLineColor(2)
            line.Draw("same")
            canvas.Update()
            canvas.SaveAs('%s/%s.png' % (pulls_dir,histo.GetName()))
            canvas.SaveAs('%s/%s.pdf' % (pulls_dir,histo.GetName()))
            pulls_tdir.WriteObject(histo, histo.GetName())
         for name,histo in pulls_sigma_summary.iteritems():
            canvas = plotting.Canvas()
            histo.Draw()
            canvas.Update()
            line = ROOT.TLine(histo.GetBinLowEdge(1),1,histo.GetBinLowEdge(histo.GetNbinsX()+1),1)
            line.SetLineColor(2)
            line.Draw("same")
            canvas.Update()
            canvas.SaveAs('%s/%s.png' % (pulls_dir,histo.GetName()))
            canvas.SaveAs('%s/%s.pdf' % (pulls_dir,histo.GetName()))
            pulls_tdir.WriteObject(histo, histo.GetName())

         for name,histo in deltas_mean_summary.iteritems():
            canvas = plotting.Canvas()
            histo.Draw()
            canvas.Update()
            line = ROOT.TLine(histo.GetBinLowEdge(1),0,histo.GetBinLowEdge(histo.GetNbinsX()+1),0)
            line.SetLineColor(2)
            line.Draw("same")
            canvas.Update()
            canvas.SaveAs('%s/%s.png' % (pulls_dir,histo.GetName()))
            canvas.SaveAs('%s/%s.pdf' % (pulls_dir,histo.GetName()))
            pulls_tdir.WriteObject(histo, histo.GetName())
         for name,histo in deltas_sigma_summary.iteritems():
            # own canvas per histogram, like the three loops above
            canvas = plotting.Canvas()
            histo.Draw()
            canvas.Update()
            #line = ROOT.TLine(histo.GetBinLowEdge(1),1,histo.GetBinLowEdge(histo.GetNbinsX()+1),1)
            #line.Draw("same")
            canvas.Update()
            canvas.SaveAs('%s/%s.png' % (pulls_dir,histo.GetName()))
            canvas.SaveAs('%s/%s.pdf' % (pulls_dir,histo.GetName()))
            pulls_tdir.WriteObject(histo, histo.GetName())


   if not args.noshapes:
      #Overlays the prefit values of the different shapes with the envelope of
      #what is fitted by the toys
      out = os.path.join(args.out, 'shapes')
      mkdir(out)
      biased_shapes={}
      if args.biasFile:
         with io.root_open(args.biasFile) as biased:
            # NOTE(review): when the file has no 'prefit' entry biased_dir is
            # None and the loop below fails on .keys() -- confirm whether a
            # bias file without prefit is a supported input
            biased_dir= biased.prefit \
               if hasattr(biased, 'prefit') else \
               None
            ROOT.TH1.AddDirectory(False)
            for key in biased_dir.keys():
               biased_shapes[key.name] = asrootpy(key.ReadObj().Clone())

      with io.root_open(args.harvested) as harvest:
         has_prefit = hasattr(harvest, 'prefit')
         prefit = harvest.prefit if has_prefit else None
         # envelope over all toys whose fit did not fail
         toys = EnvelopeView(
            *[harvest.get(i.GetName()).get(args.variable)
              for i in harvest.keys()
              if i.GetName().startswith('toy_')
              and (i.GetName() not in failed_fits) ]
             )
         #shapes = [i.GetName() for i in prefit.keys()] #FIXME! should not depend on prefit!
         first_toy = [i.GetName() for i in harvest.keys() if i.GetName().startswith('toy_')][0]
         # one-element set holding the whole key name; passing the bare
         # string to set() would split it into single characters
         not_shapes = set(['correlation_matrix'])
         shapes = [i.GetName() for i in harvest.get(first_toy).get(args.variable).keys() if i.GetName() not in not_shapes]

         for shape in shapes:
            # canvas split into an upper pad (shapes) and a lower pad (pulls)
            canvas = plotting.Canvas()
            canvas.SetCanvasSize( canvas.GetWw(), int(canvas.GetWh()*1.3) )
            upper_pad = plotting.Pad(0, 0.33, 1., 1.)
            lower_pad = plotting.Pad(0, 0., 1., 0.33)
            upper_pad.set_bottom_margin(0.001)
            lower_pad.set_top_margin(0.005)
            lower_pad.set_bottom_margin(lower_pad.get_bottom_margin()*3)
            upper_pad.Draw()
            lower_pad.Draw()
            upper_pad.cd()

            biased_shape = biased_shapes.get(shape, None)
            toy_shape = toys.Get(shape)
            pre_shape = None

            # three envelope entries plus one per optional reference shape
            legend = plotting.Legend(
               3+int(has_prefit)+int(bool(biased_shape)),
               rightmargin=0.07, topmargin=0.05, leftmargin=0.45)
            legend.SetBorderSize(0)

            if biased_shape:
               biased_shape.title = 'true shape'
               biased_shape.legendstyle = 'p'
               biased_shape.inlegend = True
               biased_shape.drawstyle = 'p'

            if has_prefit:
               pre_shape = prefit.Get(shape)
               pre_shape.title = 'input shape'
               pre_shape.legendstyle = 'p'
               pre_shape.drawstyle = 'p'
               if biased_shape:
                  # with a true shape present the input shape is drawn as a
                  # blue line so the two can be told apart
                  pre_shape.legendstyle = 'l'
                  pre_shape.drawstyle = 'hist'
                  pre_shape.linecolor = 'blue'
                  pre_shape.fillstyle = 0

            toy_shape.Draw()
            if has_prefit:
               pre_shape.Draw('same')
            if biased_shape:
               biased_shape.Draw('same')

            legend.AddEntry(toy_shape.two_sigma)
            legend.AddEntry(toy_shape.one_sigma)
            legend.AddEntry(toy_shape.median)
            if has_prefit:
               legend.AddEntry(pre_shape)
            if biased_shape:
               legend.AddEntry(biased_shape)
            legend.Draw()

            #compute pulls
            pulls = None
            # ratio of the pad heights, used to scale the lower pad's label
            # and title sizes
            labelSizeFactor2 = (upper_pad.GetHNDC()+lower_pad.GetHNDC()) / lower_pad.GetHNDC()
            labelSizeFactor1 = (upper_pad.GetHNDC()+lower_pad.GetHNDC()) / upper_pad.GetHNDC()
            label_factor = labelSizeFactor2/labelSizeFactor1
            if has_prefit or biased_shape:
               lower_pad.cd()
               # the true shape, when available, takes precedence as reference
               ref_histo = biased_shape if biased_shape else pre_shape
               pulls = toy_shape.median.Clone()
               pulls.Reset()
               for ref, toy, pull in zip(ref_histo, toy_shape, pulls):
                  if toy.error == (0.0, 0.0): continue
                  abs_pull = toy.median-ref.value
                  #pick correct side of the errors
                  err = toy.error[1] if abs_pull < 0 else toy.error[0]
                  pull.value = abs_pull/err
               pulls.xaxis.title = args.variable
               pulls.yaxis.title = 'pulls'
               pulls.set_label_size(ROOT.gStyle.GetLabelSize()*label_factor, "XYZ")
               pulls.set_title_size(ROOT.gStyle.GetTitleSize()*label_factor, "XYZ")
               pulls.yaxis.set_title_offset(pulls.GetYaxis().GetTitleOffset()/label_factor)

               pulls.Draw()

            canvas.Update()
            canvas.SaveAs('%s/%s.png' % (out, shape))
            canvas.SaveAs('%s/%s.pdf' % (out, shape))
            with open(os.path.join(out, '%s.json' % shape), 'w') as jfile:
               jfile.write(toy_shape.json())

   output_file.Close()
def unfolding_toy_diagnostics(indir, variable):

    plotter = BasePlotter(defaults={
        'clone': False,
        'name_canvas': True,
        'show_title': True,
        'save': {
            'png': True,
            'pdf': False
        }
    }, )
    styles = {
        'dots': {
            'linestyle': 0,
            'markerstyle': 21,
            'markercolor': 1
        },
        'compare': {
            'linesstyle': [1, 0],
            'markerstyle': [0, 21],
            'markercolor': [2, 1],
            'linecolor': [2, 1],
            'drawstyle': ['hist', 'pe'],
            'legendstyle': ['l', 'p']
        }
    }

    xaxislabel = set_pretty_label(variable)

    true_distribution = None

    curdir = os.getcwd()
    os.chdir(indir)
    toydirs = get_immediate_subdirectories(".")

    methods = []
    pulls_lists = {}
    pull_means_lists = {}
    pull_mean_errors_lists = {}
    pull_sums_lists = {}
    pull_sigmas_lists = {}
    pull_sigma_errors_lists = {}
    deltas_lists = {}
    delta_means_lists = {}
    delta_mean_errors_lists = {}
    delta_sigmas_lists = {}
    delta_sigma_errors_lists = {}
    ratio_sums_lists = {}
    nneg_bins_lists = {}
    unfoldeds_lists = {}
    unfolded_sigmas_lists = {}
    taus_lists = {}

    histos_created = False
    lists_created = False
    idir = 0
    true_distro = None
    #loop over toys
    for directory in toydirs:
        if not directory.startswith('toy_'): continue
        os.chdir(directory)
        log.debug('Inspecting toy %s' % directory)
        idir = idir + 1
        i = 0
        if not os.path.isfile("result_unfolding.root"):
            raise ValueError('root file not found in %s' % os.getcwd())
        with io.root_open("result_unfolding.root") as inputfile:
            log.debug('Iteration %s over the file' % i)
            i = i + 1
            if not methods:
                keys = [i.name for i in inputfile.keys()]
                for key in keys:
                    if hasattr(getattr(inputfile, key), "hdata_unfolded"):
                        methods.append(key)

            unfolded_hists = [
                inputfile.get('%s/hdata_unfolded' % i) for i in methods
            ]
            unfolded_wps_hists = [
                inputfile.get('%s/hdata_unfolded_ps_corrected' % i)
                for i in methods
            ]
            for unf, unfps, method in zip(unfolded_hists, unfolded_wps_hists,
                                          methods):
                unf.name = method
                unfps.name = method
            if true_distro is None:
                true_distribution = inputfile.true_distribution
                ROOT.TH1.AddDirectory(False)
                true_distro = true_distribution.Clone()
            taus = prettyjson.loads(inputfile.best_taus.GetTitle())
            if len(taus_lists) == 0:
                taus_lists = dict((i, []) for i in taus)
            for i, t in taus.iteritems():
                taus_lists[i].append(t)

            for histo in unfolded_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    for ibin in range(1, nbins + 1):
                        outname = "pull_" + name + "_bin" + str(ibin)
                        pulls_lists[outname] = []
                        outname = "delta_" + name + "_bin" + str(ibin)
                        deltas_lists[outname] = []
                        outname = "unfolded_" + name + "_bin" + str(ibin)
                        unfoldeds_lists[outname] = []
                        unfolded_sigmas_lists[outname] = []
                    outname = "pull_" + name
                    pull_means_lists[outname] = {}
                    pull_mean_errors_lists[outname] = {}
                    pull_sigmas_lists[outname] = {}
                    pull_sigma_errors_lists[outname] = {}

                    outname = "delta_" + name
                    delta_means_lists[outname] = {}
                    delta_mean_errors_lists[outname] = {}
                    delta_sigmas_lists[outname] = {}
                    delta_sigma_errors_lists[outname] = {}

                for ibin in range(1, nbins + 1):
                    outname = "pull_" + name + "_bin" + str(ibin)
                    unfolded_bin_content = histo.GetBinContent(ibin)
                    unfolded_bin_error = histo.GetBinError(ibin)
                    true_bin_content = true_distro.GetBinContent(ibin)
                    true_bin_error = true_distro.GetBinError(ibin)
                    total_bin_error = math.sqrt(unfolded_bin_error**2)  #???
                    if (total_bin_error != 0):
                        pull = (unfolded_bin_content -
                                true_bin_content) / total_bin_error
                    else:
                        pull = 9999
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, pull %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, pull))
                    pulls_lists[outname].append(pull)
                    outname = "delta_" + name + "_bin" + str(ibin)
                    delta = unfolded_bin_content - true_bin_content
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, delta %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, delta))
                    deltas_lists[outname].append(delta)
                    outname = "unfolded_" + name + "_bin" + str(ibin)
                    unfoldeds_lists[outname].append(unfolded_bin_content)
                    unfolded_sigmas_lists[outname].append(unfolded_bin_error)

            nneg_bins_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("nneg_bins")
            ]
            nneg_bins_hists = [asrootpy(i.ReadObj()) for i in nneg_bins_hists]
            for histo in nneg_bins_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    nneg_bins_lists[outname] = []
                outname = name
                nneg_bins_lists[outname].append(histo.GetBinContent(1))

            pull_sums_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("sum_of_pulls")
            ]
            pull_sums_hists = [asrootpy(i.ReadObj()) for i in pull_sums_hists]
            for histo in pull_sums_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    pull_sums_lists[outname] = []
                outname = name
                pull_sums_lists[outname].append(histo.GetBinContent(1))

            ratio_sums_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("sum_of_ratios")
            ]
            ratio_sums_hists = [
                asrootpy(i.ReadObj()) for i in ratio_sums_hists
            ]
            for histo in ratio_sums_hists:
                #create ratio/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    ratio_sums_lists[outname] = []
                outname = name
                ratio_sums_lists[outname].append(histo.GetBinContent(1))

            #after the first iteration on the file all the lists are created
            lists_created = True

        os.chdir("..")

    #create histograms
    #histo containers
    taus = {}
    for name, vals in taus_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if val_min == val_max:
            if tau_nbins % 2:  #if odd
                val_min, val_max = val_min - 0.01, val_min + 0.01
            else:
                brange = 0.02
                bwidth = brange / tau_nbins
                val_min, val_max = val_min - 0.01 + bwidth / 2., val_min + 0.01 + bwidth / 2.
        title = '#tau choice - %s ;#tau;N_{toys}' % (name)
        histo = Hist(tau_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        taus[name] = histo

    pulls = {}
    for name, vals in pulls_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        abs_max = max(abs(val_min), abs(val_max))
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Pulls - %s - %s ;Pull;N_{toys}' % (binno, method)
        histo = Hist(pull_nbins, -abs_max, abs_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        pulls[name] = histo

    deltas = {}
    for name, vals in deltas_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Deltas - %s - %s ;Delta;N_{toys}' % (binno, method)
        histo = Hist(delta_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        deltas[name] = histo

    unfoldeds = {}
    for name, vals in unfoldeds_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Unfoldeds - %s - %s ;Unfolded;N_{toys}' % (binno, method)
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        unfoldeds[name] = histo

    nneg_bins = {}
    for name, vals, in nneg_bins_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0 if val_min > 0 else val_min - 1
        val_max = max(vals)
        val_max = 0 if val_max < 0 else val_max + 1
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            set_trace()
            _, method, _ = tuple(name.split('_'))
        title = 'N of negative bins - %s ;N. neg bins;N_{toys}' % method
        histo = Hist(int(val_max - val_min + 1),
                     val_min,
                     val_max,
                     name=name,
                     title=title)
        for val in vals:
            histo.Fill(val)
        nneg_bins[name] = histo

    pull_sums = {}
    for name, vals in pull_sums_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Pull sums - %s ;#Sigma(pull)/N_{bins};N_{toys}' % method
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        pull_sums[name] = histo

    ratio_sums = {}
    for name, vals in ratio_sums_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Ratio sums - %s;#Sigma(ratio)/N_{bins};N_{toys}' % method
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        ratio_sums[name] = histo

    unfolded_sigmas = {}
    for name, vals in unfolded_sigmas_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Unfolded uncertainties - %s - %s ;Uncertainty;N_{toys}' % (
            binno, method)
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        unfolded_sigmas[name] = histo

    for name, histo in pulls.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        pull_means_lists[general_name][idx] = mean
        pull_mean_errors_lists[general_name][idx] = meanError
        pull_sigmas_lists[general_name][idx] = sigma
        pull_sigma_errors_lists[general_name][idx] = sigmaError

    for name, histo in deltas.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        delta_means_lists[general_name][idx] = mean
        delta_mean_errors_lists[general_name][idx] = meanError
        delta_sigmas_lists[general_name][idx] = sigma
        delta_sigma_errors_lists[general_name][idx] = sigmaError

    outfile = rootpy.io.File("unfolding_diagnostics.root", "RECREATE")
    outfile.cd()

    pull_means = {}
    pull_sigmas = {}
    pull_means_summary = {}
    pull_sigmas_summary = {}
    delta_means = {}
    delta_sigmas = {}
    delta_means_summary = {}
    delta_sigmas_summary = {}

    for outname, pmeans in pull_means_lists.iteritems():
        outname_mean = outname + "_mean"
        outtitle = "Pull means - " + outname + ";Pull mean; N_{toys}"
        pull_mean_min = min(pmeans.values())
        pull_mean_max = max(pmeans.values())
        pull_mean_newmin = pull_mean_min - (pull_mean_max -
                                            pull_mean_min) * 0.5
        pull_mean_newmax = pull_mean_max + (pull_mean_max -
                                            pull_mean_min) * 0.5
        pull_means[outname] = plotting.Hist(pull_mean_nbins,
                                            pull_mean_newmin,
                                            pull_mean_newmax,
                                            name=outname_mean,
                                            title=outtitle)

        outname_mean_summary = outname + "_mean_summary"
        outtitle_mean_summary = "Pull mean summary - " + outname
        histocloned = true_distro.Clone(outname_mean_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Pull mean'
        histocloned.title = outtitle_mean_summary
        pull_means_summary[outname] = histocloned

        for idx, pmean in pmeans.iteritems():
            pull_means[outname].Fill(pmean)
            histocloned[idx].value = pmean
            histocloned[idx].error = pull_mean_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(pmeans.values()),
                                       max(pmeans.values()))

    for outname, psigmas in pull_sigmas_lists.iteritems():
        outname_sigma = outname + "_sigma"
        outtitle_sigma = "Pull #sigma's - " + outname + ";Pull #sigma; N_{toys}"
        pull_sigma_min = min(psigmas.values())
        pull_sigma_max = max(psigmas.values())
        pull_sigma_newmin = pull_sigma_min - (pull_sigma_max -
                                              pull_sigma_min) * 0.5
        pull_sigma_newmax = pull_sigma_max + (pull_sigma_max -
                                              pull_sigma_min) * 0.5
        pull_sigmas[outname] = plotting.Hist(pull_sigma_nbins,
                                             pull_sigma_newmin,
                                             pull_sigma_newmax,
                                             name=outname_sigma,
                                             title=outtitle_sigma)

        outname_sigma_summary = outname + "_sigma_summary"
        outtitle_sigma_summary = "Pull #sigma summary - " + outname
        histocloned = true_distro.Clone(outname_sigma_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Pull #sigma'
        histocloned.title = outtitle_sigma_summary
        pull_sigmas_summary[outname] = histocloned

        for idx, psigma in psigmas.iteritems():
            pull_sigmas[outname].Fill(psigma)
            histocloned[idx].value = psigma
            histocloned[idx].error = pull_sigma_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(psigmas.values()),
                                       max(psigmas.values()))

    for outname, dmeans in delta_means_lists.iteritems():
        outname_mean = outname + "_mean"
        outtitle = "Delta means - " + outname + ";Delta mean; N_{toys}"
        delta_mean_min = min(dmeans.values())
        delta_mean_max = max(dmeans.values())
        delta_mean_newmin = delta_mean_min - (delta_mean_max -
                                              delta_mean_min) * 0.5
        delta_mean_newmax = delta_mean_max + (delta_mean_max -
                                              delta_mean_min) * 0.5
        delta_means[outname] = plotting.Hist(delta_mean_nbins,
                                             delta_mean_newmin,
                                             delta_mean_newmax,
                                             name=outname_mean,
                                             title=outtitle)

        outname_mean_summary = outname + "_mean_summary"
        outtitle_mean_summary = "Delta mean summary - " + outname
        histocloned = true_distro.Clone(outname_mean_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Delta mean'
        histocloned.title = outtitle_mean_summary
        delta_means_summary[outname] = histocloned

        for idx, dmean in dmeans.iteritems():
            delta_means[outname].Fill(dmean)
            histocloned[idx].value = dmean
            histocloned[idx].error = delta_mean_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(dmeans.values()),
                                       max(dmeans.values()))

    for outname, dsigmas in delta_sigmas_lists.iteritems():
        outname_sigma = outname + "_sigma"
        outtitle_sigma = "Delta #sigma's - " + outname + ";Delta #sigma; N_{toys}"
        delta_sigma_min = min(dsigmas.values())
        delta_sigma_max = max(dsigmas.values())
        delta_sigma_newmin = delta_sigma_min - (delta_sigma_max -
                                                delta_sigma_min) * 0.5
        delta_sigma_newmax = delta_sigma_max + (delta_sigma_max -
                                                delta_sigma_min) * 0.5
        delta_sigmas[outname] = plotting.Hist(delta_sigma_nbins,
                                              delta_sigma_newmin,
                                              delta_sigma_newmax,
                                              name=outname_sigma,
                                              title=outtitle_sigma)

        outname_sigma_summary = outname + "_sigma_summary"
        outtitle_sigma_summary = "Delta #sigma summary - " + outname
        histocloned = true_distro.Clone(outname_sigma_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Delta #sigma'
        histocloned.title = outtitle_sigma_summary
        delta_sigmas_summary[outname] = histocloned

        for idx, dsigma in dsigmas.iteritems():
            delta_sigmas[outname].Fill(dsigma)
            histocloned[idx].value = dsigma
            histocloned[idx].error = delta_sigma_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(dsigmas.values()),
                                       max(dsigmas.values()))

    unfolded_summary = {}
    unfolded_average = {}
    unfolded_envelope = {}
    for name, histo in unfoldeds.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        if general_name not in unfolded_summary:
            histo = true_distro.Clone("%s_unfolded_summary" % general_name)
            outtitle_unfolded_summary = "Unfolded summary - " + general_name
            histo.Reset()
            histo.xaxis.title = xaxislabel
            histo.yaxis.title = 'N_{events}'
            histo.title = outtitle_unfolded_summary
            unfolded_summary[general_name] = histo

            unfolded_envelope[general_name] = histo.Clone(
                "%s_unfolded_envelope" % general_name)
            unfolded_average[general_name] = histo.Clone(
                "%s_unfolded_average" % general_name)

        unfolded_summary[general_name][idx].value = mean
        unfolded_summary[general_name][idx].error = meanError

        unfolded_envelope[general_name][idx].value = mean
        unfolded_envelope[general_name][idx].error = sigma

        unfolded_average[general_name][idx].value = mean
        unfolded_average[general_name][idx].error = \
           unfolded_sigmas['%s_bin%i' % (general_name, idx)].GetMean()

    plotter.set_subdir('taus')
    for name, histo in taus.iteritems():
        #canvas = plotter.create_and_write_canvas_single(0, 21, 1, False, False, histo, write=False)
        plotter.canvas.cd()
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)

        info = plotter.make_text_box(
            'mode #tau = %.5f' % histo[histo.GetMaximumBin()].x.center,
            position=(plotter.pad.GetLeftMargin(), plotter.pad.GetTopMargin(),
                      0.3, 0.025))
        info.Draw()

        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('pulls')
    for name, histo in pulls.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_means.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()
    for name, histo in pull_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()

    plotter.set_subdir('pull_summaries')
    for name, histo in pull_means_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        line = ROOT.TLine(histo.GetBinLowEdge(1), 0,
                          histo.GetBinLowEdge(histo.GetNbinsX() + 1), 0)
        line.Draw("same")
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_sigmas_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        line = ROOT.TLine(histo.GetBinLowEdge(1), 1,
                          histo.GetBinLowEdge(histo.GetNbinsX() + 1), 1)
        line.Draw("same")
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('deltas')
    for name, histo in deltas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in delta_means.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()
    for name, histo in delta_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()

    plotter.set_subdir('delta_summaries')
    for name, histo in delta_means_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in delta_sigmas_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolding_unc')
    for name, histo in unfolded_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded')
    for name, histo in unfoldeds.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_summaries')
    for name, histo in unfolded_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    for name, histo in unfolded_summary.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_average')
    for name, histo in unfolded_average.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        #set_trace()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_envelope')
    for name, histo in unfolded_envelope.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('figures_of_merit')
    for name, histo in nneg_bins.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_sums.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in ratio_sums.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    outfile.close()
    os.chdir(curdir)
# Move weights/TMVAClassification_KNN.weights.xml (relative to the current
# working directory) to a file named after the output ROOT file, with
# '.root' replaced by '.weights.xml'.
import subprocess  # only needed for the move below

xml_name = os.path.join(os.getcwd(), "weights/TMVAClassification_KNN.weights.xml")
target = os.path.join(os.getcwd(), output_file.replace('.root', '.weights.xml'))
cmd = 'mv %s %s' % (xml_name, target)  # human-readable form, used for logging
log.info(cmd)
# Hand the arguments to `mv` as a list rather than through a shell string:
# paths containing spaces or shell metacharacters are then moved verbatim.
# A failed move is reported instead of being silently ignored.
mv_status = subprocess.call(['mv', xml_name, target])
if mv_status != 0:
    log.warning("'%s' exited with status %s" % (cmd, mv_status))

#############################################
##  Reads back and produces control plots
#############################################

hist_maps = {}
for var in args.variables:
    if 'pt' in var.lower():
        hist_maps[var] = {
            'estimate'     : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]), #plotting.Hist(100, 0, 200),
            'estimate_all' : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]), #plotting.Hist(100, 0, 200),
            'pass'     : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]),
            'all'      : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]),
        }
    elif 'jets' in var.lower() or 'njet' in var.lower():
        hist_maps[var] = {
            'estimate'     : plotting.Hist(12, 0, 12),
            'estimate_all' : plotting.Hist(12, 0, 12),
            'pass'         : plotting.Hist(12, 0, 12),
            'all'          : plotting.Hist(12, 0, 12),
        }
    else:
        hist_maps[var] = {
            'estimate'     : plotting.Hist(100, 0, 200),
            'estimate_all' : plotting.Hist(100, 0, 200),
Beispiel #16
0

# Global plot style: start from the 'ATLAS' style and apply one common size
# (tsize) to the labels and titles of both the x and the y axis.
tsize = 18
style = get_style('ATLAS')
style.SetHistLineWidth(20)
for _axis in ("x", "y"):
    style.SetLabelSize(tsize, _axis)
    style.SetTitleSize(tsize, _axis)

set_style(style)

######################## Define histograms ########################
# Three histograms with identical binning (5 bins from 0 to 5), one per muon
# category (P / T / L) -- presumably muon multiplicities, judging by the
# names; TODO confirm. Every histogram is titled with its own name.
h_nPMuon_4GeV = plt.Hist(5,
                         0,
                         5,
                         name="h_nPMuon_4GeV",
                         title="h_nPMuon_4GeV",
                         legendstyle='lep')
# Title spelled with the 'T' so that it matches the histogram name, exactly
# like the P and L histograms.
h_nTMuon_4GeV = plt.Hist(5,
                         0,
                         5,
                         name="h_nTMuon_4GeV",
                         title="h_nTMuon_4GeV",
                         legendstyle='lep')
h_nLMuon_4GeV = plt.Hist(5,
                         0,
                         5,
                         name="h_nLMuon_4GeV",
                         title="h_nLMuon_4GeV",
                         legendstyle='lep')
Beispiel #17
0
    return discr_nodes


# Load the trained model and evaluate its main net on the test data set.
dnn_aachen.load_trained_model()
data = dnn_aachen.data.get_test_data(as_matrix=False)
prediction_before = dnn_aachen.main_net.predict(data.values)

# Turn the raw predictions into per-node discriminator values.
# NOTE(review): presumably one sequence of values per output node, since it is
# indexed by node below -- confirm against gen_discrs.
discriminators_before = gen_discrs(prediction_before)
before_hists = []

# Common binning for all discriminator histograms: 100 bins on [0, 1].
bins = 100
bin_range = [0., 1.]
# One "before smearing" reference histogram per event class, appended to
# before_hists in node order.
for i_node in range(len(event_classes)):
    node_values = discriminators_before[i_node]
    h = rp.Hist(bins, *bin_range, title="before smearing")
    # Style: no markers, solid fill with a black outline, legend entry drawn
    # as a filled box ("F").
    h.markersize = 0
    h.legendstyle = "F"
    h.fillstyle = "solid"
    h.linecolor = "black"
    h.fill_array(node_values)
    before_hists.append(h)

# Standard deviations to scan: start at 0.005 and advance in steps of 0.01,
# stopping before 0.305.
stddevs = np.arange(0.005, 0.305, 0.01)
print(stddevs)
# Alternative scan range (disabled): np.arange(0.01,0.31,0.01)

# Result accumulators, empty at this point.
# NOTE(review): presumably filled with one entry per std deviation by the scan
# that follows -- confirm.
rate_of_other_argmax = []
mean_diff = []
std_diff = []
Beispiel #18
0
            ('PF GSFTrk',
             seeding & (electrons.gsf_pt > 0) & electrons.has_pfGSF_trk),
            ('PF Block',
             seeding & (electrons.gsf_pt > 0) & electrons.has_pfBlock),
            ('PF Block+ECAL', seeding & (electrons.gsf_pt > 0)
             & electrons.has_pfBlock_with_ECAL),
            ('PF Ele',
             seeding & (electrons.gsf_pt > 0) & electrons.has_pfEgamma),
            ('GED Core',
             seeding & (electrons.gsf_pt > 0) & electrons.has_ele_core),
            ('GED Electrons', seeding & (electrons.ele_pt > 0)),
        ]
        to_plot = {'KTF Track', 'seeding', 'GSF Track', 'GED Electrons'}
        masks = dict(ordered_masks)
        for name, mask in masks.iteritems():
            hist = rplt.Hist([1, 2, 4, 5, 6, 7, 8, 9, 10]
                             if not args.test else [0, 1, 2, 5, 10])
            masked = electrons[mask] if mask is not None else electrons
            root_numpy.fill_hist(
                hist, masked.gen_pt
                if not (args.allTracks or args.fakes) else masked.trk_pt)
            histos[name] = hist

        efficiencies = {}
        markersize = 6
        first = True
        plt.clf()
        offset = 0.1 * (len(masks) - 1) / 2
        for passing, _ in ordered_masks:
            if passing == 'all': continue
            efficiencies[passing] = rplt.Efficiency(histos[passing],
                                                    histos['all'])
Beispiel #19
0
    def plot_prenet_nodes(self, log=False):
        ''' Plot the prenet output distribution of every prenet target node.

        For each entry of ``self.prenet_targets`` the corresponding column of
        ``self.prenet_predicted_vector`` is split by the prenet test label of
        that node: events labelled 0 are drawn as a stack ("False"), events
        labelled 1 are overlaid ("True"). Both use the lumi weights, and the
        label-1 weights are additionally scaled so that their sum equals the
        label-0 weight sum. The canvas is handed to ``pltstyle.save_canvas``
        with the path ``<self.save_path>/prenet_output_<node>.pdf``.

        NOTE(review): there is no guard for a node without label-1 events,
        i.e. for a zero label-1 weight sum in the scale factor.

        Args:
            log: if true, switch the y axis of the canvas to log scale.
        '''
        pltstyle.init_plot_style()
        n_bins = 20
        bin_range = [0., 1.]

        # Neither lookup depends on the node or on the event, so fetch each
        # once up front instead of once per node / once per event.
        all_labels = self.data.get_prenet_test_labels()
        lumi_weights = self.data.get_lumi_weights()

        for i, node_cls in enumerate(self.prenet_targets):
            # outputs and labels of this class node
            out_values = self.prenet_predicted_vector[:, i]
            prenet_labels = all_labels[:, i]

            # event indices per label; events with any other label value are
            # left out of both groups
            event_indices = range(len(out_values))
            sig_indices = [k for k in event_indices if prenet_labels[k] == 1]
            bkg_indices = [k for k in event_indices if prenet_labels[k] == 0]

            sig_values = [out_values[k] for k in sig_indices]
            bkg_values = [out_values[k] for k in bkg_indices]

            sig_weights = [lumi_weights[k] for k in sig_indices]
            bkg_weights = [lumi_weights[k] for k in bkg_indices]

            # scale the label-1 weights to the label-0 weight sum so that both
            # distributions have the same weighted yield
            bkg_sig_ratio = 1. * sum(bkg_weights) / sum(sig_weights)
            sig_weights = [w * bkg_sig_ratio for w in sig_weights]

            sig_label = "True"
            bkg_label = "False"

            # the applied scale factor is shown in the histogram title
            sig_title = sig_label + "*{:.3f}".format(bkg_sig_ratio)

            # plot output
            bkg_hist = rp.Hist(n_bins, *bin_range, title=bkg_label)
            pltstyle.set_bkg_hist_style(bkg_hist, bkg_label)
            bkg_hist.fill_array(bkg_values, bkg_weights)

            sig_hist = rp.Hist(n_bins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            stack = rp.HistStack([bkg_hist],
                                 stacked=True,
                                 drawstyle="HIST E1 X0")
            stack.SetMinimum(1e-4)

            canvas = pltstyle.init_canvas()

            rp.utils.draw([stack, sig_hist],
                          xtitle="prenet node {}".format(node_cls),
                          ytitle="Events",
                          pad=canvas)
            if log:
                canvas.cd().SetLogy()

            # NOTE(review): `legend` is not used again, but the binding keeps
            # the returned object referenced until the canvas is saved --
            # presumably required for it to appear in the output; do not drop.
            legend = pltstyle.init_legend([bkg_hist, sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            out_path = self.save_path + "/prenet_output_{}.pdf".format(
                node_cls)

            pltstyle.save_canvas(canvas, out_path)
Beispiel #20
0
def run_module(**kwargs):
   args = Struct(**kwargs)
   redundant_binning = {}
   with open(args.binning) as bins:
      redundant_binning = prettyjson.loads(bins.read())

   #group binning to merge jet categories
   grouping = re.compile('^(?P<base_category>[A-Za-z0-9]+)_\d+Jets$')
   binning = {}
   for var, categories in redundant_binning.iteritems():
      if var not in binning: binning[var] = {}
      for category, bin_info in categories.iteritems():
         m = grouping.match(category)
         if not m:
            raise ValueError('Category name %s did not match the regex!' % category)
         base = m.group('base_category')
         if base not in binning[var]:
            binning[var][base] = copy.deepcopy(bin_info)
         else:
            #make sure that all jet categories have the same bin edges
            assert(binning[var][base] == bin_info)

   for info in binning.itervalues():
      edges = set(
         i['low_edge'] for i in info.itervalues()
         )
      edges.update(
         set(
            i['up_edge'] for i in info.itervalues()
            )
         )
      edges = sorted(list(edges))
      info['edges'] = edges

   prefit_norms = {}
   with io.root_open(args.input_shape) as shapes:
      for key in shapes.keys():
         obj = key.ReadObj()
         if not obj.InheritsFrom('TDirectory'): continue
         
         for hkey in obj.GetListOfKeys():
            hist = hkey.ReadObj()
            if not hist.InheritsFrom('TH1'): continue
            
            err = ROOT.Double()
            integral = hist.IntegralAndError(
               1,
               hist.GetNbinsX(),
               err
               )
            val_id = uuid.uuid4().hex
            val =  ROOT.RooRealVar(val_id, val_id, integral)
            val.setError(err)
            prefit_norms['%s/%s' % (obj.GetName(), hist.GetName())] = val
               

   with io.root_open(args.fitresult) as results:
      dirs = ['']
      if args.toys:
         dirs = [i.GetName() for i in results.GetListOfKeys() if i.GetName().startswith('toy_')]

      postfit_table = Table('Bin:%7s', 'Category:%10s', 'Sample:%20s', 'Yield:%5.1f', 'Error:%5.1f')
      postfit_norms = [(i.name, i.value, i.error) for i in results.norm_fit_s]
      postfit_norms.sort(key=lambda x: x[0])
      for name, val, err in postfit_norms:
         bincat, sample = tuple(name.split('/'))
         bin, category = tuple(bincat.split('_'))
         postfit_table.add_line(bin, category, sample, val, err)
      postfit_table.add_separator()
      with open(args.out.replace('.root','.raw_txt'), 'w') as out:
         out.write(postfit_table.__repr__())
      
      with io.root_open(args.out, 'recreate') as output:
         is_prefit_done = False
         for dirname in dirs:
            input_dir = results.Get(dirname) if dirname else results
            if not hasattr(input_dir, 'fit_s'):
               continue

            fit_result = input_dir.fit_s
            pars = asrootpy(fit_result.floatParsFinal())
            prefit_pars = asrootpy(fit_result.floatParsInit())
            tdir = output
            if dirname: 
               tdir = output.mkdir(dirname)
               tdir.cd()
            hcorr = asrootpy(fit_result.correlationHist())
            par_names = set([i.name for i in pars])
            yield_par_names = filter(lambda x: '_FullYield_' in x, par_names)
            hcorr.Write()
            for observable, info in binning.iteritems():
               var_dir = tdir.mkdir(observable)
               var_dir.cd()
               hists = {}
               hists_prefit = {}
               for rvar_name in yield_par_names:
                  category, sample = tuple(rvar_name.split('_FullYield_'))
                  if category not in info: continue
                  if sample not in hists:
                     hists[sample] = plotting.Hist(
                        info['edges'],
                        name = sample
                        )
                     if not is_prefit_done:
                        hists_prefit[sample] = plotting.Hist(
                           info['edges'],
                           name = sample
                           )
                  idx = info[category]['idx']+1
                  hists[sample][idx].value = pars[rvar_name].value
                  error = pars[rvar_name].error
                  hists[sample][idx].error = max(abs(i) for i in error) if isinstance(error, tuple) else error #get max of asym error

                  if not is_prefit_done:
                     hists_prefit[sample][idx].value = prefit_pars[rvar_name].value
                  ## Pre-fit floating parameters have no uncertainties
                  ## hists_prefit[sample][idx].error = max(prefit_pars[rvar_name].error)
                  logging.debug(
                     'Assigning label %s to bin %i for %s/%s' % (rvar_name, idx, category, sample)
                     )
                  hists[sample].xaxis.SetBinLabel(idx, rvar_name)

               for h in hists.itervalues():
                  logging.debug( h.Write() )

               if not is_prefit_done:
                  is_prefit_done = True
                  output.mkdir('prefit').cd()
                  for h in hists_prefit.itervalues():
                     logging.debug( h.Write() )