def _visible_fraction_graph(visible, full, label):
    """Return the visible/full correction graph, or None when none is needed.

    *visible* is a projection of the migration matrix and *full* the
    corresponding unscaled distribution. When the visible integral is
    smaller than the full one a ``ROOT.TGraphAsymmErrors(visible, full)``
    is returned. When the integrals are equal None is returned. When the
    visible integral is larger a warning tagged with *label* is logged and
    None is returned.
    """
    visible_integral = visible.Integral()
    full_integral = full.Integral()
    if visible_integral < full_integral:
        return ROOT.TGraphAsymmErrors(visible, full)
    if visible_integral > full_integral:
        log.warning(
            '%s correction: The visible part of the migration matrix'
            ' has a larger integral than the full one! (%.3f vs. %.3f).\n'
            'It might be a rounding error, but please check!'
            % (label, visible_integral, full_integral)
            )
    return None


def _correction_hist(template, correction, name):
    """Flush a correction graph into a histogram shaped like *template*.

    The returned histogram is a reset clone of *template* called *name*.
    If *correction* is truthy, point ``idx`` of the graph is copied into
    bin ``idx + 1`` with the larger of the two asymmetric y-errors as the
    bin error. Otherwise every bin is set to 1 +/- 0 (no correction).
    """
    hist = template.Clone()
    hist.reset()
    hist.name = name
    if correction:
        # NOTE(review): assumes one graph point per histogram bin, in bin
        # order, starting at bin 1 -- TODO confirm that ROOT does not drop
        # points for bins with an empty denominator.
        for idx in range(correction.GetN()):
            target = hist[idx+1]
            target.value = correction.GetY()[idx]
            target.error = max(
                correction.GetEYhigh()[idx],
                correction.GetEYlow()[idx]
                )
    else:
        for hbin in hist:
            hbin.value = 1.
            hbin.error = 0.
    return hist


def run_unfolder(itoy = 0, outdir = opts.dir, tau = opts.tau):
    """Unfold the measured ``opts.var`` distribution and store the results.

    The function reads the module-level ``opts``, ``resp_file`` and
    ``data_file`` objects, configures a ``URUnfolding`` instance, chooses
    one or more regularisation strengths and, for each of them, produces
    control plots and writes the unfolded/refolded histograms to
    ``os.path.join(outdir, opts.out)``.

    Parameters:
        itoy: toy index; only used when ``"toy"`` appears in
            ``opts.fit_file``, to build the ``toy_<itoy>`` directory name.
        outdir: directory handed to ``BasePlotter`` and in which the
            output ROOT file is created.
        tau: regularisation strength. A value ``>= 0`` is used as the only
            choice (stored as ``'External'``). Otherwise the L-curve scan,
            the ``RhoMax``/``RhoSquareAvg``/``RhoAvg`` scans and a
            ``'NoReg'`` (tau = 0) entry are produced, plus the hand-made
            scan when ``opts.runHandmade`` is set.

    Returns:
        None. All results are written to disk.
    """
    styles = {
        'scan_overlay' : {
            'markerstyle':[0, 29], 'linecolor':[1,1],
            'markercolor':[1,2], 'drawstyle':['ALP', 'P'],
            'markersize':[0,3]
            },
        'data_overlay' : {
            'linestyle' : [1,0], 'markerstyle':[0,21],
            'linecolor' : [2,1], 'markercolor':[2,1],
            'drawstyle' : ['hist', 'p'], 'legendstyle' : ['l', 'p']
            },
        'dots' : {
            'markerstyle' : 20, 'markersize' : 2,
            'linestyle' : 0, 'drawstyle' : 'P'
            },
        'line' : {
            'linestyle':1, 'markerstyle':0
            },
        }
    plotter = BasePlotter(
        outdir, defaults = {
            'clone' : False,
            'show_title' : True,
            }
        )

    # Toy inputs live in a per-toy directory of the data file.
    is_toy = "toy" in opts.fit_file
    if is_toy:
        data_file_basedir = 'toy_' + str(itoy)
        data_file_dir = data_file_basedir + '/' + opts.var
    else:
        data_file_basedir = None
        data_file_dir = opts.var
    xaxislabel = set_pretty_label(opts.var)
    area_constraint = 'None' if opts.no_area_constraint else 'Area'
    myunfolding = URUnfolding(regmode = opts.reg_mode, constraint = area_constraint)

    ## Migration matrix preprocessing
    ## remove oflow bins
    var_dir = getattr(resp_file, opts.var)
    migration_matrix = var_dir.migration_matrix
    for mbin in migration_matrix:
        if mbin.overflow:
            mbin.value = 0
            mbin.error = 0
    myunfolding.matrix = migration_matrix
    thruth_unscaled = var_dir.thruth_unscaled
    reco_unscaled = var_dir.reco_unscaled
    # Which matrix axis carries reco / gen depends on the unfolder orientation.
    project_reco = 'X' if myunfolding.orientation == 'Vertical' else 'Y'
    project_gen = 'Y' if myunfolding.orientation == 'Vertical' else 'X'
    reco_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_reco)()
        )
    gen_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_gen)()
        )
    eff_correction = _visible_fraction_graph(
        gen_project, thruth_unscaled, 'Efficiency'
        )
    purity_correction = _visible_fraction_graph(
        reco_project, reco_unscaled, 'Purity'
        )

    #flush graphs into histograms (easier to handle)
    eff_hist = _correction_hist(gen_project, eff_correction, 'eff_hist')
    purity_hist = _correction_hist(reco_project, purity_correction, 'purity_hist')

    #Get measured histogram
    if opts.use_reco_truth:
        log.warning("Using the MC reco distribution for the unfolding!")
        measured = var_dir.reco_distribution
    else:
        measured = getattr(data_file, data_file_dir).tt_right

    measured_no_correction = measured.Clone()
    measured_no_correction.name = 'measured_no_correction'
    measured.name = 'measured'
    # NOTE(review): 'measured' is not cloned, so the purity scaling is
    # applied to the object obtained from the input file -- presumably
    # intended, verify.
    measured.multiply(purity_hist)
    myunfolding.measured = measured

    #get gen-level distribution
    gen_distro = var_dir.true_distribution.Clone()
    full_true  = gen_distro.Clone()
    full_true.name = 'complete_true_distro'
    gen_distro.multiply(eff_hist)
    gen_distro.name = 'true_distribution'
    myunfolding.truth = gen_distro

    use_cov_matrix = opts.cov_matrix != 'none'
    if use_cov_matrix:
        # Toys store their correlation matrix inside the toy directory.
        if is_toy:
            corr_source = getattr(data_file, data_file_basedir)
        else:
            corr_source = data_file
        input_cov_matrix = make_cov_matrix(
            corr_source.correlation_matrix,
            getattr(data_file, data_file_dir).tt_right
            )
        input_corr_matrix = make_corr_matrix(
            corr_source.correlation_matrix,
            getattr(data_file, data_file_dir).tt_right
            )
        input_cov_matrix.name = 'input_cov_matrix'
        input_corr_matrix.name = 'input_corr_matrix'
        myunfolding.cov_matrix = input_cov_matrix
    myunfolding.InitUnfolder()

    #plot correlation matrix (only exists when a covariance matrix is used)
    if use_cov_matrix:
        plotter.pad.cd()
        input_corr_matrix.SetStats(False)
        input_corr_matrix.Draw('colz')
        plotter.pad.SetLogz(True)
        plotter.save('correlation_matrix.png')

    #optimize
    best_taus = {}
    # Remembered so that the saving step below agrees with the branch taken
    # here (the scan objects only exist when the scans were run).
    external_tau = tau >= 0
    if external_tau:
        best_taus['External'] = tau
    else:
        # NOTE(review): eval() executes arbitrary code taken from the
        # tau_range option; acceptable only for trusted command lines.
        t_min, t_max = eval(opts.tau_range)
        best_l, l_curve, graph_x, graph_y  = myunfolding.DoScanLcurve(100, t_min, t_max)
        best_taus['L_curve'] = best_l
        l_curve.SetName('lcurve')
        l_curve.name = 'lcurve'
        graph_x.name = 'l_scan_x'
        graph_y.name = 'l_scan_y'
        l_tau = math.log10(best_l)
        points = [(graph_x.GetX()[i], graph_x.GetY()[i], graph_y.GetY()[i])
                  for i in xrange(graph_x.GetN())]
        # Take the scan point closest to log10(best tau). An exact float
        # match is not guaranteed; when one exists it is the point chosen.
        closest = min(points, key=lambda point: abs(point[0] - l_tau))
        graph_best = plotting.Graph(1)
        graph_best.SetPoint(0, closest[1], closest[2])
        plotter.reset()
        plotter.overlay(
            [l_curve, graph_best], **styles['scan_overlay']
            )
        plotter.canvas.name = 'L_curve'

        info = plotter.make_text_box('#tau = %.5f' % best_l, 'NE')
        info.Draw()
        # No free-standing 'canvas' is defined in this function; refresh
        # the plotter's canvas, which is the one drawn on above.
        plotter.canvas.Update()
        plotter.set_subdir('L_curve')
        plotter.save()

        modes = ['RhoMax', 'RhoSquareAvg', 'RhoAvg']
        for mode in modes:
            plotter.set_subdir(mode)
            best_tau, tau_curve, index_best = myunfolding.DoScanTau(100, t_min, t_max, mode)
            best_taus[mode] = best_tau
            tau_curve.SetName('%s_scan' % mode)
            tau_curve.SetMarkerStyle(1)
            points = [(tau_curve.GetX()[i], tau_curve.GetY()[i])
                      for i in xrange(tau_curve.GetN())]

            graph_best = plotting.Graph(1)
            graph_best.SetPoint(0, *points[index_best])
            plotter.overlay(
                [tau_curve, graph_best], **styles['scan_overlay']
                )
            plotter.canvas.name = 'c'+tau_curve.GetName()

            info = plotter.make_text_box('#tau = %.5f' % best_tau, 'NE')
            info.Draw()
            plotter.save('Tau_curve')

        #force running without regularization
        best_taus['NoReg'] = 0
        for name, best_tau in best_taus.iteritems():
            log.info('best tau option for %s: %.3f' % (name, best_tau))

        if opts.runHandmade:
            #hand-made tau scan
            plotter.set_subdir('Handmade')
            unc_scan, bias_scan = myunfolding.scan_tau(
                200, 10**-6, 50, os.path.join(outdir, 'Handmade', 'scan_info.root'))

            bias_scan.name = 'Handmade'
            bias_scan.title = 'Avg. Bias - Handmade'

            plotter.plot(bias_scan, logx=True, logy=True, **styles['dots'])
            plotter.save('bias_scan')

            unc_scan.name = 'Handmade'
            unc_scan.title = 'Avg. Unc. - Handmade'
            plotter.plot(unc_scan, logx=True, logy=True, **styles['dots'])
            plotter.save('unc_scan')

            bias_points = [(bias_scan.GetX()[i], bias_scan.GetY()[i])
                           for i in xrange(bias_scan.GetN())]
            unc_points = [(unc_scan.GetX()[i], unc_scan.GetY()[i])
                           for i in xrange(unc_scan.GetN())]
            # Figure of merit: quad(bias, unc) at each scanned tau.
            fom_scan = plotting.Graph(unc_scan.GetN())
            for idx, (bias_point, unc_point) in enumerate(zip(bias_points, unc_points)):
                scanned_tau, bias = bias_point
                _, unc = unc_point
                fom_scan.SetPoint(idx, scanned_tau, quad(bias, unc))
            fom_scan.name = 'Handmade'
            fom_scan.title = 'Figure of merit - Handmade'
            plotter.plot(fom_scan, logx=True, logy=True, **styles['dots'])
            plotter.save('fom_scan')

    to_save = []
    outfile = rootpy.io.root_open(os.path.join(outdir, opts.out),'recreate')
    try:
        for name, best_tau in best_taus.iteritems():
            plotter.set_subdir(name)
            method_dir = outfile.mkdir(name)
            myunfolding.tau = best_tau

            hdata_unfolded = myunfolding.unfolded
            #apply phase space efficiency corrections
            hdata_unfolded_ps_corrected = hdata_unfolded.Clone()
            hdata_unfolded_ps_corrected.Divide(eff_hist)

            hdata_refolded = myunfolding.refolded
            # NOTE(review): despite its name this is a plain clone of the
            # refolded histogram; no purity correction is applied to it.
            hdata_refolded_wpurity = hdata_refolded.Clone()

            error_matrix = myunfolding.ematrix_total

            # NOTE(review): retrieved but neither plotted nor written.
            hcorrelations = myunfolding.rhoI_total
            hbias = myunfolding.bias
            myunfolding.truth.xaxis.title = xaxislabel
            hdata_unfolded.xaxis.title = xaxislabel

            # Count the unfolded bins with negative content.
            n_unfolded_bins = hdata_unfolded.GetNbinsX()
            n_neg_bins = 0
            for ibin in range(1, n_unfolded_bins+1):
                if hdata_unfolded.GetBinContent(ibin) < 0:
                    n_neg_bins = n_neg_bins + 1
            hn_neg_bins = plotting.Hist(
                2,-1, 1, name = 'nneg_bins',
                title = 'Negative bins in ' + hdata_unfolded.GetName()+ ';Bin sign; N_{bins}'
                )
            hn_neg_bins.SetBinContent(1,n_neg_bins)
            hn_neg_bins.SetBinContent(2,n_unfolded_bins-n_neg_bins)
            plotter.plot(
                hn_neg_bins, writeTo='unfolding_bins_sign', **styles['line']
                )

            leg = LegendDefinition(
                title=name,
                labels=['Truth','Unfolded'],
                position='ne'
                )

            # Average pull and ratio of the unfolded result w.r.t. truth.
            n_truth_bins = myunfolding.truth.GetNbinsX()
            sumofpulls = 0
            sumofratios = 0
            for ibin in range(1, n_truth_bins+1):
                binContent1 = myunfolding.truth.GetBinContent(ibin)
                binContent2 = hdata_unfolded.GetBinContent(ibin)
                binError1 = myunfolding.truth.GetBinError(ibin)
                binError2 = hdata_unfolded.GetBinError(ibin)
                error = sqrt(binError1*binError1 + binError2*binError2)
                if error != 0:
                    pull = (binContent2-binContent1)/error
                else:
                    pull = 9999
                sumofpulls = sumofpulls + pull
                # The ratio is undefined for an empty truth bin: such a bin
                # adds nothing to the sum instead of reusing a stale value.
                if binContent1 != 0:
                    sumofratios = sumofratios + binContent2/binContent1
            sumofpulls = sumofpulls / n_truth_bins
            sumofratios = sumofratios / n_truth_bins

            hsum_of_pulls = plotting.Hist(
                1, 0, 1, name = 'sum_of_pulls_' + hdata_unfolded.GetName(),
                title = 'Sum of pulls wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(pulls) / N_{bins}'
                )
            hsum_of_pulls[1].value = sumofpulls
            plotter.plot(hsum_of_pulls, writeTo='unfolding_sum_of_pulls', **styles['line'])

            hsum_of_ratios = plotting.Hist(
                1, 0, 1, name = 'sum_of_ratios_' + hdata_unfolded.GetName(),
                title = 'Sum of ratios wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(ratios) / N_{bins}'
                )
            hsum_of_ratios[1].value = sumofratios
            plotter.plot(hsum_of_ratios, writeTo='unfolding_sum_of_ratios', **styles['line'])

            plotter.overlay_and_compare(
                [myunfolding.truth], hdata_unfolded,
                legend_def=leg,
                writeTo='unfolding_pull', **styles['data_overlay']
                )
            plotter.overlay_and_compare(
                [myunfolding.truth], hdata_unfolded,
                legend_def=leg, method='ratio',
                writeTo='unfolding_ratio', **styles['data_overlay']
                )

            # NOTE(review): these two calls reuse the writeTo names of the
            # two calls above -- presumably the earlier plots get
            # overwritten; confirm whether distinct names were intended.
            plotter.overlay_and_compare(
                [full_true], hdata_unfolded_ps_corrected,
                legend_def=leg,
                writeTo='unfolding_pull', **styles['data_overlay']
                )
            plotter.overlay_and_compare(
                [full_true], hdata_unfolded_ps_corrected,
                legend_def=leg, method='ratio',
                writeTo='unfolding_ratio', **styles['data_overlay']
                )

            input_distro = var_dir.prefit_distribution
            leg = LegendDefinition(title=name, position='ne')
            myunfolding.measured.xaxis.title = xaxislabel
            hdata_refolded.xaxis.title = xaxislabel
            myunfolding.measured.drawstyle = 'e1'

            style = {'linestyle':[1, 0], 'markerstyle':[20, 20],
                     'markercolor':[2,4], 'linecolor':[2,4],
                     'drawstyle' : ['hist', 'e1'], 'legendstyle' : ['l', 'p'],
                     'title' : ['Refolded', 'Reco']
                     }
            plotter.overlay_and_compare(
                [hdata_refolded], myunfolding.measured,
                legend_def=leg,
                writeTo='refolded_pull', **style
                )
            plotter.overlay_and_compare(
                [hdata_refolded], myunfolding.measured,
                legend_def=leg, method='ratio',
                writeTo='refolded_ratio', **style
                )

            style = {'linestyle':[1,0,0], 'markerstyle':[20,21,21],
                     'markercolor':[2,4,1], 'linecolor':[2,4,1],
                     'drawstyle' : ['hist', 'e1', 'e1'], 'legendstyle' : ['l', 'p', 'p'],
                     'title' : ['Refolded', 'Reco', 'Input']
                     }
            measured_no_correction.drawstyle = 'e1'
            plotter.overlay_and_compare(
                [hdata_refolded_wpurity, measured_no_correction], input_distro,
                legend_def=leg,
                writeTo='refolded_wpurity_pull', **style
                )
            plotter.overlay_and_compare(
                [hdata_refolded_wpurity, measured_no_correction], input_distro,
                legend_def=leg, method='ratio',
                writeTo='refolded_wpurity_ratio', **style
                )

            method_dir.WriteTObject(hdata_unfolded, 'hdata_unfolded')
            method_dir.WriteTObject(hdata_unfolded_ps_corrected, 'hdata_unfolded_ps_corrected')
            method_dir.WriteTObject(hdata_refolded, 'hdata_refolded')
            method_dir.WriteTObject(hdata_refolded_wpurity, 'hdata_refolded_wpurity')
            method_dir.WriteTObject(error_matrix, 'error_matrix')
            method_dir.WriteTObject(hbias, 'bias')
            method_dir.WriteTObject(hn_neg_bins, 'hn_neg_bins')
            method_dir.WriteTObject(hsum_of_pulls, 'hsum_of_pulls')
            method_dir.WriteTObject(hsum_of_ratios, 'hsum_of_ratios')

        # Everything below is written at the top level of the output file.
        outfile.cd()
        to_save.extend([
            measured_no_correction,
            eff_hist,
            purity_hist,
            full_true,
            myunfolding.truth,
            myunfolding.measured,
            myunfolding.matrix,])
        if not external_tau:
            # tau_curve is the curve of the last scanned mode.
            to_save.extend([
                    l_curve,
                    tau_curve,
                    graph_x,
                    graph_y
                    ])

        if use_cov_matrix:
            to_save.extend([input_cov_matrix])
            to_save.extend([input_corr_matrix])

        for item in to_save:
            # Not every saved object is given a '.name' attribute above
            # (tau_curve only gets SetName), so fall back to GetName().
            log.debug('Saving %s as %s' % (
                getattr(item, 'name', item.GetName()), item.GetName()))
            item.Write()
        var_dir.reco_distribution.Write()
        var_dir.prefit_distribution.Write()
        # Store the chosen tau values as JSON text inside the ROOT file.
        taus_text = ROOT.TText(0., 0., prettyjson.dumps(best_taus))
        outfile.WriteTObject(taus_text, 'best_taus')
        myunfolding.write_to(outfile, 'urunfolder')
    finally:
        # Close the output file even if plotting or writing fails.
        outfile.Close()
# Example #2
# 0
# Sub-directory names that make up the "wrong" category.
wrong = [
    'semilep_wrong',
    'semilep_right_thad',
    'semilep_right_tlep',
    'other_tt_decay',
]
# Prefix of the per-sample output file names.
output_name_base = 'prob'


def merge_views(inview, subnames):
    """Return a ``views.SumView`` over the sub-directories of *inview*.

    One ``views.SubdirectoryView(inview, name)`` is created for every name
    in *subnames*, and all of them are passed to ``views.SumView``.
    """
    per_directory = []
    for subdir_name in subnames:
        per_directory.append(views.SubdirectoryView(inview, subdir_name))
    return views.SumView(*per_directory)


# The job identifier is taken from the environment; a missing 'jobid'
# variable raises KeyError here.
jobid = os.environ['jobid']
# Per-sample ROOT files to process.
input_files = glob.glob(
    'results/%s/permProbComputer/ttJets*.root' % jobid
)
plotter = BasePlotter('plots/%s/permutations' % jobid)
plotter.reset()

log.info('found %d input files' % len(input_files))

for fname in input_files:
    sample = extract_sample(fname)
    tfile = io.root_open(fname)
    test_dir = tfile.Get(right[0])
    right_view = merge_views(tfile, right)
    wrong_view = merge_views(tfile, wrong)
    #write output file
    outname = 'inputs/%s/INPUT/%s_%s.root' % (jobid, output_name_base, sample)
    with io.root_open(outname, 'w') as out:
        for shift in systematics:
            if not hasattr(test_dir, shift):
                log.warning('I could not find %s in %s, skipping systematic' %