def _visible_fraction_graph(label, visible, full):
    """Build the visible/full ratio graph used as a phase-space correction.

    Args:
        label: Name used in the warning message ('Efficiency' or 'Purity').
        visible: Projection of the migration matrix (visible phase space).
        full: Corresponding distribution before any phase-space selection.

    Returns:
        A ``ROOT.TGraphAsymmErrors(visible, full)`` when the visible integral
        is smaller than the full one, otherwise ``None`` (no correction). A
        warning is logged when the visible integral is the larger of the two.
    """
    visible_integral = visible.Integral()
    full_integral = full.Integral()
    if visible_integral < full_integral:
        return ROOT.TGraphAsymmErrors(visible, full)
    if visible_integral > full_integral:
        log.warning(
            '%s correction: The visible part of the migration matrix'
            ' has a larger integral than the full one! (%.3f vs. %.3f).\n'
            'It might be a rounding error, but please check!'
            % (label, visible_integral, full_integral)
        )
    return None


def _correction_hist(template, graph, name):
    """Flush a correction graph into a histogram (easier to handle).

    Args:
        template: Histogram cloned (and reset) to provide the binning.
        graph: Correction graph, or ``None`` when no correction applies.
        name: Name given to the returned histogram.

    Returns:
        A histogram holding the graph's Y values, with the larger of the
        asymmetric Y errors as bin error. When ``graph`` is falsy every bin
        is set to 1 with zero error.
    """
    hist = template.Clone()
    hist.reset()
    hist.name = name
    if graph:
        y_values = graph.GetY()
        err_high = graph.GetEYhigh()
        err_low = graph.GetEYlow()
        for idx in range(graph.GetN()):
            # graph points are 0-based, histogram bins start at 1
            hist[idx+1].value = y_values[idx]
            hist[idx+1].error = max(err_high[idx], err_low[idx])
    else:
        for hbin in hist:
            hbin.value = 1.
            hbin.error = 0.
    return hist


def run_unfolder(itoy = 0, outdir = opts.dir, tau = opts.tau):
    """Run the unfolding for ``opts.var`` and write plots and ROOT output.

    The function builds the unfolder from the migration matrix in
    ``resp_file``, applies purity/efficiency corrections, optionally scans
    for the best regularisation strength, and then, for every selected tau,
    produces comparison plots and stores the results in
    ``os.path.join(outdir, opts.out)``.

    Args:
        itoy: Toy index; only used to build the ``'toy_<itoy>'`` directory
            name when ``"toy"`` is part of ``opts.fit_file``.
        outdir: Directory handed to the plotter and used for the output file.
        tau: Regularisation strength. A value ``>= 0`` is used as-is (stored
            as ``'External'``); a negative value triggers the L-curve and
            rho-based tau scans. An unregularised run (``'NoReg'``, tau = 0)
            is always added.
    """
    styles = {
        'scan_overlay' : {
            'markerstyle':[0, 29], 'linecolor':[1,1],
            'markercolor':[1,2], 'drawstyle':['ALP', 'P'],
            'markersize':[0,3]
            },
        'data_overlay' : {
            'linestyle' : [1,0], 'markerstyle':[0,21],
            'linecolor' : [2,1], 'markercolor':[2,1],
            'drawstyle' : ['hist', 'p'], 'legendstyle' : ['l', 'p']
            },
        'dots' : {
            'markerstyle' : 20, 'markersize' : 2,
            'linestyle' : 0, 'drawstyle' : 'P'
            },
        'line' : {
            'linestyle':1, 'markerstyle':0
            },
        }

    plotter = BasePlotter(
        outdir,
        defaults = {
            'clone' : False,
            'show_title' : True,
            }
        )

    if "toy" in opts.fit_file:
        data_file_basedir = 'toy_' + str(itoy)
        data_file_dir = data_file_basedir + '/' + opts.var
    else:
        data_file_dir = opts.var
    xaxislabel = set_pretty_label(opts.var)

    area_constraint = 'None' if opts.no_area_constraint else 'Area'
    myunfolding = URUnfolding(regmode = opts.reg_mode, constraint = area_constraint)

    ## Migration matrix preprocessing
    ## remove oflow bins
    var_dir = getattr(resp_file, opts.var)
    migration_matrix = var_dir.migration_matrix
    for matrix_bin in migration_matrix:
        if matrix_bin.overflow:
            matrix_bin.value = 0
            matrix_bin.error = 0
    myunfolding.matrix = migration_matrix
    thruth_unscaled = var_dir.thruth_unscaled
    reco_unscaled = var_dir.reco_unscaled
    if myunfolding.orientation == 'Vertical':
        project_reco, project_gen = 'X', 'Y'
    else:
        project_reco, project_gen = 'Y', 'X'
    reco_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_reco)()
        )
    gen_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_gen)()
        )

    # The efficiency compares gen-level quantities, the purity reco-level ones
    eff_correction = _visible_fraction_graph('Efficiency', gen_project, thruth_unscaled)
    purity_correction = _visible_fraction_graph('Purity', reco_project, reco_unscaled)

    #flush graphs into histograms (easier to handle)
    eff_hist = _correction_hist(gen_project, eff_correction, 'eff_hist')
    purity_hist = _correction_hist(reco_project, purity_correction, 'purity_hist')

    #Get measured histogram
    if opts.use_reco_truth:
        log.warning("Using the MC reco distribution for the unfolding!")
        measured = getattr(resp_file, opts.var).reco_distribution
    else:
        measured = getattr(data_file, data_file_dir).tt_right

    # keep an uncorrected copy before the purity correction is applied in place
    measured_no_correction = measured.Clone()
    measured_no_correction.name = 'measured_no_correction'
    measured.name = 'measured'
    measured.multiply(purity_hist)
    myunfolding.measured = measured

    #get gen-level distribution
    gen_distro = getattr(resp_file, opts.var).true_distribution.Clone()
    full_true = gen_distro.Clone()
    full_true.name = 'complete_true_distro'
    gen_distro.multiply(eff_hist)
    gen_distro.name = 'true_distribution'
    myunfolding.truth = gen_distro

    use_cov_matrix = opts.cov_matrix != 'none'
    if use_cov_matrix:
        # toys keep the correlation matrix inside their own directory
        if 'toy' in opts.fit_file:
            corr_dir = getattr(data_file, data_file_basedir)
        else:
            corr_dir = data_file
        input_cov_matrix = make_cov_matrix(
            corr_dir.correlation_matrix,
            getattr(data_file, data_file_dir).tt_right
            )
        input_corr_matrix = make_corr_matrix(
            corr_dir.correlation_matrix,
            getattr(data_file, data_file_dir).tt_right
            )
        input_cov_matrix.name = 'input_cov_matrix'
        input_corr_matrix.name = 'input_corr_matrix'
        myunfolding.cov_matrix = input_cov_matrix
    myunfolding.InitUnfolder()

    #plot covariance matrix (only exists when a covariance matrix is in use)
    if use_cov_matrix:
        plotter.pad.cd()
        input_corr_matrix.SetStats(False)
        input_corr_matrix.Draw('colz')
        plotter.pad.SetLogz(True)
        plotter.save('correlation_matrix.png')

    #optimize
    best_taus = {}
    # Remember the decision now: the scan outputs are saved at the very end
    external_tau = tau >= 0
    if external_tau:
        best_taus['External'] = tau
    else:
        # NOTE(security): eval() runs arbitrary code taken from the
        # tau_range option; only pass trusted values.
        t_min, t_max = eval(opts.tau_range)
        best_l, l_curve, graph_x, graph_y = myunfolding.DoScanLcurve(100, t_min, t_max)
        best_taus['L_curve'] = best_l
        l_curve.SetName('lcurve')
        l_curve.name = 'lcurve'
        graph_x.name = 'l_scan_x'
        graph_y.name = 'l_scan_y'
        l_tau = math.log10(best_l)
        points = [
            (graph_x.GetX()[i], graph_x.GetY()[i], graph_y.GetY()[i])
            for i in xrange(graph_x.GetN())
            ]
        # Take the scan point closest to log10(best tau): identical to an
        # exact match when there is one, but robust against float rounding.
        closest = min(points, key=lambda point: abs(point[0] - l_tau))
        graph_best = plotting.Graph(1)
        graph_best.SetPoint(0, closest[1], closest[2])
        plotter.reset()
        plotter.overlay(
            [l_curve, graph_best],
            **styles['scan_overlay']
            )
        plotter.canvas.name = 'L_curve'

        info = plotter.make_text_box('#tau = %.5f' % best_l, 'NE')
        info.Draw()
        plotter.canvas.Update()
        plotter.set_subdir('L_curve')
        plotter.save()

        modes = ['RhoMax', 'RhoSquareAvg', 'RhoAvg']
        for mode in modes:
            plotter.set_subdir(mode)
            best_tau, tau_curve, index_best = myunfolding.DoScanTau(100, t_min, t_max, mode)
            best_taus[mode] = best_tau
            tau_curve.SetName('%s_scan' % mode)
            tau_curve.SetMarkerStyle(1)
            points = [
                (tau_curve.GetX()[i], tau_curve.GetY()[i])
                for i in xrange(tau_curve.GetN())
                ]
            graph_best = plotting.Graph(1)
            graph_best.SetPoint(0, *points[index_best])
            plotter.overlay(
                [tau_curve, graph_best],
                **styles['scan_overlay']
                )
            plotter.canvas.name = 'c'+tau_curve.GetName()

            info = plotter.make_text_box('#tau = %.5f' % best_tau, 'NE')
            info.Draw()
            plotter.save('Tau_curve')

    #force running without regularization
    best_taus['NoReg'] = 0
    for name, best_tau in best_taus.iteritems():
        log.info('best tau option for %s: %.3f' % (name, best_tau))

    if opts.runHandmade:
        #hand-made tau scan
        plotter.set_subdir('Handmade')
        unc_scan, bias_scan = myunfolding.scan_tau(
            200, 10**-6, 50, os.path.join(outdir, 'Handmade', 'scan_info.root'))

        bias_scan.name = 'Handmade'
        bias_scan.title = 'Avg. Bias - Handmade'
        plotter.plot(bias_scan, logx=True, logy=True, **styles['dots'])
        plotter.save('bias_scan')

        unc_scan.name = 'Handmade'
        unc_scan.title = 'Avg. Unc. - Handmade'
        plotter.plot(unc_scan, logx=True, logy=True, **styles['dots'])
        plotter.save('unc_scan')

        bias_points = [
            (bias_scan.GetX()[i], bias_scan.GetY()[i])
            for i in xrange(bias_scan.GetN())
            ]
        unc_points = [
            (unc_scan.GetX()[i], unc_scan.GetY()[i])
            for i in xrange(unc_scan.GetN())
            ]
        fom_scan = plotting.Graph(unc_scan.GetN())
        for idx, (bias_point, unc_point) in enumerate(zip(bias_points, unc_points)):
            # local name chosen so the `tau` argument is not overwritten
            scan_tau, bias = bias_point
            _, unc = unc_point
            fom_scan.SetPoint(idx, scan_tau, quad(bias, unc))
        fom_scan.name = 'Handmade'
        fom_scan.title = 'Figure of merit - Handmade'
        plotter.plot(fom_scan, logx=True, logy=True, **styles['dots'])
        plotter.save('fom_scan')

    to_save = []
    outfile = rootpy.io.root_open(os.path.join(outdir, opts.out),'recreate')
    for name, best_tau in best_taus.iteritems():
        plotter.set_subdir(name)
        method_dir = outfile.mkdir(name)
        myunfolding.tau = best_tau

        hdata_unfolded = myunfolding.unfolded
        #apply phase space efficiency corrections
        hdata_unfolded_ps_corrected = hdata_unfolded.Clone()
        hdata_unfolded_ps_corrected.Divide(eff_hist)

        hdata_refolded = myunfolding.refolded
        #apply purity corrections
        # NOTE(review): unlike the efficiency case above, no Divide() by
        # purity_hist follows the Clone() -- confirm whether that is intended.
        hdata_refolded_wpurity = hdata_refolded.Clone()

        error_matrix = myunfolding.ematrix_total

        # NOTE(review): not used below; kept in case the accessor matters.
        hcorrelations = myunfolding.rhoI_total
        hbias = myunfolding.bias
        myunfolding.truth.xaxis.title = xaxislabel
        hdata_unfolded.xaxis.title = xaxislabel

        n_unfolded_bins = hdata_unfolded.GetNbinsX()
        n_neg_bins = sum(
            1 for ibin in range(1, n_unfolded_bins + 1)
            if hdata_unfolded.GetBinContent(ibin) < 0
            )
        hn_neg_bins = plotting.Hist(
            2,-1, 1, name = 'nneg_bins',
            title = 'Negative bins in ' + hdata_unfolded.GetName()+ ';Bin sign; N_{bins}'
            )
        hn_neg_bins.SetBinContent(1,n_neg_bins)
        hn_neg_bins.SetBinContent(2,n_unfolded_bins-n_neg_bins)
        plotter.plot(
            hn_neg_bins, writeTo='unfolding_bins_sign', **styles['line']
            )

        leg = LegendDefinition(
            title=name,
            labels=['Truth','Unfolded'],
            position='ne'
            )
        sumofpulls = 0
        sumofratios = 0
        n_truth_bins = myunfolding.truth.GetNbinsX()
        for ibin in range(1, n_truth_bins + 1):
            binContent1 = myunfolding.truth.GetBinContent(ibin)
            binContent2 = hdata_unfolded.GetBinContent(ibin)
            binError1 = myunfolding.truth.GetBinError(ibin)
            binError2 = hdata_unfolded.GetBinError(ibin)
            error = sqrt(binError1*binError1 + binError2*binError2)
            if error != 0:
                pull = (binContent2-binContent1)/error
            else:
                pull = 9999
            # NOTE(review): for an empty truth bin `ratio` keeps the value of
            # the previous bin (or is undefined for the first bin) -- confirm
            # the intended treatment before changing it.
            if binContent1 != 0:
                ratio = binContent2/binContent1
            sumofpulls = sumofpulls + pull
            sumofratios = sumofratios + ratio
        sumofpulls = sumofpulls / n_truth_bins
        sumofratios = sumofratios / n_truth_bins

        hsum_of_pulls = plotting.Hist(
            1, 0, 1, name = 'sum_of_pulls_' + hdata_unfolded.GetName(),
            title = 'Sum of pulls wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(pulls) / N_{bins}'
            )
        hsum_of_pulls[1].value = sumofpulls
        plotter.plot(hsum_of_pulls, writeTo='unfolding_sum_of_pulls', **styles['line'])

        hsum_of_ratios = plotting.Hist(
            1, 0, 1, name = 'sum_of_ratios_' + hdata_unfolded.GetName(),
            title = 'Sum of ratios wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(ratios) / N_{bins}'
            )
        hsum_of_ratios[1].value = sumofratios
        plotter.plot(hsum_of_ratios, writeTo='unfolding_sum_of_ratios', **styles['line'])

        plotter.overlay_and_compare(
            [myunfolding.truth], hdata_unfolded,
            legend_def=leg,
            writeTo='unfolding_pull', **styles['data_overlay']
            )
        plotter.overlay_and_compare(
            [myunfolding.truth], hdata_unfolded,
            legend_def=leg, method='ratio',
            writeTo='unfolding_ratio', **styles['data_overlay']
            )

        # NOTE(review): the two calls below reuse the writeTo names of the two
        # calls above -- presumably the later plots replace the earlier ones;
        # confirm which output is wanted.
        plotter.overlay_and_compare(
            [full_true], hdata_unfolded_ps_corrected,
            legend_def=leg,
            writeTo='unfolding_pull', **styles['data_overlay']
            )
        plotter.overlay_and_compare(
            [full_true], hdata_unfolded_ps_corrected,
            legend_def=leg, method='ratio',
            writeTo='unfolding_ratio', **styles['data_overlay']
            )

        input_distro = getattr(resp_file, opts.var).prefit_distribution
        leg = LegendDefinition(title=name, position='ne')
        myunfolding.measured.xaxis.title = xaxislabel
        hdata_refolded.xaxis.title = xaxislabel
        myunfolding.measured.drawstyle = 'e1'

        style = {'linestyle':[1, 0], 'markerstyle':[20, 20],
                 'markercolor':[2,4], 'linecolor':[2,4],
                 'drawstyle' : ['hist', 'e1'], 'legendstyle' : ['l', 'p'],
                 'title' : ['Refolded', 'Reco']
                 }
        plotter.overlay_and_compare(
            [hdata_refolded], myunfolding.measured,
            legend_def=leg,
            writeTo='refolded_pull', **style
            )
        plotter.overlay_and_compare(
            [hdata_refolded], myunfolding.measured,
            legend_def=leg, method='ratio',
            writeTo='refolded_ratio', **style
            )

        style = {'linestyle':[1,0,0], 'markerstyle':[20,21,21],
                 'markercolor':[2,4,1], 'linecolor':[2,4,1],
                 'drawstyle' : ['hist', 'e1', 'e1'], 'legendstyle' : ['l', 'p', 'p'],
                 'title' : ['Refolded', 'Reco', 'Input']
                 }
        measured_no_correction.drawstyle = 'e1'
        plotter.overlay_and_compare(
            [hdata_refolded_wpurity, measured_no_correction], input_distro,
            legend_def=leg,
            writeTo='refolded_wpurity_pull', **style
            )
        plotter.overlay_and_compare(
            [hdata_refolded_wpurity, measured_no_correction], input_distro,
            legend_def=leg, method='ratio',
            writeTo='refolded_wpurity_ratio', **style
            )

        method_dir.WriteTObject(hdata_unfolded, 'hdata_unfolded')
        method_dir.WriteTObject(hdata_unfolded_ps_corrected, 'hdata_unfolded_ps_corrected')
        method_dir.WriteTObject(hdata_refolded, 'hdata_refolded')
        method_dir.WriteTObject(hdata_refolded_wpurity, 'hdata_refolded_wpurity')
        method_dir.WriteTObject(error_matrix, 'error_matrix')
        method_dir.WriteTObject(hbias, 'bias')
        method_dir.WriteTObject(hn_neg_bins, 'hn_neg_bins')
        method_dir.WriteTObject(hsum_of_pulls, 'hsum_of_pulls')
        method_dir.WriteTObject(hsum_of_ratios, 'hsum_of_ratios')

    outfile.cd()
    to_save.extend([
        measured_no_correction,
        eff_hist,
        purity_hist,
        full_true,
        myunfolding.truth,
        myunfolding.measured,
        myunfolding.matrix,
        ])
    if not external_tau:
        # scan products only exist when the scans were actually run;
        # tau_curve is the curve of the last scanned mode
        to_save.extend([
            l_curve,
            tau_curve,
            graph_x,
            graph_y
            ])

    if use_cov_matrix:
        to_save.extend([input_cov_matrix])
        to_save.extend([input_corr_matrix])

    for obj in to_save:
        log.debug('Saving %s as %s' % (obj.name, obj.GetName()))
        obj.Write()
    getattr(resp_file, opts.var).reco_distribution.Write()
    getattr(resp_file, opts.var).prefit_distribution.Write()
    # store the chosen taus as JSON text inside a TText object
    best_taus_text = ROOT.TText(0., 0., prettyjson.dumps(best_taus))
    outfile.WriteTObject(best_taus_text, 'best_taus')
    myunfolding.write_to(outfile, 'urunfolder')
    outfile.Close()
wrong = [ 'semilep_wrong', 'semilep_right_thad', 'semilep_right_tlep', 'other_tt_decay' ] output_name_base = 'prob' def merge_views(inview, subnames): subviews = [views.SubdirectoryView(inview, i) for i in subnames] return views.SumView(*subviews) jobid = os.environ['jobid'] input_files = glob.glob('results/%s/permProbComputer/ttJets*.root' % jobid) plotter = BasePlotter('plots/%s/permutations' % jobid, ) plotter.reset() log.info('found %d input files' % len(input_files)) for fname in input_files: sample = extract_sample(fname) tfile = io.root_open(fname) test_dir = tfile.Get(right[0]) right_view = merge_views(tfile, right) wrong_view = merge_views(tfile, wrong) #write output file outname = 'inputs/%s/INPUT/%s_%s.root' % (jobid, output_name_base, sample) with io.root_open(outname, 'w') as out: for shift in systematics: if not hasattr(test_dir, shift): log.warning('I could not find %s in %s, skipping systematic' %