def ratio_root(num, num_err_sq, denom, denom_err_sq):
    """Divide two binned distributions using ROOT's asymmetric-error division.

    The inputs are copied into two rootpy histograms with ``len(num)`` bins
    on [0, 1], divided with ``Graph.divide(top, bottom, "e0 midp pois")`` and
    the resulting graph points are scattered back into per-bin arrays.

    Args:
        num: Per-bin numerator contents.
        num_err_sq: Per-bin numerator uncertainty, stored as the bin error.
        denom: Per-bin denominator contents.
        denom_err_sq: Per-bin denominator uncertainty, stored as the bin error.

    Returns:
        Tuple ``(ratios, low, upper)`` of float arrays shaped like ``num``:
        the ratio and its lower / upper error for each bin.  Bins for which
        the division produced no point stay at zero.
    """
    import rootpy.plotting as rp

    # rootpy seems to switch this on, flooding the terminal with debugging output
    import logging
    logging.getLogger("matplotlib.font_manager").setLevel(logging.INFO)

    # Set up the histograms
    n_bins = len(num)
    top = rp.Hist(n_bins, 0, 1)
    bottom = rp.Hist(n_bins, 0, 1)
    # NOTE(review): the *_err_sq arguments are stored directly as bin errors;
    # if they really are squared errors a sqrt is missing here -- confirm
    # against the callers.
    for i, (d, n, d_err, n_err) in enumerate(
            zip(denom, num, denom_err_sq, num_err_sq)):
        bottom[i + 1] = (d, d_err)
        top[i + 1] = (n, n_err)

    # Do the actual division
    div = rp.Graph.divide(top, bottom, "e0 midp pois")

    # Convert this back to the array of points for the ratio plots.
    # The outputs are forced to float: with integer counts as input,
    # zeros_like(num) would inherit the integer dtype and truncate every
    # ratio and error on assignment.
    ratios = np.zeros_like(num, dtype=float)
    low = np.zeros_like(num, dtype=float)
    upper = np.zeros_like(num, dtype=float)

    # Each graph point is mapped back to its bin through its x position.
    filled_indices = [top.FindBin(point.x.value) - 1 for point in div]
    ratios[filled_indices] = [point.y.value for point in div]
    low[filled_indices] = [point.y.error_low for point in div]
    upper[filled_indices] = [point.y.error_hi for point in div]
    return ratios, low, upper
def set_dyn_binning(va, lo, up, n, err=0.15):
    """Build a variable binning by merging poorly populated bins.

    Starts from ``n`` equidistant edges between ``lo`` and ``up`` and walks
    the bins from the last one down to the second one.  Whenever a bin of the
    unit-normalised histogram of ``va`` is empty, or its relative error
    exceeds ``err``, the lower edge of that bin is dropped (merging it with
    its left neighbour) and the histogram is rebuilt with the new edges.

    Args:
        va: Values to histogram.  Iterated once per rebuild, so it must be
            re-iterable (list, array, ...), not a one-shot generator.
        lo: Lower edge of the first bin.
        up: Upper edge of the last bin.
        n: Number of initial bin edges (``n - 1`` initial bins).
        err: Maximum tolerated relative bin error.

    Returns:
        numpy array with the surviving bin edges.
    """
    def _normalised_hist(edges):
        # Explicit loop instead of map(): map() is lazy on Python 3, which
        # would leave the histogram empty and make the scaling divide by zero.
        hist = rplot.Hist(edges)
        for value in va:
            hist.Fill(value)
        hist.Scale(1 / (hist.Integral(0, hist.GetNbinsX() + 1)))
        return hist

    binning = np.linspace(lo, up, n)
    h1 = _normalised_hist(binning)
    for i in range(n - 1, 1, -1):
        content = h1.GetBinContent(i)
        # The empty-bin test comes first so the relative error is never
        # computed with a zero denominator.
        if content == 0 or (h1.GetBinError(i) / content) > err:
            binning = np.delete(binning, i - 1)
            h1 = _normalised_hist(binning)
    return binning
def fit_shape(self, histo, model, x_range, fitopt='IRMENS'):
    '''Fit histo with a ROOT function and draw the fit with its
    confidence band.

    model is a tuple describing the function; it is unpacked into
    self.parse_formula, which returns the function to fit. x_range is
    unpacked both into the function range and into the axis range of the
    band histogram. The fitted function is returned; it and the band
    histogram are appended to self.keep.'''
    fit_func = self.parse_formula(*model)
    fit_func.SetRange(*x_range)
    fit_func.SetLineWidth(3)
    fit_func.SetLineColor(ROOT.EColor.kAzure)

    # Meaning of the fit option letters (ROOT TH1::Fit):
    # "WL" Use Loglikelihood method and bin contents are not integer,
    #      i.e. histogram is weighted (must have Sumw2() set)
    # "Q"  Quiet mode (minimum printing)
    # "E"  Perform better Errors estimation using Minos technique
    # "M"  More. Improve fit results.
    #      It uses the IMPROVE command of TMinuit (see TMinuit::mnimpr).
    #      This algorithm attempts to improve the found local minimum by
    #      searching for a better one.
    # "R"  Use the Range specified in the function range
    # "N"  Do not store the graphics function, do not draw
    # "S"  The result of the fit is returned in the TFitResultPtr
    # The returned result is not used further; the binding is retained as is.
    fit_result = histo.Fit(fit_func, fitopt)

    # One band bin per point at which the function is evaluated
    n_points = fit_func.GetNpx()
    band = plotting.Hist(n_points, *x_range)
    fitter = ROOT.TVirtualFitter.GetFitter()
    fitter.GetConfidenceIntervals(band)

    band_style = (
        ('linewidth', 0),
        ('fillcolor', ROOT.EColor.kAzure - 9),
        ('fillstyle', 3013),
        ('markersize', 0),
    )
    for attribute, value in band_style:
        setattr(band, attribute, value)

    # band first, function on top of it
    band.Draw('same e3')
    fit_func.Draw('same')
    self.keep.extend([fit_func, band])
    return fit_func
def fill_hist(vals):
    """Histogram vals in 50 bins with a 20% margin around their range.

    The lower edge is the minimum scaled by 0.8 when positive and by 1.2
    otherwise; the upper edge is the maximum scaled by 1.2 when positive and
    by 0.8 otherwise. If both edges coincide the range falls back to
    [-0.1, 0.1]. Returns the filled histogram.
    """
    smallest = min(vals)
    if smallest > 0:
        low = smallest * 0.8
    else:
        low = smallest * 1.2

    largest = max(vals)
    if largest > 0:
        hi = largest * 1.2
    else:
        hi = largest * 0.8

    # degenerate range, e.g. every value is 0
    if low == hi:
        low = -0.1
        hi = 0.1

    hist = plotting.Hist(50, low, hi, title='')
    for value in vals:
        hist.Fill(value)
    return hist
def bins_projectionsX(histo2D):
    """Slice a 2D histogram into one 1D histogram per Y bin.

    Every returned histogram uses the X-axis bin edges of ``histo2D`` and
    holds the contents and errors of the regular (non-overflow) X bins of one
    Y bin.  The histogram for Y bin ``i`` gets marker style ``19 + i``.

    Args:
        histo2D: 2D histogram exposing the ROOT TH2 accessors used below.

    Returns:
        List of 1D histograms, ordered by increasing Y bin.
    """
    xaxis = histo2D.GetXaxis()
    nbins_x = histo2D.GetNbinsX()

    # range() instead of xrange(): the rest of the function already uses
    # range, and xrange does not exist on Python 3.
    edges = [float(xaxis.GetBinLowEdge(1))]
    edges.extend(
        float(xaxis.GetBinUpEdge(x)) for x in range(1, nbins_x + 1))

    projections = []
    for i in range(1, histo2D.GetNbinsY() + 1):
        projection = plotting.Hist(edges)
        projection.markerstyle = 19 + i
        for j in range(1, nbins_x + 1):
            projection.SetBinContent(j, histo2D.GetBinContent(j, i))
            projection.SetBinError(j, histo2D.GetBinError(j, i))
        projections.append(projection)
    return projections
def apply_view(self, histo):
    '''Return a copy of histo with one extra bin added on each side of
    every axis.

    The extra bins are as wide as the first / last original bin. Title,
    axis titles and decorators are carried over. Histograms that are
    neither 1D nor 2D are returned unchanged.

    NOTE(review): in the 1D case the new bins 1..N+2 are filled by zipping
    against a plain iteration over histo (presumably underflow, regular
    bins, overflow -- confirm against rootpy); in the 2D case only the
    regular bins are copied and the added border bins stay empty.'''
    def padded_edges(axis, nbins):
        # edges of the original bins, plus one bin of margin on each side
        inner = [axis.get_bin_low_edge(i) for i in range(1, nbins + 2)]
        below = inner[0] - axis.get_bin_width(1)
        above = inner[-1] + axis.get_bin_width(nbins)
        return [below] + inner + [above]

    dimensions = histo.get_dimension()

    if dimensions == 1:
        xbins = histo.get_nbins_x()
        padded = plt.Hist(padded_edges(histo.xaxis, xbins))
        padded.title = histo.title
        padded.decorate(**histo.decorators)
        padded.xaxis.title = histo.xaxis.title
        padded.yaxis.title = histo.yaxis.title
        for target, source in zip(padded[1:-1], histo):
            target.value = source.value
            target.error = source.error
        return padded

    if dimensions == 2:
        xbins = histo.get_nbins_x()
        xedges = padded_edges(histo.xaxis, xbins)
        ybins = histo.get_nbins_y()
        yedges = padded_edges(histo.yaxis, ybins)
        padded = plt.Hist2D(
            xedges, yedges, title=histo.title, **histo.decorators)
        padded.xaxis.title = histo.xaxis.title
        padded.yaxis.title = histo.yaxis.title
        # every original bin moves one position up along both axes
        for x_idx in range(1, xbins + 1):
            for y_idx in range(1, ybins + 1):
                source = histo[x_idx, y_idx]
                padded[x_idx + 1, y_idx + 1].value = source.value
                padded[x_idx + 1, y_idx + 1].error = histo[x_idx, y_idx].error
        return padded

    return histo
def linearize(histo, overflow=False):
    """Unroll a 2D histogram into a 1D histogram with one bin per cell.

    Cells are visited in ``itertools.product(x bins, y bins)`` order, i.e.
    the Y index runs fastest.  Values, errors and the number of entries are
    copied over.

    Args:
        histo: 2D histogram (``histo.DIM`` must be 2).
        overflow: When True the under/overflow cells (indices 0 and N+1 on
            each axis) are unrolled as well.

    Returns:
        1D histogram with unit-width bins on ``[0, nbins]``.

    Raises:
        RuntimeError: If ``histo`` is not two-dimensional.
    """
    if histo.DIM != 2:
        # the message used to reference an undefined name, which turned this
        # into a NameError
        raise RuntimeError(
            'the histogram I got has dimension %d, which is not supported'
            % histo.DIM)

    bx = histo.GetNbinsX()
    by = histo.GetNbinsY()
    if overflow:
        xran = range(0, bx + 2)
        yran = range(0, by + 2)
    else:
        xran = range(1, bx + 1)
        yran = range(1, by + 1)

    # One output bin per visited cell.  The overflow case used to compute
    # (bx + 2) * (by * 2), which does not match the number of cells.
    nbins = len(xran) * len(yran)
    ret = plotting.Hist(nbins, 0, nbins)
    for idx, (x, y) in enumerate(product(xran, yran)):
        ret[idx + 1].value = histo[x, y].value
        ret[idx + 1].error = histo[x, y].error
    ret.entries = histo.entries
    return ret
def _correction_graph(visible, full, label):
    '''Return the visible/full ratio graph, or None when no correction applies.

    visible is a projection of the migration matrix, full the corresponding
    unscaled distribution. A graph is only built when the visible integral
    is smaller than the full one; a larger visible integral is logged as a
    warning (label names the correction in the message).'''
    if visible.Integral() < full.Integral():
        return ROOT.TGraphAsymmErrors(visible, full)
    elif visible.Integral() == full.Integral():
        return None
    else:
        log.warning(
            '%s correction: The visible part of the migration matrix'
            ' has a larger integral than the full one! (%.3f vs. %.3f).\n'
            'It might be a rounding error, but please check!'
            % (label, visible.Integral(), full.Integral())
            )
        return None


def _correction_hist(template, correction, name):
    '''Flush a correction graph into a histogram binned like template.

    Point idx of the graph is written to bin idx+1, with the larger of the
    two asymmetric errors as bin error. Without a graph every bin is set to
    1 +/- 0.'''
    hist = template.Clone()
    hist.reset()
    hist.name = name
    if correction:
        for idx in range(correction.GetN()):
            hist[idx+1].value = correction.GetY()[idx]
            hist[idx+1].error = max(
                correction.GetEYhigh()[idx],
                correction.GetEYlow()[idx]
                )
    else:
        for ibin in hist:
            ibin.value = 1.
            ibin.error = 0.
    return hist


def run_unfolder(itoy = 0, outdir = opts.dir, tau = opts.tau):
    '''Run the unfolding for opts.var and write plots and histograms to outdir.

    itoy   -- toy index, only used when "toy" is part of opts.fit_file
    outdir -- output directory for the plots and for the opts.out ROOT file
    tau    -- regularisation strength; a negative value triggers the L-curve
              and rho scans, otherwise the value is used as given

    The unfolding is repeated for every tau collected (external or scanned,
    plus tau = 0 as 'NoReg'); each result goes to its own plot sub-directory
    and its own directory of the output file. Nothing is returned.'''
    styles = {
        'scan_overlay' : {
            'markerstyle':[0, 29], 'linecolor':[1,1],
            'markercolor':[1,2], 'drawstyle':['ALP', 'P'],
            'markersize':[0,3]
            },
        'data_overlay' : {
            'linestyle' : [1,0], 'markerstyle':[0,21],
            'linecolor' : [2,1], 'markercolor':[2,1],
            'drawstyle' : ['hist', 'p'], 'legendstyle' : ['l', 'p']
            },
        'dots' : {
            'markerstyle' : 20, 'markersize' : 2,
            'linestyle' : 0, 'drawstyle' : 'P'
            },
        'line' : {
            'linestyle':1, 'markerstyle':0
            },
        }

    plotter = BasePlotter(
        outdir, defaults = {
            'clone' : False,
            'show_title' : True,
            }
        )

    if "toy" in opts.fit_file:
        data_file_basedir = 'toy_' + str(itoy)
        data_file_dir = data_file_basedir + '/' + opts.var
    else:
        data_file_dir = opts.var
    xaxislabel = set_pretty_label(opts.var)

    if opts.no_area_constraint:
        area_constraint='None'
    else:
        area_constraint='Area'
    myunfolding = URUnfolding(regmode = opts.reg_mode, constraint = area_constraint)

    ## Migration matrix preprocessing
    ## remove oflow bins
    var_dir = getattr(resp_file, opts.var)
    migration_matrix = var_dir.migration_matrix
    for mbin in migration_matrix:
        if mbin.overflow:
            mbin.value = 0
            mbin.error = 0
    myunfolding.matrix = migration_matrix
    thruth_unscaled = var_dir.thruth_unscaled
    reco_unscaled = var_dir.reco_unscaled
    project_reco = 'X' if myunfolding.orientation == 'Vertical' else 'Y'
    project_gen = 'Y' if myunfolding.orientation == 'Vertical' else 'X'
    reco_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_reco)()
        )
    gen_project = rootpy.asrootpy(
        getattr(migration_matrix, 'Projection%s' % project_gen)()
        )

    # The efficiency warning used to print the reco integrals; each
    # correction now reports the pair of integrals it actually compares.
    eff_correction = _correction_graph(gen_project, thruth_unscaled, 'Efficiency')
    purity_correction = _correction_graph(reco_project, reco_unscaled, 'Purity')

    #flush graphs into histograms (easier to handle)
    # The purity values used to be written to a stale loop variable left
    # over from the migration-matrix loop instead of to purity_hist.
    eff_hist = _correction_hist(gen_project, eff_correction, 'eff_hist')
    purity_hist = _correction_hist(reco_project, purity_correction, 'purity_hist')

    #Get measured histogram
    measured = None
    if opts.use_reco_truth:
        log.warning("Using the MC reco distribution for the unfolding!")
        measured = getattr(resp_file, opts.var).reco_distribution
    else:
        measured = getattr(data_file, data_file_dir).tt_right

    measured_no_correction = measured.Clone()
    measured_no_correction.name = 'measured_no_correction'
    measured.name = 'measured'
    measured.multiply(purity_hist)
    myunfolding.measured = measured

    #get gen-level distribution
    gen_distro = getattr(resp_file, opts.var).true_distribution.Clone()
    full_true = gen_distro.Clone()
    full_true.name = 'complete_true_distro'
    gen_distro.multiply(eff_hist)
    gen_distro.name = 'true_distribution'
    myunfolding.truth = gen_distro

    use_cov_matrix = opts.cov_matrix != 'none'
    if use_cov_matrix:
        if 'toy' in opts.fit_file:
            input_cov_matrix = make_cov_matrix(
                getattr(data_file, data_file_basedir).correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
            input_corr_matrix = make_corr_matrix(
                getattr(data_file, data_file_basedir).correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
        else:
            input_cov_matrix = make_cov_matrix(
                data_file.correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
            input_corr_matrix = make_corr_matrix(
                data_file.correlation_matrix,
                getattr(data_file, data_file_dir).tt_right
                )
        input_cov_matrix.name = 'input_cov_matrix'
        input_corr_matrix.name = 'input_corr_matrix'
        myunfolding.cov_matrix = input_cov_matrix
    myunfolding.InitUnfolder()

    #plot correlation matrix (it only exists when a covariance is in use)
    if use_cov_matrix:
        plotter.pad.cd()
        input_corr_matrix.SetStats(False)
        input_corr_matrix.Draw('colz')
        plotter.pad.SetLogz(True)
        plotter.save('correlation_matrix.png')

    #optimize
    # Remember the decision now: it is needed again when saving, and must
    # follow the tau this call was given rather than opts.tau.
    tau_was_scanned = not (tau >= 0)
    best_taus = {}
    if not tau_was_scanned:
        best_taus['External'] = tau
    else:
        # NOTE(review): eval() on a command-line option executes arbitrary
        # code; consider ast.literal_eval if only a tuple of numbers is
        # expected.
        t_min, t_max = eval(opts.tau_range)
        best_l, l_curve, graph_x, graph_y = myunfolding.DoScanLcurve(100, t_min, t_max)
        best_taus['L_curve'] = best_l
        l_curve.SetName('lcurve')
        l_curve.name = 'lcurve'
        graph_x.name = 'l_scan_x'
        graph_y.name = 'l_scan_y'
        l_tau = math.log10(best_l)
        points = [(graph_x.GetX()[i], graph_x.GetY()[i], graph_y.GetY()[i])
                  for i in range(graph_x.GetN())]
        # NOTE(review): exact float comparison; best[0] raises IndexError if
        # no scan point matches log10(best_l) exactly.
        best = [(x,y) for i, x, y in points if l_tau == i]
        graph_best = plotting.Graph(1)
        graph_best.SetPoint(0, *best[0])
        plotter.reset()
        plotter.overlay(
            [l_curve, graph_best],
            **styles['scan_overlay']
            )
        plotter.canvas.name = 'L_curve'

        info = plotter.make_text_box('#tau = %.5f' % best_l, 'NE')
        info.Draw()
        # used to be a bare `canvas`, which is not defined in this function
        plotter.canvas.Update()
        plotter.set_subdir('L_curve')
        plotter.save()

        modes = ['RhoMax', 'RhoSquareAvg', 'RhoAvg']
        for mode in modes:
            plotter.set_subdir(mode)
            best_tau, tau_curve, index_best = myunfolding.DoScanTau(100, t_min, t_max, mode)
            best_taus[mode] = best_tau
            tau_curve.SetName('%s_scan' % mode)
            tau_curve.SetMarkerStyle(1)
            points = [(tau_curve.GetX()[i], tau_curve.GetY()[i])
                      for i in range(tau_curve.GetN())]
            best = [points[index_best]]

            graph_best = plotting.Graph(1)
            graph_best.SetPoint(0, *best[0])
            plotter.overlay(
                [tau_curve, graph_best],
                **styles['scan_overlay']
                )
            plotter.canvas.name = 'c'+tau_curve.GetName()

            info = plotter.make_text_box('#tau = %.5f' % best_tau, 'NE')
            info.Draw()
            plotter.save('Tau_curve')

    #force running without regularization
    best_taus['NoReg'] = 0
    for name, best_tau in best_taus.items():
        log.info('best tau option for %s: %.3f' % (name, best_tau))

    if opts.runHandmade:
        #hand-made tau scan
        plotter.set_subdir('Handmade')
        unc_scan, bias_scan = myunfolding.scan_tau(
            200, 10**-6, 50, os.path.join(outdir, 'Handmade', 'scan_info.root'))

        bias_scan.name = 'Handmade'
        bias_scan.title = 'Avg. Bias - Handmade'
        plotter.plot(bias_scan, logx=True, logy=True, **styles['dots'])
        plotter.save('bias_scan')

        unc_scan.name = 'Handmade'
        unc_scan.title = 'Avg. Unc. - Handmade'
        plotter.plot(unc_scan, logx=True, logy=True, **styles['dots'])
        plotter.save('unc_scan')

        bias_points = [(bias_scan.GetX()[i], bias_scan.GetY()[i])
                       for i in range(bias_scan.GetN())]
        unc_points = [(unc_scan.GetX()[i], unc_scan.GetY()[i])
                      for i in range(unc_scan.GetN())]
        fom_scan = plotting.Graph(unc_scan.GetN())
        # the loop variable is deliberately not called `tau`, so the function
        # argument is not overwritten
        for idx, (binfo, uinfo) in enumerate(zip(bias_points, unc_points)):
            scan_tau, bias = binfo
            _, unc = uinfo
            fom_scan.SetPoint(idx, scan_tau, quad(bias, unc))
        fom_scan.name = 'Handmade'
        fom_scan.title = 'Figure of merit - Handmade'
        plotter.plot(fom_scan, logx=True, logy=True, **styles['dots'])
        plotter.save('fom_scan')

    to_save = []
    outfile = rootpy.io.root_open(os.path.join(outdir, opts.out),'recreate')
    for name, best_tau in best_taus.items():
        plotter.set_subdir(name)
        method_dir = outfile.mkdir(name)
        myunfolding.tau = best_tau

        hdata_unfolded = myunfolding.unfolded
        #apply phase space efficiency corrections
        hdata_unfolded_ps_corrected = hdata_unfolded.Clone()
        hdata_unfolded_ps_corrected.Divide(eff_hist)

        hdata_refolded = myunfolding.refolded
        #apply purity corrections
        # NOTE(review): this is a plain clone, no purity correction is
        # applied to it here.
        hdata_refolded_wpurity = hdata_refolded.Clone()

        error_matrix = myunfolding.ematrix_total

        hcorrelations = myunfolding.rhoI_total
        hbias = myunfolding.bias
        myunfolding.truth.xaxis.title = xaxislabel
        hdata_unfolded.xaxis.title = xaxislabel

        n_neg_bins = 0
        for ibin in range(1,hdata_unfolded.GetNbinsX()+1):
            if hdata_unfolded.GetBinContent(ibin) < 0:
                n_neg_bins = n_neg_bins + 1
        hn_neg_bins = plotting.Hist(
            2,-1, 1, name = 'nneg_bins',
            title = 'Negative bins in ' + hdata_unfolded.GetName()+ ';Bin sign; N_{bins}'
            )
        hn_neg_bins.SetBinContent(1,n_neg_bins)
        hn_neg_bins.SetBinContent(2,hdata_unfolded.GetNbinsX()-n_neg_bins)
        plotter.plot(
            hn_neg_bins, writeTo='unfolding_bins_sign', **styles['line']
            )

        leg = LegendDefinition(
            title=name,
            labels=['Truth','Unfolded'],
            position='ne'
            )
        sumofpulls = 0
        sumofratios = 0
        # Defined up front so an empty first truth bin cannot leave it
        # unbound.  NOTE(review): for later empty truth bins the ratio of
        # the previous bin is still re-used, as before -- confirm intent.
        ratio = 0.
        for ibin in range(1,myunfolding.truth.GetNbinsX()+1):
            binContent1 = myunfolding.truth.GetBinContent(ibin)
            binContent2 = hdata_unfolded.GetBinContent(ibin)
            binError1 = myunfolding.truth.GetBinError(ibin)
            binError2 = hdata_unfolded.GetBinError(ibin)
            error = sqrt(binError1*binError1 + binError2*binError2)
            if error != 0:
                pull = (binContent2-binContent1)/error
            else:
                pull = 9999
            if binContent1 != 0:
                ratio = binContent2/binContent1
            sumofpulls = sumofpulls + pull
            sumofratios = sumofratios + ratio
        sumofpulls = sumofpulls / myunfolding.truth.GetNbinsX()
        sumofratios = sumofratios / myunfolding.truth.GetNbinsX()

        hsum_of_pulls = plotting.Hist(
            1, 0, 1, name = 'sum_of_pulls_' + hdata_unfolded.GetName(),
            title = 'Sum of pulls wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(pulls) / N_{bins}'
            )
        hsum_of_pulls[1].value = sumofpulls
        plotter.plot(hsum_of_pulls, writeTo='unfolding_sum_of_pulls', **styles['line'])

        hsum_of_ratios = plotting.Hist(
            1, 0, 1, name = 'sum_of_ratios_' + hdata_unfolded.GetName(),
            title = 'Sum of ratios wrt truth for ' + hdata_unfolded.GetName()+ ';None; #Sigma(ratios) / N_{bins}'
            )
        hsum_of_ratios[1].value = sumofratios
        plotter.plot(hsum_of_ratios, writeTo='unfolding_sum_of_ratios', **styles['line'])

        plotter.overlay_and_compare(
            [myunfolding.truth], hdata_unfolded,
            legend_def=leg,
            writeTo='unfolding_pull',
            **styles['data_overlay']
            )
        plotter.overlay_and_compare(
            [myunfolding.truth], hdata_unfolded,
            legend_def=leg,
            method='ratio',
            writeTo='unfolding_ratio',
            **styles['data_overlay']
            )

        # NOTE(review): same writeTo names as the two plots above.
        plotter.overlay_and_compare(
            [full_true], hdata_unfolded_ps_corrected,
            legend_def=leg,
            writeTo='unfolding_pull',
            **styles['data_overlay']
            )
        plotter.overlay_and_compare(
            [full_true], hdata_unfolded_ps_corrected,
            legend_def=leg,
            method='ratio',
            writeTo='unfolding_ratio',
            **styles['data_overlay']
            )

        input_distro = getattr(resp_file, opts.var).prefit_distribution
        leg = LegendDefinition(title=name, position='ne')
        myunfolding.measured.xaxis.title = xaxislabel
        hdata_refolded.xaxis.title = xaxislabel
        myunfolding.measured.drawstyle = 'e1'

        style = {'linestyle':[1, 0], 'markerstyle':[20, 20],
                 'markercolor':[2,4], 'linecolor':[2,4],
                 'drawstyle' : ['hist', 'e1'], 'legendstyle' : ['l', 'p'],
                 'title' : ['Refolded', 'Reco']
                 }
        plotter.overlay_and_compare(
            [hdata_refolded], myunfolding.measured,
            legend_def=leg,
            writeTo='refolded_pull',
            **style
            )
        plotter.overlay_and_compare(
            [hdata_refolded], myunfolding.measured,
            legend_def=leg,
            method='ratio',
            writeTo='refolded_ratio',
            **style
            )

        style = {'linestyle':[1,0,0], 'markerstyle':[20,21,21],
                 'markercolor':[2,4,1], 'linecolor':[2,4,1],
                 'drawstyle' : ['hist', 'e1', 'e1'], 'legendstyle' : ['l', 'p', 'p'],
                 'title' : ['Refolded', 'Reco', 'Input']
                 }
        measured_no_correction.drawstyle = 'e1'
        plotter.overlay_and_compare(
            [hdata_refolded_wpurity, measured_no_correction], input_distro,
            legend_def=leg,
            writeTo='refolded_wpurity_pull',
            **style
            )
        plotter.overlay_and_compare(
            [hdata_refolded_wpurity, measured_no_correction], input_distro,
            legend_def=leg,
            method='ratio',
            writeTo='refolded_wpurity_ratio',
            **style
            )

        method_dir.WriteTObject(hdata_unfolded, 'hdata_unfolded')
        method_dir.WriteTObject(hdata_unfolded_ps_corrected, 'hdata_unfolded_ps_corrected')
        method_dir.WriteTObject(hdata_refolded, 'hdata_refolded')
        method_dir.WriteTObject(hdata_refolded_wpurity, 'hdata_refolded_wpurity')
        method_dir.WriteTObject(error_matrix, 'error_matrix')
        method_dir.WriteTObject(hbias, 'bias')
        method_dir.WriteTObject(hn_neg_bins, 'hn_neg_bins')
        method_dir.WriteTObject(hsum_of_pulls, 'hsum_of_pulls')
        method_dir.WriteTObject(hsum_of_ratios, 'hsum_of_ratios')

    outfile.cd()
    to_save.extend([
        measured_no_correction,
        eff_hist,
        purity_hist,
        full_true,
        myunfolding.truth,
        myunfolding.measured,
        myunfolding.matrix,
        ])
    # the scan objects only exist when the scans were actually run
    if tau_was_scanned:
        to_save.extend([
            l_curve,
            tau_curve,
            graph_x,
            graph_y
            ])

    if use_cov_matrix:
        to_save.extend([input_cov_matrix])
        to_save.extend([input_corr_matrix])

    for obj in to_save:
        log.debug('Saving %s as %s' % (obj.name, obj.GetName()))
        obj.Write()
    getattr(resp_file, opts.var).reco_distribution.Write()
    getattr(resp_file, opts.var).prefit_distribution.Write()
    # best taus are stored as the text of a TText object
    taus_text = ROOT.TText(0., 0., prettyjson.dumps(best_taus))
    outfile.WriteTObject(taus_text, 'best_taus')
    myunfolding.write_to(outfile, 'urunfolder')
    outfile.Close()
# same was predicted for wiggle unchanged_predictions[pred_class].append(max_val) else: # other was predicted for wiggle changed_predictions[pred_class].append(max_val) unchangeds.append(unchanged_predictions) changeds.append(changed_predictions) for i_node in range(len(event_classes)): # loop over samples unchanged_hists = [] changed_hists = [] for i_sample in range(n_samples): # generate unchanged histogram values = unchangeds[i_sample][i_node] unc_h = rp.Hist(bins, *bin_range, title="unchanged prediction") unc_h.markersize = 0 unc_h.legendstyle = "F" unc_h.fillstyle = "solid" unc_h.fillcolor = "green" unc_h.linecolor = "black" unc_h.fill_array(values) unchanged_hists.append(unc_h) # generate changed histogram values = changeds[i_sample][i_node] ch_h = rp.Hist(bins, *bin_range, title="changed prediction") ch_h.markersize = 0 ch_h.legendstyle = "F" ch_h.fillstyle = "solid" ch_h.fillcolor = "darkred"
def plot_class_differences(self, log=False):
    ''' plot, for the events predicted as each node, the relative difference
        between that node's output and the output of every other node

        one canvas per node is saved to
        <save_path>/node_differences_<node>.pdf

        log: draw the y axis in log scale '''
    pltstyle.init_plot_style()

    nbins = 20
    bin_range = [0., 1.]

    # the event weights do not depend on the node: fetch them once instead
    # of once per event
    lumi_weights = self.data.get_lumi_weights()

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        node_index = self.data.class_translation[node_cls]

        # get outputs of node
        node_values = self.mainnet_predicted_vector[:, i]

        # events predicted as this node; the same selection is used for the
        # node itself, the weights and every other node
        selected = [k for k in range(len(node_values))
                    if self.predicted_classes[k] == node_index]
        filtered_node_values = np.array([node_values[k] for k in selected])
        filtered_weights = [lumi_weights[k] for k in selected]

        hists = []
        max_val = 0
        # loop over other nodes and get those predictions
        for j, other_cls in enumerate(self.event_classes):
            if i == j:
                continue
            other_values = self.mainnet_predicted_vector[:, j]
            filtered_other_values = np.array(
                [other_values[k] for k in selected])

            # get difference of predicted node value and other value
            diff_values = (filtered_node_values - filtered_other_values) \
                / filtered_node_values

            hist = rp.Hist(nbins, *bin_range,
                           title=str(other_cls) + " node",
                           drawstyle="HIST E1 X0")
            pltstyle.set_sig_hist_style(hist, other_cls)
            hist.fill_array(diff_values, filtered_weights)
            max_val = max(max_val, hist.GetMaximum())
            hists.append(hist)

        # nothing to compare against when there is only a single class
        if not hists:
            continue

        # the first histogram goes into the stack, the others are drawn
        # next to it
        first_hist = hists[0]
        histograms = hists[1:]
        stack = rp.HistStack([first_hist], stacked=True)

        # create canvas
        canvas = pltstyle.init_canvas()

        # drawing hists
        stack.SetMaximum(max_val * 1.3)
        rp.utils.draw([stack] + histograms, pad=canvas,
                      xtitle="relative difference (" + str(node_cls)
                      + " - X_node)/" + str(node_cls),
                      ytitle="Events")
        if log:
            canvas.cd().SetLogy()

        # legend (the reference is kept until the canvas is saved)
        legend = pltstyle.init_legend([first_hist] + histograms)
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # save
        out_path = self.save_path + "/node_differences_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_classification(self, log=False):
    ''' plot all events classified as one category

        for every node the output of that node is drawn for the events
        predicted as that node, split by true class: backgrounds are
        stacked, ttHbb is overlaid scaled to the background integral

        one canvas per node is saved to <save_path>/predictions_<node>.pdf

        log: draw the y axis in log scale '''
    pltstyle.init_plot_style()

    nbins = 20
    bin_range = [0., 1.]

    ttH_index = self.data.class_translation["ttHbb"]

    # labels and weights do not depend on node or class: fetch them once
    # instead of once per event, per class, per node
    true_labels = self.data.get_test_labels(as_categorical = False)
    lumi_weights = self.data.get_lumi_weights()

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        node_index = self.data.class_translation[node_cls]

        # get outputs of node
        out_values = self.mainnet_predicted_vector[:, i]

        # events predicted as this node
        in_node = [k for k in range(len(out_values))
                   if self.predicted_classes[k] == node_index]

        # fill lists according to class
        bkg_hists = []
        weight_integral = 0

        # loop over all classes to fill hist according to predicted class
        for truth_cls in self.event_classes:
            class_index = self.data.class_translation[truth_cls]

            # filter values per event class
            selected = [k for k in in_node if true_labels[k] == class_index]
            filtered_values = [out_values[k] for k in selected]
            filtered_weights = [lumi_weights[k] for k in selected]

            # ttH_index comes from class_translation, so it is compared
            # with the translated class index, not the position in
            # self.event_classes
            if class_index == ttH_index:
                # signal in this node
                sig_values = filtered_values
                sig_label = str(truth_cls)
                sig_weights = filtered_weights
            else:
                # background in this node
                weight_integral += sum(filtered_weights)
                hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                pltstyle.set_bkg_hist_style(hist, truth_cls)
                hist.fill_array(filtered_values, filtered_weights)
                bkg_hists.append(hist)

        # stack backgrounds
        bkg_stack = rp.HistStack(bkg_hists, stacked=True,
                                 drawstyle="HIST E1 X0")
        bkg_stack.SetMinimum(1e-4)
        max_val = bkg_stack.GetMaximum() * 1.3
        bkg_stack.SetMaximum(max_val)

        # plot signal, scaled to the background integral; without any
        # signal weight there is nothing to scale
        weight_sum = sum(sig_weights)
        if weight_sum:
            scale_factor = 1. * weight_integral / weight_sum
        else:
            scale_factor = 1.
        sig_weights = [w * scale_factor for w in sig_weights]

        sig_title = sig_label + "*{:.3f}".format(scale_factor)
        sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        # creating canvas
        canvas = pltstyle.init_canvas()

        # drawing hists
        rp.utils.draw([bkg_stack, sig_hist],
                      xtitle="Events predicted as " + node_cls,
                      ytitle="Events", pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # legend (the reference is kept until the canvas is saved)
        legend = pltstyle.init_legend(bkg_hists + [sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)
        # S/B is undefined without background; report nan instead of
        # raising just before the canvas is saved
        if weight_integral:
            s_over_b = weight_sum / weight_integral
        else:
            s_over_b = float("nan")
        print("S/B = {}".format(s_over_b))

        # save
        out_path = self.save_path + "/predictions_{}.pdf".format(node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_discriminators(self, log=False, cut_on_variable=None):
    ''' plot discriminators for output classes

        for every node the node output is drawn for all events, split by
        true class: backgrounds are stacked, ttHbb is overlaid scaled to
        the background integral, and the ROC AUC of the node against the
        ttH flag is added to the plot

        one canvas per node is saved to
        <save_path>/discriminator_<node>.pdf

        log:             draw the y axis in log scale
        cut_on_variable: optional dict with keys "class" and "val"; only
                         events whose output of node "class" is <= "val"
                         are kept '''
    pltstyle.init_plot_style()

    nbins = 50
    bin_range = [0., 1.]

    # get some ttH specific info for plotting
    ttH_index = self.data.class_translation["ttHbb"]
    ttH_true_labels = self.data.get_ttH_flag()

    # labels and weights do not depend on node or class: fetch them once
    # instead of once per event, per class, per node
    true_labels = self.data.get_test_labels(as_categorical = False)
    lumi_weights = self.data.get_lumi_weights()

    # apply cut to output node value if wanted
    if cut_on_variable:
        cut_class = cut_on_variable["class"]
        cut_value = cut_on_variable["val"]

        cut_index = self.data.class_translation[cut_class]
        cut_prediction = self.mainnet_predicted_vector[:, cut_index]

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        # get outputs of node
        out_values = self.mainnet_predicted_vector[:, i]

        # calculate node specific ROC value
        node_ROC = roc_auc_score(ttH_true_labels, out_values)

        # events surviving the optional cut
        if cut_on_variable:
            kept = [k for k in range(len(out_values))
                    if cut_prediction[k] <= cut_value]
        else:
            kept = range(len(out_values))

        # fill lists according to class
        bkg_hists = []
        weight_integral = 0

        # loop over all classes to fill hist according to predicted class
        for truth_cls in self.event_classes:
            class_index = self.data.class_translation[truth_cls]

            # filter values per event class
            selected = [k for k in kept if true_labels[k] == class_index]
            filtered_values = [out_values[k] for k in selected]
            filtered_weights = [lumi_weights[k] for k in selected]

            # ttH_index comes from class_translation, so it is compared
            # with the translated class index, not the position in
            # self.event_classes
            if class_index == ttH_index:
                # ttH signal
                sig_values = filtered_values
                sig_label = str(truth_cls)
                sig_weights = filtered_weights
            else:
                # background in this node
                weight_integral += sum(filtered_weights)
                hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                pltstyle.set_bkg_hist_style(hist, truth_cls)
                hist.fill_array(filtered_values, filtered_weights)
                bkg_hists.append(hist)

        # stack backgrounds
        bkg_stack = rp.HistStack(bkg_hists, stacked=True,
                                 drawstyle="HIST E1 X0")
        bkg_stack.SetMinimum(1e-4)
        max_val = bkg_stack.GetMaximum() * 1.3
        bkg_stack.SetMaximum(max_val)

        # plot signal, scaled to the background integral; without any
        # signal weight there is nothing to scale
        weight_sum = sum(sig_weights)
        if weight_sum:
            scale_factor = 1. * weight_integral / weight_sum
        else:
            scale_factor = 1.
        sig_weights = [w * scale_factor for w in sig_weights]

        sig_title = sig_label + "*{:.3f}".format(scale_factor)
        sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        # creating canvas
        canvas = pltstyle.init_canvas()

        # drawing histograms
        rp.utils.draw([bkg_stack, sig_hist],
                      xtitle=node_cls + " Discriminator",
                      ytitle="Events", pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # creating legend (the reference is kept until the canvas is saved)
        legend = pltstyle.init_legend(bkg_hists + [sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # add ROC value to plot
        pltstyle.add_ROC_value(canvas, node_ROC)

        # save canvas
        out_path = self.save_path + "/discriminator_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def _name_selected(name, include_regex, exclude_regex):
    '''True if name matches include_regex (when given) and does not match
    exclude_regex (when given).'''
    if include_regex and not include_regex.match(name):
        return False
    if exclude_regex and exclude_regex.match(name):
        return False
    return True


def _save_summary_plots(summaries, out_dir, tdir, reference=None):
    '''Draws every histogram of summaries on its own canvas, saves it as
    png and pdf in out_dir and writes it to tdir. If reference is given a
    red horizontal line is drawn at that y value.'''
    for histo in summaries.itervalues():
        canvas = plotting.Canvas()
        histo.Draw()
        canvas.Update()
        if reference is not None:
            line = ROOT.TLine(
                histo.GetBinLowEdge(1), reference,
                histo.GetBinLowEdge(histo.GetNbinsX() + 1), reference)
            line.SetLineColor(2)
            line.Draw("same")
        canvas.Update()
        canvas.SaveAs('%s/%s.png' % (out_dir, histo.GetName()))
        canvas.SaveAs('%s/%s.pdf' % (out_dir, histo.GetName()))
        tdir.WriteObject(histo, histo.GetName())


def run_module(**kwargs):
    '''Produces diagnostic plots for a set of toy fits.

    The keyword arguments are wrapped in a Struct; the attributes read here
    are: out, mlfit, harvested, variable, biasFile, oneshot, useprefit,
    nopars, postpulls, noshapes, skipFit and the four optional regular
    expressions pars_regex, pars_out_regex, sample_regex, sample_out_regex.

    Steps:
      * reads 'fit_s' and 'norm_fit_s' from every toy of args.mlfit (or from
        the file itself if args.oneshot) and records the failed fits;
      * unless args.nopars, plots the fitted yields and parameters;
      * unless args.postpulls, plots post-fit pulls/deltas and summaries;
      * unless args.noshapes, overlays the toy envelope of every shape in
        args.harvested with the prefit and/or biased shape.
    Everything is written below args.out.
    '''
    args = Struct(**kwargs)
    mkdir(args.out)
    canvas = plotting.Canvas()

    # Optional include / exclude filters on parameter and sample names
    pars_regex = re.compile(args.pars_regex) if args.pars_regex else None
    sample_regex = re.compile(args.sample_regex) if args.sample_regex else None
    pars_out_regex = re.compile(args.pars_out_regex) \
        if args.pars_out_regex else None
    sample_out_regex = re.compile(args.sample_out_regex) \
        if args.sample_out_regex else None

    output_file = io.root_open('%s/output.root' % args.out, 'recreate')
    try:
        output_file.mkdir('floating_pars')
        pulls_tdir = output_file.mkdir('postfit_pulls')
        failed_fits = set()
        fit_statuses = plotting.Hist(10, -1.5, 8.5)
        with io.root_open(args.mlfit) as mlfit:
            # The fit results are stored although never read back.
            # NOTE(review): presumably this keeps the parameter objects
            # collected in pars/yields alive -- confirm before removing.
            failed_results = []
            passes_results = []
            pars = {}
            yields = {}
            first = True
            if args.oneshot:
                toys = [None]
            else:
                toys = [key.GetName() for key in mlfit.keys()
                        if key.GetName().startswith('toy_')]
            log.info('examining %i toys' % len(toys))
            prefit_nuis = None
            if args.useprefit:
                prefit_nuis = ArgSet(mlfit.nuisances_prefit)

            nfailed = 0
            for toy in toys:
                toy_dir = mlfit.Get(toy) if not args.oneshot else mlfit
                keys = set([key.GetName() for key in toy_dir.GetListOfKeys()])
                if 'norm_fit_s' not in keys or 'fit_s' not in keys:
                    log.error('Fit %s failed to produce output!' % toy)
                    failed_fits.add(toy)
                    continue
                norms = [norm for norm in ArgSet(toy_dir.Get('norm_fit_s'))]
                fit_result = toy_dir.Get('fit_s')
                fit_pars = ArgList(fit_result.floatParsFinal())

                if first:
                    # the first usable toy defines which names are tracked
                    first = False
                    for par in fit_pars:
                        if _name_selected(par.GetName(),
                                          pars_regex, pars_out_regex):
                            pars[par.GetName()] = []
                    for norm in norms:
                        if _name_selected(norm.GetName(),
                                          sample_regex, sample_out_regex):
                            yields[norm.GetName()] = []

                fit_statuses.Fill(fit_result.status())
                fit_failed = any(par.getError() == 0 for par in fit_pars) \
                    or fit_result.status() != 0
                if fit_failed:
                    log.error(
                        'Fit %s failed to converge properly. It has status %i!'
                        % (toy, fit_result.status()))
                    nfailed += 1
                    failed_fits.add(toy)
                    failed_results.append(fit_result)
                    continue
                passes_results.append(fit_result)

                for norm in norms:
                    if norm.GetName() in yields:
                        yields[norm.GetName()].append(norm)
                for par in fit_pars:
                    if par.GetName() in pars:
                        pars[par.GetName()].append(par)

            if nfailed:
                log.error('There were %i fit failed!' % nfailed)
            with open('%s/info.txt' % args.out, 'w') as info:
                info.write('There were %i fit failed!\n' % nfailed)
            fit_statuses.Draw()
            canvas.SaveAs('%s/fit_status.png' % args.out)

            if not args.nopars:
                # Plots the post-fit distribution of the POI and nuisances
                out = os.path.join(args.out, 'floating_parameters')
                mkdir(out)
                for name, values in yields.iteritems():
                    make_hist(name, values, out, prefix='yield_')
                for name, values in pars.iteritems():
                    make_hist(name, values, out, prefix='par_')

            if not args.postpulls:
                # Plots the post-fit pulls
                # (nuisance(post) - nuisance(pre))/unc(post)
                pulls_dir = os.path.join(args.out, 'postfit_pulls')
                mkdir(pulls_dir)

                ROOT.gStyle.SetOptFit(11111)

                # The exclusion regex is tested against the parameter name
                # (it used to be tested against a stale loop variable).
                singlenames = set()
                for name in pars:
                    if _name_selected(name, pars_regex, pars_out_regex):
                        singlenames.add(get_key(name))

                pulls_mean_summary = {}
                pulls_sigma_summary = {}
                deltas_mean_summary = {}
                deltas_sigma_summary = {}
                for name in singlenames:
                    # one bin per parameter whose full name contains name
                    nbins = 0
                    for fullname in pars:
                        if name in fullname:
                            nbins = nbins + 1
                    try:
                        pulls_mean_summary[name] = plotting.Hist(
                            nbins, 0.5, nbins + 0.5,
                            name="%s_pull_mean_summary" % name)
                        pulls_sigma_summary[name] = plotting.Hist(
                            nbins, 0.5, nbins + 0.5,
                            name="%s_pull_sigma_summary" % name)
                        deltas_mean_summary[name] = plotting.Hist(
                            nbins, 0.5, nbins + 0.5,
                            name="%s_delta_mean_summary" % name)
                        deltas_sigma_summary[name] = plotting.Hist(
                            nbins, 0.5, nbins + 0.5,
                            name="%s_delta_sigma_summary" % name)
                    except Exception:
                        # NOTE(review): debugging hook kept from the
                        # original code; it stops in the debugger.
                        set_trace()

                npars = len(pars)
                pulls_mean_summary['all'] = plotting.Hist(
                    npars, 0.5, npars + 0.5, name="all_pull_mean_summary")
                pulls_sigma_summary['all'] = plotting.Hist(
                    npars, 0.5, npars + 0.5, name="all_pull_sigma_summary")
                deltas_mean_summary['all'] = plotting.Hist(
                    npars, 0.5, npars + 0.5, name="all_delta_mean_summary")
                deltas_sigma_summary['all'] = plotting.Hist(
                    npars, 0.5, npars + 0.5, name="all_delta_sigma_summary")

                for name, values in pars.iteritems():
                    make_post_distributions(
                        name, values, pulls_dir,
                        pulls_mean_summary, pulls_sigma_summary,
                        prefix='pull_', dist='pull', prefit=prefit_nuis,
                        tdir=pulls_tdir, skipFit=args.skipFit)
                    make_post_distributions(
                        name, values, pulls_dir,
                        deltas_mean_summary, deltas_sigma_summary,
                        prefix='delta_', dist='delta', prefit=prefit_nuis,
                        tdir=pulls_tdir, skipFit=args.skipFit)

                # reference lines: pull mean at 0, pull sigma at 1,
                # delta mean at 0, none for the delta sigma
                _save_summary_plots(pulls_mean_summary, pulls_dir,
                                    pulls_tdir, reference=0)
                _save_summary_plots(pulls_sigma_summary, pulls_dir,
                                    pulls_tdir, reference=1)
                _save_summary_plots(deltas_mean_summary, pulls_dir,
                                    pulls_tdir, reference=0)
                _save_summary_plots(deltas_sigma_summary, pulls_dir,
                                    pulls_tdir)

        if not args.noshapes:
            # Overlays the prefit values of the different shapes with the
            # envelope of what is fitted by the toys
            out = os.path.join(args.out, 'shapes')
            mkdir(out)
            biased_shapes = {}
            if args.biasFile:
                with io.root_open(args.biasFile) as biased:
                    biased_dir = biased.prefit \
                        if hasattr(biased, 'prefit') else None
                    ROOT.TH1.AddDirectory(False)
                    if biased_dir is None:
                        log.warning(
                            'No prefit directory found in the bias file')
                    else:
                        for key in biased_dir.keys():
                            biased_shapes[key.name] = asrootpy(
                                key.ReadObj().Clone())

            with io.root_open(args.harvested) as harvest:
                has_prefit = hasattr(harvest, 'prefit')
                prefit = harvest.prefit if has_prefit else None
                toy_names = [key.GetName() for key in harvest.keys()
                             if key.GetName().startswith('toy_')]
                toy_envelope = EnvelopeView(
                    *[harvest.get(toy_name).get(args.variable)
                      for toy_name in toy_names
                      if toy_name not in failed_fits])
                #FIXME! should not depend on prefit!
                first_toy = toy_names[0]
                # a one-element set: set('...') would split the string
                # into single characters and never match a key name
                not_shapes = set(['correlation_matrix'])
                shapes = [
                    key.GetName()
                    for key in harvest.get(first_toy).get(args.variable).keys()
                    if key.GetName() not in not_shapes
                ]

                for shape in shapes:
                    canvas = plotting.Canvas()
                    canvas.SetCanvasSize(canvas.GetWw(),
                                         int(canvas.GetWh() * 1.3))
                    upper_pad = plotting.Pad(0, 0.33, 1., 1.)
                    lower_pad = plotting.Pad(0, 0., 1., 0.33)
                    upper_pad.set_bottom_margin(0.001)
                    lower_pad.set_top_margin(0.005)
                    lower_pad.set_bottom_margin(
                        lower_pad.get_bottom_margin() * 3)
                    upper_pad.Draw()
                    lower_pad.Draw()
                    upper_pad.cd()

                    biased_shape = biased_shapes.get(shape, None)
                    toy_shape = toy_envelope.Get(shape)
                    pre_shape = None

                    legend = plotting.Legend(
                        3 + int(has_prefit) + int(bool(biased_shape)),
                        rightmargin=0.07, topmargin=0.05, leftmargin=0.45)
                    legend.SetBorderSize(0)

                    if biased_shape:
                        biased_shape.title = 'true shape'
                        biased_shape.legendstyle = 'p'
                        biased_shape.inlegend = True
                        biased_shape.drawstyle = 'p'

                    if has_prefit:
                        pre_shape = prefit.Get(shape)
                        pre_shape.title = 'input shape'
                        pre_shape.legendstyle = 'p'
                        pre_shape.drawstyle = 'p'
                        if biased_shape:
                            # markers are taken by the true shape
                            pre_shape.legendstyle = 'l'
                            pre_shape.drawstyle = 'hist'
                            pre_shape.linecolor = 'blue'
                            pre_shape.fillstyle = 0

                    toy_shape.Draw()
                    if has_prefit:
                        pre_shape.Draw('same')
                    if biased_shape:
                        biased_shape.Draw('same')

                    legend.AddEntry(toy_shape.two_sigma)
                    legend.AddEntry(toy_shape.one_sigma)
                    legend.AddEntry(toy_shape.median)
                    if has_prefit:
                        legend.AddEntry(pre_shape)
                    if biased_shape:
                        legend.AddEntry(biased_shape)
                    legend.Draw()

                    #compute pulls
                    pulls = None
                    total_height = upper_pad.GetHNDC() + lower_pad.GetHNDC()
                    labelSizeFactor2 = total_height / lower_pad.GetHNDC()
                    labelSizeFactor1 = total_height / upper_pad.GetHNDC()
                    label_factor = labelSizeFactor2 / labelSizeFactor1
                    if has_prefit or biased_shape:
                        lower_pad.cd()
                        ref_histo = biased_shape if biased_shape else pre_shape
                        pulls = toy_shape.median.Clone()
                        pulls.Reset()
                        for ref_bin, toy_bin, pull_bin in zip(
                                ref_histo, toy_shape, pulls):
                            if toy_bin.error == (0.0, 0.0):
                                continue
                            abs_pull = toy_bin.median - ref_bin.value
                            #pick correct side of the errors
                            err = toy_bin.error[1] if abs_pull < 0 \
                                else toy_bin.error[0]
                            pull_bin.value = abs_pull / err
                        pulls.xaxis.title = args.variable
                        pulls.yaxis.title = 'pulls'
                        pulls.set_label_size(
                            ROOT.gStyle.GetLabelSize() * label_factor, "XYZ")
                        pulls.set_title_size(
                            ROOT.gStyle.GetTitleSize() * label_factor, "XYZ")
                        pulls.yaxis.set_title_offset(
                            pulls.GetYaxis().GetTitleOffset() / label_factor)
                        pulls.Draw()

                    canvas.Update()
                    canvas.SaveAs('%s/%s.png' % (out, shape))
                    canvas.SaveAs('%s/%s.pdf' % (out, shape))
                    with open(os.path.join(out, '%s.json' % shape), 'w') \
                            as jfile:
                        jfile.write(toy_shape.json())
    finally:
        # close the output file also when one of the steps above fails
        output_file.Close()
def _toy_padded_range(vals):
    '''Returns (min, max) of vals, each moved outwards by 20% of its value.'''
    low = min(vals)
    low = 0.8 * low if low > 0 else 1.2 * low
    high = max(vals)
    high = 0.8 * high if high < 0 else 1.2 * high
    return low, high


def _toy_filled_hist(nbins, low, high, name, title, vals):
    '''Books a Hist with the given binning and fills it with vals.'''
    ROOT.TH1.AddDirectory(False)  #repeat, you never know
    histo = Hist(nbins, low, high, name=name, title=title)
    for val in vals:
        histo.Fill(val)
    return histo


def _toy_method_and_bin(name):
    '''Splits a "<kind>_<method>_bin<N>" list name into (method, "bin<N>");
    any name containing "L_curve" gets the method "L_curve".'''
    if 'L_curve' in name:
        return 'L_curve', name.split('_')[-1]
    _, method, binno = tuple(name.split('_'))
    return method, binno


def _toy_append_first_bins(inputfile, prefix, target, create):
    '''Appends the content of bin 1 of every histogram of inputfile whose
    key starts with prefix to target[histogram name]; the lists are
    (re)created first when create is true.'''
    keys = [key for key in inputfile.keys()
            if key.GetName().startswith(prefix)]
    for histo in [asrootpy(key.ReadObj()) for key in keys]:
        name = histo.name
        log.debug("name = %s, n bins = %s" % (name, histo.nbins()))
        if create:
            target[name] = []
        target[name].append(histo.GetBinContent(1))


def _toy_gaus_fit(name, histo):
    '''Fits a gaussian to histo. Returns None if no fit function is found,
    otherwise (general_name, bin index, mean, mean error, sigma, sigma
    error), with name split at "_bin".'''
    log.debug("name is %s and object type is %s" % (name, type(histo)))
    histo.Fit("gaus", 'Q')
    if not histo.GetFunction("gaus"):
        log.warning("Function not found for histogram %s" % name)
        return None
    func = histo.GetFunction("gaus")
    general_name, idx = tuple(name.split('_bin'))
    return (general_name, int(idx),
            func.GetParameter(1), func.GetParError(1),
            func.GetParameter(2), func.GetParError(2))


def _toy_book_summary(values, errors, template, nbins, dist_name, dist_title,
                      summary_name, summary_title, ytitle, xaxislabel):
    '''Builds, from the per-bin fit results in values ({bin index: value})
    and errors, the distribution of the values and a per-bin summary cloned
    from template. Returns (distribution, summary).'''
    lowest = min(values.values())
    highest = max(values.values())
    dist = plotting.Hist(nbins,
                         lowest - (highest - lowest) * 0.5,
                         highest + (highest - lowest) * 0.5,
                         name=dist_name, title=dist_title)
    summary = template.Clone(summary_name)
    summary.Reset()
    summary.xaxis.title = xaxislabel
    summary.yaxis.title = ytitle
    summary.title = summary_title
    for idx, value in values.iteritems():
        dist.Fill(value)
        summary[idx].value = value
        summary[idx].error = errors[idx]
    summary.yaxis.SetRangeUser(lowest, highest)
    return dist, summary


def _toy_plot_histos(plotter, histos, style, stats=False, hline=None,
                     write_canvas=True):
    '''Plots every histogram of histos with style, saves the canvas and
    writes the histogram (and, if write_canvas, the canvas) to the current
    ROOT directory. hline draws a horizontal line at that y value.'''
    for histo in histos.itervalues():
        histo = plotter.plot(histo, **style)
        if stats:
            histo.SetStats(True)
        if hline is not None:
            line = ROOT.TLine(histo.GetBinLowEdge(1), hline,
                              histo.GetBinLowEdge(histo.GetNbinsX() + 1),
                              hline)
            line.Draw("same")
        if write_canvas:
            plotter.save()
            histo.Write()
            plotter.canvas.Write()
        else:
            histo.Write()
            plotter.save()


def _toy_compare_with_truth(plotter, histos, truth, style):
    '''Overlays every histogram of histos with truth, once with the default
    comparison and once with method='ratio', saving and writing the canvas
    each time.'''
    for name, histo in histos.iteritems():
        leg = LegendDefinition("Unfolding comparison", 'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([truth], histo, legend_def=leg, **style)
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([truth], histo, legend_def=leg,
                                    method='ratio', **style)
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()


def _unfolding_toy_diagnostics_in_cwd(plotter, styles, xaxislabel):
    '''Body of unfolding_toy_diagnostics; expects the current working
    directory to be the one holding the toy_* sub-directories.'''
    toydirs = get_immediate_subdirectories(".")

    methods = []
    pulls_lists = {}
    pull_means_lists = {}
    pull_mean_errors_lists = {}
    pull_sums_lists = {}
    pull_sigmas_lists = {}
    pull_sigma_errors_lists = {}
    deltas_lists = {}
    delta_means_lists = {}
    delta_mean_errors_lists = {}
    delta_sigmas_lists = {}
    delta_sigma_errors_lists = {}
    ratio_sums_lists = {}
    nneg_bins_lists = {}
    unfoldeds_lists = {}
    unfolded_sigmas_lists = {}
    taus_lists = {}
    lists_created = False
    true_distro = None

    #loop over toys
    for directory in toydirs:
        if not directory.startswith('toy_'):
            continue
        os.chdir(directory)
        log.debug('Inspecting toy %s' % directory)
        if not os.path.isfile("result_unfolding.root"):
            raise ValueError('root file not found in %s' % os.getcwd())
        with io.root_open("result_unfolding.root") as inputfile:
            log.debug('Iteration %s over the file' % 0)
            if not methods:
                # a method is any top-level entry with a hdata_unfolded
                for key in [key.name for key in inputfile.keys()]:
                    if hasattr(getattr(inputfile, key), "hdata_unfolded"):
                        methods.append(key)

            unfolded_hists = [
                inputfile.get('%s/hdata_unfolded' % method)
                for method in methods
            ]
            # fetched and renamed like in the original code although the
            # phase-space corrected histograms are not used further down
            unfolded_wps_hists = [
                inputfile.get('%s/hdata_unfolded_ps_corrected' % method)
                for method in methods
            ]
            for unf, unfps, method in zip(unfolded_hists, unfolded_wps_hists,
                                          methods):
                unf.name = method
                unfps.name = method
            if true_distro is None:
                # the truth is taken from the first toy only
                true_distribution = inputfile.true_distribution
                ROOT.TH1.AddDirectory(False)
                true_distro = true_distribution.Clone()
            best_taus = prettyjson.loads(inputfile.best_taus.GetTitle())
            if not taus_lists:
                taus_lists = dict((method, []) for method in best_taus)
            for method, tau in best_taus.iteritems():
                taus_lists[method].append(tau)

            for histo in unfolded_hists:
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                #create pull/delta containers during first iteration
                if not lists_created:
                    for ibin in range(1, nbins + 1):
                        pulls_lists["pull_" + name + "_bin" + str(ibin)] = []
                        deltas_lists["delta_" + name + "_bin" + str(ibin)] = []
                        outname = "unfolded_" + name + "_bin" + str(ibin)
                        unfoldeds_lists[outname] = []
                        unfolded_sigmas_lists[outname] = []
                    outname = "pull_" + name
                    pull_means_lists[outname] = {}
                    pull_mean_errors_lists[outname] = {}
                    pull_sigmas_lists[outname] = {}
                    pull_sigma_errors_lists[outname] = {}
                    outname = "delta_" + name
                    delta_means_lists[outname] = {}
                    delta_mean_errors_lists[outname] = {}
                    delta_sigmas_lists[outname] = {}
                    delta_sigma_errors_lists[outname] = {}

                for ibin in range(1, nbins + 1):
                    unfolded_bin_content = histo.GetBinContent(ibin)
                    unfolded_bin_error = histo.GetBinError(ibin)
                    true_bin_content = true_distro.GetBinContent(ibin)

                    # only the unfolded error enters the pull
                    total_bin_error = math.sqrt(unfolded_bin_error**2)  #???
                    if total_bin_error != 0:
                        pull = (unfolded_bin_content -
                                true_bin_content) / total_bin_error
                    else:
                        pull = 9999
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, pull %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, pull))
                    pulls_lists["pull_" + name + "_bin" +
                                str(ibin)].append(pull)

                    delta = unfolded_bin_content - true_bin_content
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, delta %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, delta))
                    deltas_lists["delta_" + name + "_bin" +
                                 str(ibin)].append(delta)

                    outname = "unfolded_" + name + "_bin" + str(ibin)
                    unfoldeds_lists[outname].append(unfolded_bin_content)
                    unfolded_sigmas_lists[outname].append(unfolded_bin_error)

            create = not lists_created
            _toy_append_first_bins(inputfile, "nneg_bins",
                                   nneg_bins_lists, create)
            _toy_append_first_bins(inputfile, "sum_of_pulls",
                                   pull_sums_lists, create)
            _toy_append_first_bins(inputfile, "sum_of_ratios",
                                   ratio_sums_lists, create)

            #after the first iteration on the file all the lists are created
            lists_created = True

        os.chdir("..")

    #create histograms
    taus = {}
    for name, vals in taus_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        if val_min == val_max:
            # NOTE(review): a degenerate range is replaced by a 0.02 wide
            # window, shifted by half a bin when the bin number is even
            if tau_nbins % 2:  #if odd
                val_min, val_max = val_min - 0.01, val_min + 0.01
            else:
                brange = 0.02
                bwidth = brange / tau_nbins
                val_min, val_max = (val_min - 0.01 + bwidth / 2.,
                                    val_min + 0.01 + bwidth / 2.)
        title = '#tau choice - %s ;#tau;N_{toys}' % (name)
        taus[name] = _toy_filled_hist(tau_nbins, val_min, val_max,
                                      name, title, vals)

    pulls = {}
    for name, vals in pulls_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        abs_max = max(abs(val_min), abs(val_max))
        method, binno = _toy_method_and_bin(name)
        title = 'Pulls - %s - %s ;Pull;N_{toys}' % (binno, method)
        # pulls are shown on a range symmetric around zero
        pulls[name] = _toy_filled_hist(pull_nbins, -abs_max, abs_max,
                                       name, title, vals)

    deltas = {}
    for name, vals in deltas_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        method, binno = _toy_method_and_bin(name)
        title = 'Deltas - %s - %s ;Delta;N_{toys}' % (binno, method)
        deltas[name] = _toy_filled_hist(delta_nbins, val_min, val_max,
                                        name, title, vals)

    unfoldeds = {}
    for name, vals in unfoldeds_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        method, binno = _toy_method_and_bin(name)
        title = 'Unfoldeds - %s - %s ;Unfolded;N_{toys}' % (binno, method)
        unfoldeds[name] = _toy_filled_hist(unfolded_nbins, val_min, val_max,
                                           name, title, vals)

    nneg_bins = {}
    for name, vals in nneg_bins_lists.iteritems():
        val_min = min(vals)
        val_min = 0 if val_min > 0 else val_min - 1
        val_max = max(vals)
        val_max = 0 if val_max < 0 else val_max + 1
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            # NOTE(review): debugging hook kept from the original code
            set_trace()
            _, method, _ = tuple(name.split('_'))
        title = 'N of negative bins - %s ;N. neg bins;N_{toys}' % method
        nneg_bins[name] = _toy_filled_hist(int(val_max - val_min + 1),
                                           val_min, val_max,
                                           name, title, vals)

    pull_sums = {}
    for name, vals in pull_sums_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            # NOTE(review): debugging hook kept from the original code
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Pull sums - %s ;#Sigma(pull)/N_{bins};N_{toys}' % method
        pull_sums[name] = _toy_filled_hist(unfolded_nbins, val_min, val_max,
                                           name, title, vals)

    ratio_sums = {}
    for name, vals in ratio_sums_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            # NOTE(review): debugging hook kept from the original code
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Ratio sums - %s;#Sigma(ratio)/N_{bins};N_{toys}' % method
        ratio_sums[name] = _toy_filled_hist(unfolded_nbins, val_min, val_max,
                                            name, title, vals)

    unfolded_sigmas = {}
    for name, vals in unfolded_sigmas_lists.iteritems():
        val_min, val_max = _toy_padded_range(vals)
        method, binno = _toy_method_and_bin(name)
        title = 'Unfolded uncertainties - %s - %s ;Uncertainty;N_{toys}' % (
            binno, method)
        unfolded_sigmas[name] = _toy_filled_hist(
            unfolded_nbins, val_min, val_max, name, title, vals)

    # gaussian fits of the per-bin pull and delta distributions
    for name, histo in pulls.iteritems():
        fit = _toy_gaus_fit(name, histo)
        if fit is None:
            continue
        general_name, idx, mean, meanError, sigma, sigmaError = fit
        pull_means_lists[general_name][idx] = mean
        pull_mean_errors_lists[general_name][idx] = meanError
        pull_sigmas_lists[general_name][idx] = sigma
        pull_sigma_errors_lists[general_name][idx] = sigmaError

    for name, histo in deltas.iteritems():
        fit = _toy_gaus_fit(name, histo)
        if fit is None:
            continue
        general_name, idx, mean, meanError, sigma, sigmaError = fit
        delta_means_lists[general_name][idx] = mean
        delta_mean_errors_lists[general_name][idx] = meanError
        delta_sigmas_lists[general_name][idx] = sigma
        delta_sigma_errors_lists[general_name][idx] = sigmaError

    outfile = rootpy.io.File("unfolding_diagnostics.root", "RECREATE")
    try:
        outfile.cd()

        pull_means = {}
        pull_sigmas = {}
        pull_means_summary = {}
        pull_sigmas_summary = {}
        delta_means = {}
        delta_sigmas = {}
        delta_means_summary = {}
        delta_sigmas_summary = {}

        for outname, pmeans in pull_means_lists.iteritems():
            if not pmeans:
                # every fit failed: nothing to summarise
                log.warning("No fit results available for %s" % outname)
                continue
            pull_means[outname], pull_means_summary[outname] = \
                _toy_book_summary(
                    pmeans, pull_mean_errors_lists[outname], true_distro,
                    pull_mean_nbins,
                    outname + "_mean",
                    "Pull means - " + outname + ";Pull mean; N_{toys}",
                    outname + "_mean_summary",
                    "Pull mean summary - " + outname,
                    'Pull mean', xaxislabel)

        for outname, psigmas in pull_sigmas_lists.iteritems():
            if not psigmas:
                log.warning("No fit results available for %s" % outname)
                continue
            pull_sigmas[outname], pull_sigmas_summary[outname] = \
                _toy_book_summary(
                    psigmas, pull_sigma_errors_lists[outname], true_distro,
                    pull_sigma_nbins,
                    outname + "_sigma",
                    "Pull #sigma's - " + outname + ";Pull #sigma; N_{toys}",
                    outname + "_sigma_summary",
                    "Pull #sigma summary - " + outname,
                    'Pull #sigma', xaxislabel)

        for outname, dmeans in delta_means_lists.iteritems():
            if not dmeans:
                log.warning("No fit results available for %s" % outname)
                continue
            delta_means[outname], delta_means_summary[outname] = \
                _toy_book_summary(
                    dmeans, delta_mean_errors_lists[outname], true_distro,
                    delta_mean_nbins,
                    outname + "_mean",
                    "Delta means - " + outname + ";Delta mean; N_{toys}",
                    outname + "_mean_summary",
                    "Delta mean summary - " + outname,
                    'Delta mean', xaxislabel)

        for outname, dsigmas in delta_sigmas_lists.iteritems():
            if not dsigmas:
                log.warning("No fit results available for %s" % outname)
                continue
            delta_sigmas[outname], delta_sigmas_summary[outname] = \
                _toy_book_summary(
                    dsigmas, delta_sigma_errors_lists[outname], true_distro,
                    delta_sigma_nbins,
                    outname + "_sigma",
                    "Delta #sigma's - " + outname + ";Delta #sigma; N_{toys}",
                    outname + "_sigma_summary",
                    "Delta #sigma summary - " + outname,
                    'Delta #sigma', xaxislabel)

        # per-bin gaussian fit of the unfolded content:
        #   summary  -> error is the error on the fitted mean
        #   envelope -> error is the fitted sigma
        #   average  -> error is the mean of the unfolding uncertainties
        unfolded_summary = {}
        unfolded_average = {}
        unfolded_envelope = {}
        for name, histo in unfoldeds.iteritems():
            fit = _toy_gaus_fit(name, histo)
            if fit is None:
                continue
            general_name, idx, mean, meanError, sigma, sigmaError = fit

            if general_name not in unfolded_summary:
                summary = true_distro.Clone(
                    "%s_unfolded_summary" % general_name)
                summary.Reset()
                summary.xaxis.title = xaxislabel
                summary.yaxis.title = 'N_{events}'
                summary.title = "Unfolded summary - " + general_name
                unfolded_summary[general_name] = summary
                unfolded_envelope[general_name] = summary.Clone(
                    "%s_unfolded_envelope" % general_name)
                unfolded_average[general_name] = summary.Clone(
                    "%s_unfolded_average" % general_name)

            unfolded_summary[general_name][idx].value = mean
            unfolded_summary[general_name][idx].error = meanError

            unfolded_envelope[general_name][idx].value = mean
            unfolded_envelope[general_name][idx].error = sigma

            unfolded_average[general_name][idx].value = mean
            unfolded_average[general_name][idx].error = \
                unfolded_sigmas['%s_bin%i' % (general_name, idx)].GetMean()

        dots = styles['dots']
        compare = styles['compare']

        plotter.set_subdir('taus')
        for name, histo in taus.iteritems():
            plotter.canvas.cd()
            histo = plotter.plot(histo, **dots)
            histo.SetStats(True)
            info = plotter.make_text_box(
                'mode #tau = %.5f' % histo[histo.GetMaximumBin()].x.center,
                position=(plotter.pad.GetLeftMargin(),
                          plotter.pad.GetTopMargin(), 0.3, 0.025))
            info.Draw()
            plotter.save()
            histo.Write()
            plotter.canvas.Write()

        plotter.set_subdir('pulls')
        _toy_plot_histos(plotter, pulls, dots, stats=True)
        _toy_plot_histos(plotter, pull_means, dots, write_canvas=False)
        _toy_plot_histos(plotter, pull_sigmas, dots, write_canvas=False)

        plotter.set_subdir('pull_summaries')
        _toy_plot_histos(plotter, pull_means_summary, dots, hline=0)
        _toy_plot_histos(plotter, pull_sigmas_summary, dots, hline=1)

        plotter.set_subdir('deltas')
        _toy_plot_histos(plotter, deltas, dots, stats=True)
        _toy_plot_histos(plotter, delta_means, dots, write_canvas=False)
        _toy_plot_histos(plotter, delta_sigmas, dots, write_canvas=False)

        plotter.set_subdir('delta_summaries')
        _toy_plot_histos(plotter, delta_means_summary, dots)
        _toy_plot_histos(plotter, delta_sigmas_summary, dots)

        plotter.set_subdir('unfolding_unc')
        _toy_plot_histos(plotter, unfolded_sigmas, dots, stats=True)

        plotter.set_subdir('unfolded')
        _toy_plot_histos(plotter, unfoldeds, dots, stats=True)

        plotter.set_subdir('unfolded_summaries')
        _toy_plot_histos(plotter, unfolded_summary, dots, stats=True)
        _toy_compare_with_truth(plotter, unfolded_summary, true_distro,
                                compare)

        plotter.set_subdir('unfolded_average')
        _toy_compare_with_truth(plotter, unfolded_average, true_distro,
                                compare)

        plotter.set_subdir('unfolded_envelope')
        _toy_compare_with_truth(plotter, unfolded_envelope, true_distro,
                                compare)

        plotter.set_subdir('figures_of_merit')
        _toy_plot_histos(plotter, nneg_bins, dots, stats=True)
        _toy_plot_histos(plotter, pull_sums, dots, stats=True)
        _toy_plot_histos(plotter, ratio_sums, dots, stats=True)
    finally:
        # close the output file also when plotting fails
        outfile.close()


def unfolding_toy_diagnostics(indir, variable):
    '''Produces the diagnostic plots of a set of unfolding toys.

    Every toy_* sub-directory of indir must contain a result_unfolding.root
    file (a ValueError is raised otherwise). For each unfolding method and
    bin the pull, the difference to the truth, the unfolded content and its
    uncertainty are histogrammed over the toys, together with the tau
    values and the nneg_bins / sum_of_pulls / sum_of_ratios figures of
    merit. The distributions, their gaussian-fit summaries and the
    comparisons with the true distribution are plotted through a
    BasePlotter and written to indir/unfolding_diagnostics.root.

    indir: directory holding the toy_* sub-directories.
    variable: name of the unfolded variable, used for the x axis label.

    The function works inside indir; the previous working directory is
    restored on exit, also when an exception is raised.
    '''
    plotter = BasePlotter(defaults={
        'clone': False,
        'name_canvas': True,
        'show_title': True,
        'save': {
            'png': True,
            'pdf': False
        }
    }, )
    styles = {
        'dots': {
            'linestyle': 0,
            'markerstyle': 21,
            'markercolor': 1
        },
        'compare': {
            # NOTE(review): 'linesstyle' looks like a typo of 'linestyle';
            # kept unchanged, to be checked against overlay_and_compare
            'linesstyle': [1, 0],
            'markerstyle': [0, 21],
            'markercolor': [2, 1],
            'linecolor': [2, 1],
            'drawstyle': ['hist', 'pe'],
            'legendstyle': ['l', 'p']
        }
    }
    xaxislabel = set_pretty_label(variable)

    curdir = os.getcwd()
    os.chdir(indir)
    try:
        _unfolding_toy_diagnostics_in_cwd(plotter, styles, xaxislabel)
    finally:
        os.chdir(curdir)
xml_name = os.path.join(os.getcwd(),"weights/TMVAClassification_KNN.weights.xml") target = os.path.join(os.getcwd(),output_file.replace('.root','.weights.xml')) cmd = 'mv %s %s' % (xml_name, target) log.info(cmd) os.system( cmd ) ############################################# ## Reads back and produces control plots ############################################# hist_maps = {} for var in args.variables: if 'pt' in var.lower(): hist_maps[var] = { 'estimate' : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]), #plotting.Hist(100, 0, 200), 'estimate_all' : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]), #plotting.Hist(100, 0, 200), 'pass' : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]), 'all' : plotting.Hist([10,12,15,20,25,30,35,40,45,50,60,70,100,150,200]), } elif 'jets' in var.lower() or 'njet' in var.lower(): hist_maps[var] = { 'estimate' : plotting.Hist(12, 0, 12), 'estimate_all' : plotting.Hist(12, 0, 12), 'pass' : plotting.Hist(12, 0, 12), 'all' : plotting.Hist(12, 0, 12), } else: hist_maps[var] = { 'estimate' : plotting.Hist(100, 0, 200), 'estimate_all' : plotting.Hist(100, 0, 200),
# Global plot style: start from the 'ATLAS' style returned by get_style and
# apply one common text size to the labels and titles of both axes.
style = get_style('ATLAS')
tsize = 18
style.SetHistLineWidth(20)
for _axis in ("x", "y"):
    style.SetLabelSize(tsize, _axis)
    style.SetTitleSize(tsize, _axis)
set_style(style)

######################## Define Histos ##############################
# Muon multiplicity histograms with 5 bins between 0 and 5.
# Every histogram uses its own name as title; h_nTMuon_4GeV previously
# carried the title "h_nMuon_4GeV", which did not match its name.
h_nPMuon_4GeV = plt.Hist(5, 0, 5,
                         name="h_nPMuon_4GeV",
                         title="h_nPMuon_4GeV",
                         legendstyle='lep')
h_nTMuon_4GeV = plt.Hist(5, 0, 5,
                         name="h_nTMuon_4GeV",
                         title="h_nTMuon_4GeV",
                         legendstyle='lep')
h_nLMuon_4GeV = plt.Hist(5, 0, 5,
                         name="h_nLMuon_4GeV",
                         title="h_nLMuon_4GeV",
                         legendstyle='lep')
return discr_nodes dnn_aachen.load_trained_model() data = dnn_aachen.data.get_test_data(as_matrix=False) prediction_before = dnn_aachen.main_net.predict(data.values) discriminators_before = gen_discrs(prediction_before) before_hists = [] bins = 100 bin_range = [0., 1.] for i_node in range(len(event_classes)): node_values = discriminators_before[i_node] h = rp.Hist(bins, *bin_range, title="before smearing") h.markersize = 0 h.legendstyle = "F" h.fillstyle = "solid" h.linecolor = "black" h.fill_array(node_values) before_hists.append(h) # generate loop over different std deviation stddevs = np.arange(0.005, 0.305, 0.01) print(stddevs) #np.arange(0.01,0.31,0.01) rate_of_other_argmax = [] mean_diff = [] std_diff = []
('PF GSFTrk', seeding & (electrons.gsf_pt > 0) & electrons.has_pfGSF_trk), ('PF Block', seeding & (electrons.gsf_pt > 0) & electrons.has_pfBlock), ('PF Block+ECAL', seeding & (electrons.gsf_pt > 0) & electrons.has_pfBlock_with_ECAL), ('PF Ele', seeding & (electrons.gsf_pt > 0) & electrons.has_pfEgamma), ('GED Core', seeding & (electrons.gsf_pt > 0) & electrons.has_ele_core), ('GED Electrons', seeding & (electrons.ele_pt > 0)), ] to_plot = {'KTF Track', 'seeding', 'GSF Track', 'GED Electrons'} masks = dict(ordered_masks) for name, mask in masks.iteritems(): hist = rplt.Hist([1, 2, 4, 5, 6, 7, 8, 9, 10] if not args.test else [0, 1, 2, 5, 10]) masked = electrons[mask] if mask is not None else electrons root_numpy.fill_hist( hist, masked.gen_pt if not (args.allTracks or args.fakes) else masked.trk_pt) histos[name] = hist efficiencies = {} markersize = 6 first = True plt.clf() offset = 0.1 * (len(masks) - 1) / 2 for passing, _ in ordered_masks: if passing == 'all': continue efficiencies[passing] = rplt.Efficiency(histos[passing], histos['all'])
def plot_prenet_nodes(self, log=False):
    '''
    Plot the prenet output distribution of every prenet node.

    For each entry of self.prenet_targets the corresponding column of
    self.prenet_predicted_vector is split by the matching column of the
    prenet test labels: label 1 is drawn as "True", label 0 as "False",
    any other label value is ignored. The "True" weights are rescaled so
    that their sum equals the sum of the "False" weights, and the scale
    factor is appended to the "True" histogram title.

    One canvas per node is saved to
    self.save_path + "/prenet_output_<node>.pdf".

    log: if True, the y axis of the canvas is switched to log scale.
    '''
    pltstyle.init_plot_style()

    n_bins = 20
    bin_range = [0., 1.]

    sig_label = "True"
    bkg_label = "False"

    for i, node_cls in enumerate(self.prenet_targets):
        # outputs and truth labels of this node
        out_values = self.prenet_predicted_vector[:, i]
        prenet_labels = self.data.get_prenet_test_labels()[:, i]
        # fetch the weights once per node instead of once per event
        lumi_weights = self.data.get_lumi_weights()

        # single pass over the events, partitioned by truth label
        sig_values, sig_weights = [], []
        bkg_values, bkg_weights = [], []
        for k, value in enumerate(out_values):
            label = prenet_labels[k]
            if label == 1:
                sig_values.append(value)
                sig_weights.append(lumi_weights[k])
            elif label == 0:
                bkg_values.append(value)
                bkg_weights.append(lumi_weights[k])

        # scale "True" events to the total weight of the "False" events
        bkg_sig_ratio = 1. * sum(bkg_weights) / sum(sig_weights)
        sig_weights = [w * bkg_sig_ratio for w in sig_weights]
        sig_title = sig_label + "*{:.3f}".format(bkg_sig_ratio)

        # fill the two histograms
        bkg_hist = rp.Hist(n_bins, *bin_range, title=bkg_label)
        pltstyle.set_bkg_hist_style(bkg_hist, bkg_label)
        bkg_hist.fill_array(bkg_values, bkg_weights)

        sig_hist = rp.Hist(n_bins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        stack = rp.HistStack([bkg_hist], stacked=True,
                             drawstyle="HIST E1 X0")
        # non-zero minimum, needed for the log-scale option below
        stack.SetMinimum(1e-4)

        # draw everything on a fresh canvas
        canvas = pltstyle.init_canvas()
        rp.utils.draw([stack, sig_hist],
                      xtitle="prenet node {}".format(node_cls),
                      ytitle="Events",
                      pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # the returned legend is kept in a local on purpose
        # NOTE(review): presumably needed to keep it alive until the
        # canvas is saved - confirm against pltstyle.init_legend
        legend = pltstyle.init_legend([bkg_hist, sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        out_path = self.save_path + "/prenet_output_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def run_module(**kwargs):
    '''
    Convert a fit result file into per-observable yield histograms.

    The keyword arguments are wrapped into a Struct; the attributes read
    here are:
      binning     -- path of a JSON file with the binning of each variable
      input_shape -- path of the ROOT file with the input shape histograms
      fitresult   -- path of the ROOT file with the fit result
      toys        -- if true, process every 'toy_*' directory of fitresult
      out         -- path of the output ROOT file; a text table is also
                     written next to it with the '.raw_txt' extension

    Raises ValueError when a category name does not match the
    '<base>_<N>Jets' pattern.
    '''
    args = Struct(**kwargs)
    redundant_binning = {}
    with open(args.binning) as bins:
        redundant_binning = prettyjson.loads(bins.read())

    # Group the binning to merge jet categories: '<base>_<N>Jets' entries
    # are collapsed into a single '<base>' entry per variable.
    grouping = re.compile('^(?P<base_category>[A-Za-z0-9]+)_\d+Jets$')
    binning = {}
    for var, categories in redundant_binning.iteritems():
        if var not in binning: binning[var] = {}
        for category, bin_info in categories.iteritems():
            m = grouping.match(category)
            if not m: raise ValueError('Category name %s did not match the regex!' % category)
            base = m.group('base_category')
            if base not in binning[var]:
                binning[var][base] = copy.deepcopy(bin_info)
            else:
                # make sure that all jet categories have the same bin edges
                assert(binning[var][base] == bin_info)

    # Build the sorted list of unique bin edges of each variable and store
    # it next to the categories under the extra key 'edges'.
    for info in binning.itervalues():
        edges = set( i['low_edge'] for i in info.itervalues() )
        edges.update( set( i['up_edge'] for i in info.itervalues() ) )
        edges = sorted(list(edges))
        info['edges'] = edges

    # Pre-fit normalisations: integral and error of every TH1 found in the
    # first-level directories of the shape file, keyed '<directory>/<hist>'.
    # NOTE(review): prefit_norms is not read again in the visible part of
    # this function.
    prefit_norms = {}
    with io.root_open(args.input_shape) as shapes:
        for key in shapes.keys():
            obj = key.ReadObj()
            if not obj.InheritsFrom('TDirectory'): continue
            for hkey in obj.GetListOfKeys():
                hist = hkey.ReadObj()
                if not hist.InheritsFrom('TH1'): continue
                err = ROOT.Double()
                integral = hist.IntegralAndError( 1, hist.GetNbinsX(), err )
                # random unique name for the RooRealVar
                val_id = uuid.uuid4().hex
                val = ROOT.RooRealVar(val_id, val_id, integral)
                val.setError(err)
                prefit_norms['%s/%s' % (obj.GetName(), hist.GetName())] = val

    with io.root_open(args.fitresult) as results:
        # '' stands for the top level of the file; with toys every
        # 'toy_*' directory is processed instead.
        dirs = ['']
        if args.toys: dirs = [i.GetName() for i in results.GetListOfKeys() if i.GetName().startswith('toy_')]

        # Text table of the post-fit normalisations, sorted by name.
        # Names are expected to look like '<bin>_<category>/<sample>'.
        postfit_table = Table('Bin:%7s', 'Category:%10s', 'Sample:%20s', 'Yield:%5.1f', 'Error:%5.1f')
        postfit_norms = [(i.name, i.value, i.error) for i in results.norm_fit_s]
        postfit_norms.sort(key=lambda x: x[0])
        for name, val, err in postfit_norms:
            bincat, sample = tuple(name.split('/'))
            bin, category = tuple(bincat.split('_'))
            postfit_table.add_line(bin, category, sample, val, err)
        # NOTE(review): assumed to run once after the loop, not per line - confirm
        postfit_table.add_separator()
        with open(args.out.replace('.root','.raw_txt'), 'w') as out:
            out.write(postfit_table.__repr__())

        with io.root_open(args.out, 'recreate') as output:
            # pre-fit histograms are written only once (see flag below)
            is_prefit_done = False
            for dirname in dirs:
                input_dir = results.Get(dirname) if dirname else results
                # skip directories that carry no 'fit_s' object
                if not hasattr(input_dir, 'fit_s'): continue
                fit_result = input_dir.fit_s
                pars = asrootpy(fit_result.floatParsFinal())
                prefit_pars = asrootpy(fit_result.floatParsInit())

                # write into the output top level, or into a directory
                # named like the input one
                tdir = output
                if dirname: tdir = output.mkdir(dirname)
                tdir.cd()
                hcorr = asrootpy(fit_result.correlationHist())
                par_names = set([i.name for i in pars])
                # parameters named '<category>_FullYield_<sample>'
                yield_par_names = filter(lambda x: '_FullYield_' in x, par_names)
                hcorr.Write()
                for observable, info in binning.iteritems():
                    var_dir = tdir.mkdir(observable)
                    var_dir.cd()
                    hists = {}
                    hists_prefit = {}
                    for rvar_name in yield_par_names:
                        category, sample = tuple(rvar_name.split('_FullYield_'))
                        # category belongs to a different observable
                        if category not in info: continue
                        if sample not in hists:
                            hists[sample] = plotting.Hist( info['edges'], name = sample )
                            if not is_prefit_done:
                                hists_prefit[sample] = plotting.Hist( info['edges'], name = sample )
                        # NOTE(review): +1 presumably skips the underflow bin 0 - confirm
                        idx = info[category]['idx']+1
                        hists[sample][idx].value = pars[rvar_name].value
                        error = pars[rvar_name].error
                        # get max of asym error
                        hists[sample][idx].error = max(abs(i) for i in error) if isinstance(error, tuple) else error
                        if not is_prefit_done: hists_prefit[sample][idx].value = prefit_pars[rvar_name].value
                        ## Pre-fit floating parameters have no uncertainties
                        ## hists_prefit[sample][idx].error = max(prefit_pars[rvar_name].error)
                        logging.debug( 'Assigning label %s to bin %i for %s/%s' % (rvar_name, idx, category, sample) )
                        hists[sample].xaxis.SetBinLabel(idx, rvar_name)

                    for h in hists.itervalues():
                        logging.debug( h.Write() )

                    # NOTE(review): nesting of this block (per observable
                    # vs. per fit directory) should be confirmed; as laid
                    # out here only the first processed observable gets
                    # its pre-fit histograms written.
                    if not is_prefit_done:
                        is_prefit_done = True
                        output.mkdir('prefit').cd()
                        for h in hists_prefit.itervalues():
                            logging.debug( h.Write() )