def plot (*argv):
    """
    Draw background rejection against signal efficiency for a set of taggers.

    Arguments (positional, unpacked from `argv`):
        args: Object whose `show` attribute switches off batch mode.
        data: Not used by this function.
        features: Sequence of feature names to draw, in drawing order.
        ROCs: Mapping from feature name to an `(eff_sig, eff_bkg)` pair.
        AUCs: Not used by this function.
        masscut: Truthy if the mass-cut line should be added to the text.
        pt_range: Two-element pT range for the text, or None to omit it.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    args, data, features, ROCs, AUCs, masscut, pt_range = argv

    # Canvas
    canvas = rp.canvas(batch=not args.show)

    # Plots
    # -- Random guessing: 1/efficiency at the bin centres. A narrow extra bin
    #    (1% of the regular bin width) is inserted right after the lower edge.
    regular = np.linspace(0.2, 1., 100 + 1, endpoint=True)
    first_width = np.diff(regular[:2])[0]
    edges = np.array([regular[0], regular[0] + 0.01 * first_width] + list(regular[1:]))
    centres = edges[:-1] + 0.5 * np.diff(edges)
    canvas.hist(np.power(centres, -1.), bins=edges,
                linecolor=ROOT.kGray + 2, fillcolor=ROOT.kBlack,
                alpha=0.05, linewidth=1, option='HISTC')

    # -- ROCs, drawn in two passes so that each class gets its own legend
    legend_width = 0.17
    for is_simple in [True, False]:

        # Only features whose `signal_low` flag matches the current pass; the
        # running index spans all features so that styles stay stable.
        for ifeat, feat in enumerate(features):
            if not (is_simple == signal_low(feat)):
                continue
            eff_sig, eff_bkg = ROCs[feat]
            canvas.graph(np.power(eff_bkg, -1.), bins=eff_sig,
                         linestyle=1 + (ifeat % 2),
                         linecolor=rp.colours[(ifeat // 2) % len(rp.colours)],
                         linewidth=2,
                         label=latex(feat, ROOT=True),
                         option='L')

        # Draw class-specific legend
        header = "Analytical:" if is_simple else "MVA:"
        canvas.legend(header=header, width=legend_width,
                      xmin=0.58 + (legend_width) * (is_simple), ymax=0.888)

    # Decorations
    canvas.xlabel("Signal efficiency #varepsilon_{sig}^{rel}")
    canvas.ylabel("Background rejection 1/#varepsilon_{bkg}^{rel}")
    canvas.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

    lines = ["#sqrt{s} = 13 TeV", "#it{W} jet tagging"]
    if pt_range is not None:
        lines = lines + ["p_{{T}} #in [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])]
    if masscut:
        lines = lines + ["Cut: m #in [60, 100] GeV"]
    canvas.text(lines, ATLAS=False)

    # The number of active selections sets the y-axis headroom and the slope
    # of the "Random guessing" label.
    ranges = int(pt_range is not None) + int(masscut)
    if ranges == 2:
        mult = 10.
    elif ranges == 1:
        mult = 2.
    else:
        mult = 1.

    canvas.latex("Random guessing", 0.4, 1./0.4 * 0.9, align=23,
                 angle=-12 + 2 * ranges, textsize=13,
                 textcolor=ROOT.kGray + 2)
    canvas.xlim(0.2, 1.)
    canvas.ylim(1E+00, 5E+02 * mult)
    canvas.logy()
    canvas.legend()

    return canvas
def plot(*argv):
    """
    Draw the evolution of the optimisation metric and save it as a PDF.

    Saves `figures/optimisation/optimisation_<experiment>.pdf`.

    Arguments (positional, unpacked from `argv`):
        experiment: Value inserted into the output file name.
        means: Sequence of metric values, one per step; steps above the upper
            axis limit are flagged with a marker near the top of the frame.
        graph: Graph of the individual evaluations, handed to `c.graph`.
        idx_improvements: Index applied to both `best_mean` and `bins` to
            pick the steps that get an extra marker.
        best_mean: Best-result values, indexable by `idx_improvements`.
        bins: Step positions, indexable by `idx_improvements`.
    """

    # Unpack arguments
    experiment, means, graph, idx_improvements, best_mean, bins = argv

    # Plot results
    c = rp.canvas(batch=True)

    # Visible y-axis range; also used to place the out-of-bounds markers
    ymin, ymax = 0.3, 1.0

    # Steps whose metric lies above the visible range. A list (rather than
    # `map`/`filter`) keeps this working where `map` returns an iterator,
    # which `np.ones_like` cannot size.
    oobx = [step for step, mean in enumerate(means) if mean > ymax]
    ooby = np.ones_like(oobx) * 0.96 * (ymax - ymin) + ymin

    # Plots
    c.graph(graph, markercolor=rp.colours[1], linecolor=rp.colours[1],
            markersize=0.7, option='AP', label='Evaluations',
            legend_option='PE')
    c.graph(ooby, bins=oobx, markercolor=rp.colours[1], markerstyle=22,
            option='P')
    c.graph(best_mean, bins=bins, linecolor=rp.colours[5], linewidth=2,
            option='L', label='Best result')
    c.graph(best_mean[idx_improvements], bins=bins[idx_improvements],
            markercolor=rp.colours[5], markersize=0.5, option='P')

    # Decorations
    c.pad()._yaxis().SetNdivisions(505)
    c.xlabel("Bayesian optimisation step")
    c.ylabel("Cross-validation optimisation metric, L_{clf}^{val}")
    c.xlim(0, len(bins))
    c.ylim(ymin, ymax)
    c.legend(width=0.22, ymax=0.816)
    c.text(["#sqrt{s} = 13 TeV", "Neural network (NN) classifier"],
           qualifier=QUALIFIER)

    # Save
    mkdir('figures/optimisation/')
    c.save('figures/optimisation/optimisation_{}.pdf'.format(experiment))
    return
def plot2D (*argv):
    """
    Draw signal/background contours with the DDT fit and the LDA boundary.

    Saves `figures/ddt/ddt_<variable>_2d.pdf`.

    Arguments (positional, unpacked from `argv`):
        data: Not used by this function.
        ddt: Fitted object exposing `intercept_`, `offset_` and `coef_`.
        lda: Fitted object exposing `intercept_` and `coef_`.
        contours: Indexable with 0 (background) and 1 (signal); each entry is
            handed to `c.hist2d`.
        binsx: Bin edges along x; only the first and last are used.
        binsy: Bin edges along y; only the first and last are used.
        variable: Identifier compared against the `VAR_*` constants to pick
            the y-axis label, and inserted into the output file name.
    """

    # Unpack arguments
    data, ddt, lda, contours, binsx, binsy, variable = argv

    with TemporaryStyle() as style:

        # Style
        style.SetNumberContours(10)

        # Canvas
        canvas = rp.canvas(batch=True)

        # Axes: a single invisible bin spanning the full x-range
        canvas.hist([binsy[0]], bins=[binsx[0], binsx[-1]], linestyle=0, linewidth=0)

        # Plotting contours
        for sig in [0, 1]:
            name = "Signal" if sig else "Background"
            canvas.hist2d(contours[sig], linecolor=rp.colours[1 + 3 * sig],
                          label=name, option='CONT3', legend_option='L')

        # Both straight lines are drawn between the same two x-positions
        x_lo, x_hi = 1.5, 5.0
        xs = [x_lo, x_hi]

        # Linear fit
        intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
        fit_lo = intercept + x_lo * coef
        fit_hi = intercept + x_hi * coef
        canvas.plot([fit_lo, fit_hi], bins=xs, color=rp.colours[-1],
                    label='DDT transform fit', linewidth=1, linestyle=1,
                    option='L')

        # LDA decision boundary
        lda_lo = lda.intercept_ + x_lo * lda.coef_
        lda_hi = lda.intercept_ + x_hi * lda.coef_
        canvas.plot([lda_lo, lda_hi], bins=xs, label='LDA boundary',
                    linewidth=1, linestyle=2, option='L')

        # Decorations
        canvas.text(["#sqrt{s} = 13 TeV"], qualifier=QUALIFIER, ATLAS=False)
        canvas.legend()
        canvas.ylim(binsy[0], binsy[-1])
        canvas.xlabel("Large-#it{R} jet " + latex('rhoDDT', ROOT=True))

        # Pick the y-axis name for the known variables; any other value
        # leaves the y-axis label unset.
        yname = None
        if variable == VAR_TAU21:
            yname = '#tau_{21}'
        elif variable == VAR_N2:
            yname = 'N_{2}'
        elif variable == VAR_DECDEEP:
            yname = 'dec_deepWvsQCD'
        elif variable == VAR_DEEP:
            yname = 'deepWvsQCD'
        if yname is not None:
            canvas.ylabel("Large-#it{R} jet " + latex(yname, ROOT=True))

        # Save
        mkdir('figures/ddt')
        canvas.save('figures/ddt/ddt_{}_2d.pdf'.format(variable))

    return
def plot(*argv):
    """
    Draw background-efficiency profiles versus jet mass, one per cut.

    Arguments (positional, unpacked from `argv`):
        args: Object whose `show` attribute switches off batch mode.
        data: Not used by this function.
        feat: Feature name shown in the "Cuts on ..." text line.
        profiles: Sequence of profiles, each handed to `c.hist`.
        cuts: Sequence zipped with `profiles`; the values are not used.
        effs: Sequence of integer efficiencies (in percent) used as legend
            labels.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    args, data, feat, profiles, cuts, effs = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.6, 'y')

        # Canvas
        canvas = rp.canvas(batch=not args.show)

        # Plots: each profile is drawn twice, as a line and as an error band
        for idx, (profile, _, eff) in enumerate(zip(profiles, cuts, effs)):
            colour = rp.colours[idx + 0]

            # Single-digit efficiencies get a leading pad in the label
            pad = " " if eff < 10 else ""
            label = pad + "{:d}%".format(eff)

            canvas.hist(profile, linecolor=colour, linestyle=1,
                        option='HIST L')
            canvas.hist(profile, linecolor=colour, fillcolor=colour,
                        alpha=0.3, option='E3', label=label)

        # Decorations
        canvas.xlabel("Large-#it{R} jet mass [GeV]")
        canvas.ylabel("Background efficiency, #varepsilon_{bkg}^{rel}")
        lines = [
            "#sqrt{s} = 13 TeV,  Multijets".replace("  ", " "),
            "Cuts on {}".format(latex(feat, ROOT=True)),
        ]
        canvas.text(lines, qualifier=QUALIFIER, ATLAS=False)
        canvas.ylim(0, 2.0)
        canvas.legend(reverse=True, width=0.25, ymax=0.87,
                      header="Incl. #bar{#varepsilon}_{bkg}^{rel}:")

    return canvas
def plot(profile, fit):
    """
    Draw a 2D profile as a colour map and save it as PDF and EPS.

    Saves `figures/knn/knn_<fit|profile>_<VAR>_<EFF>_<MODEL>.{pdf,eps}`.

    Arguments:
        profile: ROOT object exposing `GetXaxis`/`GetYaxis`/`GetZaxis`,
            `SetContour` and `Draw`; it is styled in place.
        fit: Truthy if `profile` holds the fitted values rather than the
            measured ones; selects the z-axis title and the file name.
    """

    # rootplotting
    canvas = rp.canvas(batch=True)
    tpad = canvas.pads()[0]._bare()
    tpad.cd()
    tpad.SetRightMargin(0.20)
    tpad.SetLeftMargin(0.15)
    tpad.SetTopMargin(0.10)

    # Styling, one axis at a time
    kind = "#it{k}-NN fitted" if fit else "Measured"

    xaxis = profile.GetXaxis()
    xaxis.SetTitle(latex(VARX, ROOT=True) + " [GeV]")
    xaxis.SetTitleOffset(1.4)

    yaxis = profile.GetYaxis()
    yaxis.SetTitle(latex(VARY, ROOT=True) + " [GeV]")
    yaxis.SetNdivisions(505)
    yaxis.SetTitleOffset(1.8)

    zaxis = profile.GetZaxis()
    zaxis.SetTitle("%s %s^{(%s%%)}" % (kind, latex(VAR, ROOT=True), EFF))
    zaxis.SetNdivisions(505)
    zaxis.SetTitleOffset(1.3)
    if ZRANGE:
        # Only restrict the z-range when one is configured
        zaxis.SetRangeUser(*ZRANGE)

    profile.SetContour(NB_CONTOUR)

    # Draw the map and overlay copies of the two boundary objects
    profile.Draw('COLZ')
    for bound in (BOUNDS[0], BOUNDS[1]):
        bound.DrawCopy("SAME")

    # Decorations
    canvas.text(["#sqrt{s} = 13 TeV", "Multijets"],
                ATLAS=False, textcolor=ROOT.kWhite)

    # Save
    mkdir('figures/knn/')
    tag = 'fit' if fit else 'profile'
    templates = (
        'figures/knn/knn_{}_{:s}_{}_{}.pdf',
        'figures/knn/knn_{}_{:s}_{}_{}.eps',
    )
    for template in templates:
        canvas.save(template.format(tag, VAR, EFF, MODEL))
def plot (*argv):
    """
    Draw jet-mass spectra of background jets passing and failing a cut.

    The upper pad holds the two normalised spectra, the lower pad their
    ratio. Note that this function writes labels and drawing options into
    the module-level `HISTSTYLE` dicts.

    Arguments (positional, unpacked from `argv`):
        data: Data frame with columns `m` and `weight_test`.
        args: Object whose `show` attribute switches off batch mode.
        feat: Feature name shown in the text.
        msk_pass: Boolean mask of entries passing the cut.
        msk_bkg: Boolean mask of background entries.
        eff_sig: Signal efficiency in percent, shown in the text.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    data, args, feat, msk_pass, msk_bkg, eff_sig = argv

    # Global variable override(s)
    HISTSTYLE[True] ['label'] = "Passing cut"
    HISTSTYLE[False]['label'] = "Failing cut"

    # Canvas
    canvas = rp.canvas(num_pads=2, size=(int(800 * 600 / 857.), 600),
                       batch=not args.show)

    # Plots
    common = dict(bins=MASSBINS, alpha=0.3, normalise=True, linewidth=3)
    hist = dict()
    for name, passing in (('fail', False), ('pass', True)):
        selection = msk_pass if passing else ~msk_pass
        msk = msk_bkg & selection
        HISTSTYLE[passing].update(common)
        hist[name] = canvas.hist(data.loc[msk, 'm'].values,
                                 weights=data.loc[msk, 'weight_test'].values,
                                 **HISTSTYLE[passing])

    # Ratio plots
    # -- Reference line: the passing histogram divided by itself
    canvas.ratio_plot((hist['pass'], hist['pass']), option='HIST',
                      fillstyle=0, linecolor=ROOT.kGray + 1, linewidth=1,
                      linestyle=1)
    # -- Uncertainty band of passing / failing
    canvas.ratio_plot((hist['pass'], hist['fail']), option='E2',
                      fillstyle=1001, fillcolor=rp.colours[0],
                      linecolor=rp.colours[0], alpha=0.3)

    # -- Set this before drawing OOB markers
    ratio_pad = canvas.pads()[1]
    ratio_pad.logy()
    ratio_pad.ylim(1E-01, 1E+01)

    # -- Central values of passing / failing
    canvas.ratio_plot((hist['pass'], hist['fail']), option='HIST',
                      fillstyle=0, linewidth=3, linecolor=rp.colours[0])

    # Decorations
    canvas.xlabel("Large-#it{R} jet mass [GeV]")
    canvas.ylabel("Fraction of jets")
    cut_line = "#varepsilon_{sig} = %d%% cut on %s" % (eff_sig, latex(feat, ROOT=True))
    canvas.text(["#sqrt{s} = 13 TeV, Multijets", cut_line],
                qualifier=QUALIFIER, ATLAS=False)
    canvas.ylim(2E-04, 2E+02)
    canvas.logy()
    canvas.legend()
    canvas.pads()[1].ylabel("Passing / failing")

    return canvas
def plot1D (*argv):
    """
    Draw the original and DDT-transformed profiles together with the fit.

    Saves `figures/ddt/ddt.pdf`.

    Arguments (positional, unpacked from `argv`):
        graphs: Mapping with keys `'Tau21'` and `'Tau21DDT'`; the values are
            handed to `c.graph`.
        ddt: Fitted object exposing `intercept_`, `offset_` and `coef_`.
        arr_x: Sequence of x-values; its minimum and maximum are the end
            points of the drawn fit line.
    """

    # Unpack arguments
    graphs, ddt, arr_x = argv

    # Style
    # NOTE(review): this changes the global ROOT style and does not restore
    # it afterwards -- confirm that later plots are meant to inherit it.
    ROOT.gStyle.SetTitleOffset(1.4, 'x')

    # Canvas
    c = rp.canvas(batch=True)

    # Setup (the top margin was previously set twice to the same value)
    pad = c.pads()[0]._bare()
    pad.cd()
    pad.SetTopMargin(0.10)

    # Profiles
    c.graph(graphs['Tau21'], label="Original, #tau_{21}",
            linecolor=rp.colours[4], markercolor=rp.colours[4],
            markerstyle=24, legend_option='PE')
    c.graph(graphs['Tau21DDT'], label="Transformed, #tau_{21}^{DDT}",
            linecolor=rp.colours[1], markercolor=rp.colours[1],
            markerstyle=20, legend_option='PE')

    # Fit, drawn across the full range of `arr_x`
    x1, x2 = min(arr_x), max(arr_x)
    intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
    y1 = intercept + x1 * coef
    y2 = intercept + x2 * coef
    c.plot([y1, y2], bins=[x1, x2], color=rp.colours[-1], label='Linear fit',
           linewidth=1, linestyle=1, option='L')

    # Decorations
    c.xlabel("Large-#it{R} jet #rho^{DDT} = log(m^{2}/ p_{T} / 1 GeV)")
    c.ylabel("#LT#tau_{21}#GT, #LT#tau_{21}^{DDT}#GT")
    c.text(["#sqrt{s} = 13 TeV, Multijets"], qualifier=QUALIFIER)
    # The legend height is only pinned when the qualifier is not "Internal"
    c.legend(width=0.25, xmin=0.57,
             ymax=None if "Internal" in QUALIFIER else 0.85)
    c.ylim(0, 1.4)

    # Mark the fit range with two vertical lines and a centred label
    c.latex("Fit range", sum(FIT_RANGE) / 2., 0.08, textsize=13,
            textcolor=ROOT.kGray + 2)
    c.xline(FIT_RANGE[0], ymax=0.82, text_align='BR',
            linecolor=ROOT.kGray + 2)
    c.xline(FIT_RANGE[1], ymax=0.82, text_align='BL',
            linecolor=ROOT.kGray + 2)

    # Save
    mkdir('figures/ddt/')
    c.save('figures/ddt/ddt.pdf')
    return
def plot(*argv):
    """
    Draw normalised signal and background distributions of one feature.

    Arguments (positional, unpacked from `argv`):
        args: Object whose `show` attribute switches off batch mode.
        data: Data frame with columns `signal`, `weight_test` and `feat`.
        feat: Name of the column to histogram; also used in the x-axis label.
        bins: Binning handed to `c.hist`.
        pt_range: Two-element pT range for the text, or None to omit it.
        mass_range: Two-element mass range for the text, or None to omit it.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    args, data, feat, bins, pt_range, mass_range = argv

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Style
    # Copy the per-class style dicts as well as the outer dict: a plain
    # `dict(**HISTSTYLE)` shares the inner dicts, so the `update` below would
    # write `bins`, `alpha`, ... into the module-level HISTSTYLE.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    base = dict(bins=bins, alpha=0.5, normalise=True, linewidth=3)

    # Plots
    for signal in [0, 1]:
        msk = (data['signal'] == signal)
        histstyle[signal].update(base)
        c.hist(data.loc[msk, feat].values,
               weights=data.loc[msk, 'weight_test'].values,
               **histstyle[signal])

    # Decorations
    c.xlabel("Large-#it{R} jet " + latex(feat, ROOT=True))
    c.ylabel("Fraction of jets")

    lines = TEXT + ["#it{W} jet tagging"]
    if pt_range is not None:
        lines = lines + [
            "p_{{T}} #in [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])
        ]
    if mass_range is not None:
        lines = lines + [
            "m #in [{:.0f}, {:.0f}] GeV".format(mass_range[0], mass_range[1]),
        ]
    c.text(lines, qualifier=QUALIFIER, ATLAS=False)

    c.ylim(4E-03, 4E-01)
    c.logy()
    c.legend()
    return c
def plot (*argv):
    """
    Draw tagged-background jet-mass spectra on a multi-pad canvas.

    Pad 0 holds the common legend and text. Every other pad shows the
    inclusive background and signal spectra plus the background spectra
    passing the cuts of two consecutive features.

    Arguments (positional, unpacked from `argv`):
        data: Data frame with columns `signal`, `m` and `weight_test`.
        args: Object whose `show` attribute switches off batch mode.
        features: Sequence of feature names; feature `i` is drawn on pad
            `1 + i // 2`.
        msks_pass: Mapping from feature name to the boolean mask of entries
            passing that feature's cut.
        eff_sig: Signal efficiency in percent, shown in the text.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    data, args, features, msks_pass, eff_sig = argv

    with TemporaryStyle() as style:

        # Style
        ymin, ymax = 5E-05, 5E+00
        scale = 0.8
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(style.GetLabelSize(coord) * scale, coord)
            style.SetTitleSize(style.GetTitleSize(coord) * scale, coord)
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.07, 'x')
        style.SetTickLength(0.07 * (5./6.) * (2./3.), 'y')

        # Local style overrides
        # Copy the per-class style dicts as well as the outer dict: a plain
        # `dict(**HISTSTYLE)` shares the inner dicts, so every override below
        # would be written into the module-level HISTSTYLE.
        histstyle = {key: dict(value) for key, value in HISTSTYLE.items()}
        histstyle[True]['fillstyle'] = 3554
        histstyle[True]['label'] = None
        histstyle[False]['label'] = None
        for v in ['linecolor', 'fillcolor']:
            histstyle[True][v] = 16
            histstyle[False][v] = ROOT.kBlack
        style.SetHatchesLineWidth(1)

        # Canvas
        c = rp.canvas(batch=not args.show, num_pads=(2,3))

        # Plots
        # -- Dummy, for proper axes
        for ipad, pad in enumerate(c.pads()[1:], 1):
            pad.hist([ymin], bins=[50, 300], linestyle=0, fillstyle=0,
                     option=('Y+' if ipad % 2 else ''))

        # -- Inclusive
        base = dict(bins=MASSBINS, normalise=True, linewidth=2)
        for signal in [False, True]:
            msk = data['signal'] == signal
            histstyle[signal].update(base)
            for pad in c.pads()[1:]:
                histstyle[signal]['option'] = 'HIST'
                pad.hist(data.loc[msk, 'm'].values,
                         weights=data.loc[msk, 'weight_test'].values,
                         **histstyle[signal])

        # The legend entries use a different draw option than the histograms
        for sig in [True, False]:
            histstyle[sig]['option'] = 'FL'

        c.pads()[0].legend(header='Inclusive selection:', categories=[
            ("Multijets", histstyle[False]),
            ("#it{W} jets", histstyle[True])
            ], xmin=0.18, width= 0.60, ymax=0.28 + 0.07, ymin=0.001 + 0.07,
            columns=2)
        c.pads()[0]._legends[-1].SetTextSize(style.GetLegendTextSize())
        c.pads()[0]._legends[-1].SetMargin(0.35)

        # -- Tagged
        base['linewidth'] = 2
        for ifeat, feat in enumerate(features):
            # Two features per pad: same colour, alternating line style
            opts = dict(
                linecolor = rp.colours[(ifeat // 2)],
                linestyle = 1 + (ifeat % 2),
                linewidth = 2,
                )
            cfg = dict(**base)
            cfg.update(opts)
            msk = (data['signal'] == 0) & msks_pass[feat]
            pad = c.pads()[1 + ifeat//2]
            pad.hist(data.loc[msk, 'm'].values,
                     weights=data.loc[msk, 'weight_test'].values,
                     label=" " + latex(feat, ROOT=True), **cfg)

        # -- Legend(s)
        for ipad, pad in enumerate(c.pads()[1:], 1):
            offsetx = (0.20 if ipad % 2 else 0.05)
            offsety = 0.20 * ((2 - (ipad // 2)) / float(2.))
            pad.legend(width=0.25, xmin=0.68 - offsetx, ymax=0.80 - offsety)
            pad.latex("Tagged multijets:", NDC=True, x=0.93 - offsetx,
                      y=0.84 - offsety, textcolor=ROOT.kGray + 3,
                      textsize=style.GetLegendTextSize() * 0.8, align=31)
            pad._legends[-1].SetMargin(0.35)
            pad._legends[-1].SetTextSize(style.GetLegendTextSize())

        # Formatting pads
        margin = 0.2
        for ipad, pad in enumerate(c.pads()):
            tpad = pad._bare()  # ROOT.TPad
            right = ipad % 2
            # Row position of the pad, from 0 (first row) to 1 (last row)
            f = (ipad // 2) / float(len(c.pads()) // 2 - 1)
            tpad.SetLeftMargin (0.05 + 0.15 * (1 - right))
            tpad.SetRightMargin(0.05 + 0.15 * right)
            tpad.SetBottomMargin(f * margin)
            tpad.SetTopMargin((1 - f) * margin)
            if ipad == 0:
                # Pad 0 only holds legend and text; it has no axes to format
                continue
            pad._xaxis().SetNdivisions(505)
            pad._yaxis().SetNdivisions(505)
            if ipad // 2 < len(c.pads()) // 2 - 1:  # Not bottom pad(s)
                pad._xaxis().SetLabelOffset(9999.)
                pad._xaxis().SetTitleOffset(9999.)
            else:
                pad._xaxis().SetTitleOffset(2.7)

        # Re-draw axes
        for pad in c.pads()[1:]:
            pad._bare().RedrawAxis()
            pad._bare().Update()
            pad._xaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
            pad._yaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"

        # Decorations
        c.pads()[-1].xlabel("Large-#it{R} jet mass [GeV]")
        c.pads()[-2].xlabel("Large-#it{R} jet mass [GeV]")
        # The nested, mostly empty #splitline blocks shift the y-axis title
        # vertically on each of the two pads.
        c.pads()[1].ylabel("#splitline{#splitline{#splitline{#splitline{}{}}{#splitline{}{}}}{#splitline{}{}}}{#splitline{}{#splitline{}{#splitline{}{Fraction of jets}}}}")
        c.pads()[2].ylabel("#splitline{#splitline{#splitline{#splitline{Fraction of jets}{}}{}}{}}{#splitline{#splitline{}{}}{#splitline{#splitline{}{}}{#splitline{}{}}}}")

        c.pads()[0].text(["#sqrt{s} = 13 TeV, #it{W} jet tagging",
                          "Cuts at #varepsilon_{sig}^{rel} = %.0f%%" % eff_sig,
                          ], xmin=0.2, ymax=0.72, qualifier=QUALIFIER)

        for pad in c.pads()[1:]:
            pad.ylim(ymin, ymax)
            pad.logy()

        pass  # end temporary style

    return c
def jsd(data_, args, feature_dict, pt_range, title=None):
    """
    Compute the Jensen-Shannon divergence between the jet-mass spectra of
    background jets passing and failing successive cuts on each feature, and
    plot the result.

    Saves the plot as `figures/jsd<suffix>.pdf`, or as
    `figures/<title>_jsd<suffix>.pdf` if `title` is given; `<suffix>` encodes
    `pt_range` when it is not None.

    Arguments:
        data_: Pandas data frame from which to read data. Columns `signal`,
            `m`, `weight_test`, the feature columns and (if `pt_range` is
            given) `pt` are read.
        args: Namespace holding command-line arguments; `args.show` switches
            off batch mode.
        feature_dict: Mapping from base variable name to a sequence of
            suffixes; each `basevar + suffix` is one feature.
        pt_range: Two-element (exclusive) pT selection, or None for no
            selection.
        title: Optional prefix for the output file name.

    Returns:
        Tuple `(canvas, args, path)` with the canvas returned by `plot`, the
        unchanged `args` and the path of the saved figure.
    """

    # Extract features and count appearance of each base variable. There is
    # one `appearances` entry per base variable, in the same order as the
    # corresponding run of entries in `features`.
    features = []
    appearances = []
    for basevar in feature_dict.keys():
        suffixes = feature_dict[basevar]
        for suffix in suffixes:
            features.append(basevar + suffix)
        appearances.append(len(suffixes))

    # Select data
    if pt_range is not None:
        data = data_[(data_['pt'] > pt_range[0]) & (data_['pt'] < pt_range[1])]
    else:
        data = data_

    # Create local histogram style dict. The per-class dicts are copied too:
    # a plain `dict(**HISTSTYLE)` shares them, so the labels below would be
    # written into the module-level HISTSTYLE.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    histstyle[True]['label'] = "Pass"
    histstyle[False]['label'] = "Fail"

    # Define common variables
    msk = data['signal'] == 0
    bkg_weights = data.loc[msk, 'weight_test'].values
    # Background efficiencies in percent: 5, 10, ..., 95
    effs = np.linspace(0, 100, 10 * 2, endpoint=False)[1:].astype(int)

    # Loop tagger features
    jsd_values = {feat: [] for feat in features}
    for feat in features:

        if len(jsd_values[feat]):
            continue  # Duplicate feature.

        low = signal_low(feat)
        bkg_values = data.loc[msk, feat].values

        # Define cuts at the requested background efficiencies
        cuts = [
            wpercentile(bkg_values, eff if low else 100 - eff,
                        weights=bkg_weights)
            for eff in effs
        ]

        # Compute JSD for successive cuts
        for cut in cuts:

            # Entries passing the cut; the direction depends on the feature
            msk_pass = data[feat] > cut
            if low:
                msk_pass = ~msk_pass

            sel_pass = msk_pass & msk
            sel_fail = ~msk_pass & msk

            # Get histograms. The canvas is only used to build the two
            # histograms; it is neither decorated nor saved.
            c = rp.canvas(batch=not args.show)
            h_pass = c.hist(data.loc[sel_pass, 'm'].values, bins=MASSBINS,
                            weights=data.loc[sel_pass, 'weight_test'].values,
                            normalise=True, **histstyle[True])
            h_fail = c.hist(data.loc[sel_fail, 'm'].values, bins=MASSBINS,
                            weights=data.loc[sel_fail, 'weight_test'].values,
                            normalise=True, **histstyle[False])

            # Convert to numpy arrays
            p = root_numpy.hist2array(h_pass)
            f = root_numpy.hist2array(h_fail)

            # Compute Jensen-Shannon divergence
            jsd_values[feat].append(JSD(p, f, base=2))

    # Compute meaningful limit on JSD, at efficiencies spaced evenly in
    # logit space
    jsd_limits = list()
    sigmoid = lambda x: 1. / (1. + np.exp(-x))
    for eff in sigmoid(np.linspace(-5, 5, 20 + 1, endpoint=True)):
        limits = jsd_limit(data[msk], eff, num_bootstrap=5)
        jsd_limits.append((eff, np.mean(limits), np.std(limits)))

    # Perform plotting
    c = plot(args, data, effs, jsd_values, jsd_limits, features, pt_range,
             appearances)

    # Output
    suffix = '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(*pt_range)
    if title is None:
        path = 'figures/jsd{}.pdf'.format(suffix)
    else:
        path = 'figures/' + title + '_jsd{}.pdf'.format(suffix)

    c.save(path=path)
    return c, args, path
def plot_individual (*argv):
    """
    Draw tagged-background jet-mass spectra on separate canvases.

    The first canvas holds only the common legend and text. Each further
    canvas shows the inclusive background and signal spectra plus the
    background spectra passing the cuts of one pair of consecutive features.
    Every canvas is saved under
    `figures/jetmasscomparison__eff_sig_<eff>__<legend|feat1_feat2>.pdf`.

    Arguments (positional, unpacked from `argv`):
        data: Data frame with columns `signal`, `m` and `weight_test`.
        args: Object whose `show` attribute switches off batch mode.
        features: Sequence of feature names, consumed in pairs
            `(features[0], features[1])`, `(features[2], features[3])`, ...
        msks_pass: Mapping from feature name to the boolean mask of entries
            passing that feature's cut.
        eff_sig: Signal efficiency in percent, used in text and file name.
    """

    # Unpack arguments
    data, args, features, msks_pass, eff_sig = argv

    with TemporaryStyle() as style:

        # Style @TEMP?
        ymin, ymax = 5E-05, 5E+00
        scale = 0.6
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(style.GetLabelSize(coord) * scale, coord)
            style.SetTitleSize(style.GetTitleSize(coord) * scale, coord)
        style.SetTickLength(0.07, 'x')
        style.SetTickLength(0.07 * (5./6.) * (2./3.), 'y')

        # Local style overrides
        # Copy the per-class style dicts as well as the outer dict: a plain
        # `dict(**HISTSTYLE)` shares the inner dicts, so every override below
        # would be written into the module-level HISTSTYLE.
        histstyle = {key: dict(value) for key, value in HISTSTYLE.items()}
        histstyle[True]['fillstyle'] = 3554
        histstyle[True]['linewidth'] = 4
        histstyle[False]['linewidth'] = 4
        histstyle[True]['label'] = None
        histstyle[False]['label'] = None
        for v in ['linecolor', 'fillcolor']:
            histstyle[True][v] = 16
            histstyle[False][v] = ROOT.kBlack
        style.SetHatchesLineWidth(6)

        # Loop features. Index -1 (with `feats` None) is the legend-only
        # canvas; the feature pairs are numbered from 0.
        ts = style.GetTextSize()
        lts = style.GetLegendTextSize()
        pairs = [None] + list(zip(features[::2], features[1::2]))
        for ifeat, feats in enumerate(pairs, start=-1):
            first = ifeat == -1

            # Style
            style.SetTitleOffset(1.25 if first else 1.2, 'x')
            style.SetTitleOffset(1.7 if first else 1.6, 'y')
            style.SetTextSize(ts * (0.8 if first else scale))
            style.SetLegendTextSize(lts * (0.8 + 0.03 if first else scale + 0.03))

            # Canvas
            c = rp.canvas(batch=not args.show, size=(300, 200))

            if first:
                opts = dict(xmin=0.185, width=0.60, columns=2)
                c.legend(header=' ', categories=[
                    ("Multijets", histstyle[False]),
                    ("#it{W} jets", histstyle[True])
                    ], ymax=0.45, **opts)
                c.legend(header='Inclusive selection:', ymax=0.40, **opts)
                c.pad()._legends[-2].SetMargin(0.35)
                c.pad()._legends[-1].SetMargin(0.35)

                c.text(["#sqrt{s} = 13 TeV, #it{W} jet tagging",
                        "Cuts at #varepsilon_{sig}^{rel} = %.0f%%" % eff_sig,
                        ], xmin=0.2, ymax=0.80, qualifier=QUALIFIER)

            else:
                # Plots
                # -- Dummy, for proper axes
                c.hist([ymin], bins=[50, 300], linestyle=0, fillstyle=0)

                # -- Inclusive
                base = dict(bins=MASSBINS, normalise=True)
                for signal in [False, True]:
                    msk = data['signal'] == signal
                    histstyle[signal].update(base)
                    histstyle[signal]['option'] = 'HIST'
                    c.hist(data.loc[msk, 'm'].values,
                           weights=data.loc[msk, 'weight_test'].values,
                           **histstyle[signal])

                # Switch the shared style dicts back to the legend option
                for sig in [True, False]:
                    histstyle[sig]['option'] = 'FL'

                # -- Tagged
                for jfeat, feat in enumerate(feats):
                    opts = dict(
                        linecolor = rp.colours[((2 * ifeat + jfeat) // 2)],
                        linestyle = 1 + 6 * (jfeat % 2),
                        linewidth = 4,
                        )
                    cfg = dict(**base)
                    cfg.update(opts)
                    msk = (data['signal'] == 0) & msks_pass[feat]
                    c.hist(data.loc[msk, 'm'].values,
                           weights=data.loc[msk, 'weight_test'].values,
                           label=" " + latex(feat, ROOT=True), **cfg)

                # -- Legend(s)
                # `first` is always False in this branch, so the former
                # `... if first else ...` selections reduce to constants.
                y = 0.68
                dy = 0.04
                c.legend(width=0.25, xmin=0.63, ymax=y)
                c.latex("Tagged multijets:", NDC=True, x=0.87, y=y + dy,
                        textcolor=ROOT.kGray + 3,
                        textsize=style.GetLegendTextSize() * 0.9, align=31)
                c.pad()._legends[-1].SetMargin(0.35)
                c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())

                # Formatting pads
                tpad = c.pad()._bare()
                tpad.SetLeftMargin (0.20)
                tpad.SetBottomMargin(0.20)
                tpad.SetTopMargin (0.05)

                # Re-draw axes
                tpad.RedrawAxis()
                tpad.Update()
                c.pad()._xaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
                c.pad()._yaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"

                # Decorations
                c.xlabel("Large-#it{R} jet mass [GeV]")
                c.ylabel("Fraction of jets")

                c.text(qualifier=QUALIFIER, xmin=0.25, ymax=0.82)

                c.ylim(ymin, ymax)
                c.logy()

            # Save
            c.save(path = 'figures/jetmasscomparison__eff_sig_{:d}__{}.pdf'.format(int(eff_sig), 'legend' if first else '{}_{}'.format(*feats)))

        pass  # end temporary style

    return
def main(args):
    """
    Compare sub-leading jet pT spectra in data and MC and print
    control-region statistics.

    Saves `figures/distributions/sub_pt_bkg_data_mc.pdf` and `.eps`, then
    prints effective entries and integrals of the data and MC background
    histograms, and the signal contamination and signal efficiency of the
    control region.

    Arguments:
        args: Handed to `initialise`, whose first return value replaces it.
    """

    def report(label, value):
        # Gives the same output as the statement form `print label, value`
        print("%s %s" % (label, value))

    # Definitions
    # NOTE(review): `dict(**HISTSTYLE)` copies only the outer dict, so the
    # label assignments below also change the module-level HISTSTYLE.
    histstyle = dict(**HISTSTYLE)

    # Initialise
    args, cfg = initialise(args)

    # Load data
    mc, features, _ = load_data('data/djr_LCTopo_2.h5')
    data, features, _ = load_data('data/djr_LCTopo_data.h5')

    histstyle[True]['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, Model A, m = 2 TeV'

    # k-NN variable names. The calls that would add the k-NN variables to
    # `data` and `mc` are disabled here.
    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')

    bins_pt = np.linspace(450, 5000, 50)

    # Useful masks
    msk_bkg_data = data['signal'] == 0
    msk_bkg_mc = (mc['signal'] == 0)
    msk_sig_mc = (mc['signal'] == 1)
    # Control region: either of the two jets has `ungrtrk500` below 20
    msk_CR = (mc['lead_jet_ungrtrk500'] < 20) | (mc['sub_jet_ungrtrk500'] < 20)

    scale = 139 * 1000000  # (inverse nanobarn)

    # pT dist
    c = rp.canvas(batch=True)
    hist_incl_data = c.hist(
        data.loc[msk_bkg_data, 'jet_pt'].values,
        bins=bins_pt,
        weights=data.loc[msk_bkg_data, 'weight'].values,
        label="Data, control region",
        normalise=False,
        linecolor=ROOT.kGreen + 2)
    hist_incl_mc = c.hist(
        mc.loc[msk_bkg_mc, 'sub_jet_pt'].values,
        bins=bins_pt,
        weights=scale * mc.loc[msk_bkg_mc, 'weight'].values,
        label="MC, scaled with lumi",
        normalise=False,
        linecolor=ROOT.kViolet + 2)
    hist_incl_sig = c.hist(
        mc.loc[msk_sig_mc, 'sub_jet_pt'].values,
        bins=bins_pt,
        weights=mc.loc[msk_sig_mc, 'weight'].values,
        label="Combined Signal",
        normalise=False,
        linecolor=ROOT.kOrange + 2)

    c.legend(width=0.4, xmin=0.5, ymax=0.9)
    c.ylabel("Number of events")
    c.xlabel("Sub-leading jet pT [GeV]")
    c.logy()
    for path in ('figures/distributions/sub_pt_bkg_data_mc.pdf',
                 'figures/distributions/sub_pt_bkg_data_mc.eps'):
        c.save(path)

    report("Data bkg effective entries: ", hist_incl_data.GetEffectiveEntries())
    report("MC bkg effective entries: ", hist_incl_mc.GetEffectiveEntries())
    report("Data bkg integral: ", hist_incl_data.Integral())
    report("MC bkg integral: ", hist_incl_mc.Integral())

    del c

    # Control-region histograms; the canvas is only used to build them
    c = rp.canvas(batch=True)
    msk_bkg_CR = (msk_bkg_mc & msk_CR)
    msk_sig_CR = (msk_sig_mc & msk_CR)
    hist_bkg_CR = c.hist(
        mc.loc[msk_bkg_CR, 'lead_jet_pt'].values,
        bins=bins_pt,
        weights=scale * mc.loc[msk_bkg_CR, 'weight'].values,
        label="MC, control region",
        normalise=False,
        linecolor=ROOT.kGreen + 2)
    hist_sig_CR = c.hist(
        mc.loc[msk_sig_CR, 'lead_jet_pt'].values,
        bins=bins_pt,
        weights=mc.loc[msk_sig_CR, 'weight'].values,
        label="MC, control region",
        normalise=False,
        linecolor=ROOT.kGreen + 2)

    # Contamination: signal share of the control region.
    # Efficiency: share of the inclusive signal that falls in the region.
    report("CR sig contamination (eff. entries): ",
           hist_sig_CR.GetEffectiveEntries()
           / (hist_bkg_CR.GetEffectiveEntries() + hist_sig_CR.GetEffectiveEntries()))
    report("CR sig contamination (integral): ",
           hist_sig_CR.Integral()
           / (hist_bkg_CR.Integral() + hist_sig_CR.Integral()))
    report("CR sig efficiency (eff. entries): ",
           hist_sig_CR.GetEffectiveEntries() / hist_incl_sig.GetEffectiveEntries())
    report("CR sig efficiency (integral): ",
           hist_sig_CR.Integral() / hist_incl_sig.Integral())
def plot(*argv):
    """
    Draw the JSD as a function of background efficiency for all features.

    Arguments (positional, unpacked from `argv`):
        args: Object whose `show` attribute switches off batch mode.
        data: Not used by this function.
        effs: Background efficiencies in percent (x-values, divided by 100).
        jsd: Mapping from feature name to the JSD values at `effs`.
        jsd_limits: Sequence of `(eff, mean, std)` tuples, drawn as a
            smoothed band.
        features: Sequence of feature names, grouped by base variable.
        pt_range: Two-element pT range for the text, or a falsy value.
        appearances: List with the number of features per base variable, in
            the order of `features`.

    Returns:
        The canvas on which the figure was drawn.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range, appearances = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.5, 'x')
        style.SetTitleOffset(2.0, 'y')

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Reference line at JSD = 1
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        for i in range(ref.GetXaxis().GetNbins()):
            ref.SetBinContent(i + 1, 1)
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)

        linestyles = [1, 3, 5, 7]
        width = 0.15

        # Group boundaries: the features of base variable `i` are
        # `features[indices[i]:indices[i + 1]]`.
        indices = np.array([0] + appearances).cumsum()

        def draw_curve(feat, ifeat, colour, linestyle):
            # One JSD curve; the first feature of a group gets marker 20
            markerstyle = 20 if ifeat == 0 else 23 + ifeat
            c.plot(jsd[feat], bins=np.array(effs) / 100.,
                   linecolor=colour, markercolor=colour,
                   linestyle=linestyle, markerstyle=markerstyle,
                   label=latex(feat, ROOT=True), option='PL')

        if len(appearances) != 2:
            # One legend per tagger class (analytical / MVA)
            for is_simple in [True, False]:
                for i in range(len(indices) - 1):
                    colour = rp.colours[i % len(rp.colours)]
                    group = features[indices[i]:indices[i + 1]]
                    for ifeat, feat in enumerate(group):
                        if is_simple != signal_low(feat):
                            continue
                        draw_curve(feat, ifeat, colour, 1 + ifeat)

                c.legend(header=("Analytical:" if is_simple else "MVA:"),
                         width=width * (1 + 0.8 * int(is_simple)),
                         xmin=0.42 + (width + 0.05) * (is_simple),
                         ymax=0.888,
                         columns=2 if is_simple else 1,
                         margin=0.35)
        else:
            # Exactly two base variables: one legend per base variable
            for first_var in [True, False]:
                i = 0 if first_var else 1
                colour = rp.colours[i % len(rp.colours)]
                group = features[indices[i]:indices[i + 1]]
                for ifeat, feat in enumerate(group):
                    draw_curve(feat, ifeat, colour, linestyles[ifeat])

                # The header names the first feature of the group. It is
                # looked up through `indices`: indexing with
                # `appearances[1]` only hits the start of the second group
                # when both groups have the same size.
                header = latex(features[indices[i]], ROOT=True) + "-based:"
                c.legend(header=header, width=width,
                         xmin=0.45 + (width + 0.06) * (first_var),
                         ymax=0.888)

        # Meaningful limits on JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr, linestyle=2, linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "Multijets"] + \
               (["p_{T} [GeV] #in", " [{:.0f}, {:.0f}]".format(*pt_range)] if pt_range else []),
               ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting", 0.065, 1.2, align=11, textsize=11,
                textcolor=ROOT.kGray + 2)
        c.xlim(0, 1)
        c.ymin(1E-06)  # chosen for highest pT bin
        c.padding(0.45)
        c.logy()

        for leg in c.pad()._legends:
            leg.SetMargin(0.5)

        # Place the "Statistical limit" label below the band, at the
        # seventh-to-last point of the smoothed graph
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr.GetN() - 7
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit", x_, y_ - ey_ / 2., align=23,
                textsize=11, angle=12, textcolor=ROOT.kGray + 2)

    return c
def main(args):
    """
    Validate the fitted `VAR` profile and plot local selection efficiencies.

    Steps, in order:
      1. Load the data and fill the measured 1D profile on background.
      2. Add the k-NN variable to `data` and load the stored fit.
      3. Evaluate the fit on an 8x finer binning of `VARX`, fit a weighted
         second-degree polynomial to the measured percentiles and write the
         coefficients and the fitted profile to a ROOT file under `models/`.
      4. Plot measured vs. fitted profile via `plot`.
      5. For signal and background separately, plot the per-bin efficiency
         (in percent) of `knn > 0` and of `VAR > 70` and save the figures
         under `figures/knn/`.

    Arguments:
        args: Command-line arguments, passed through `initialise`; only
            `args.input` is read here.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data('data/' + args.input)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    profile_meas, (x, percs, err) = fill_profile_1D(data[msk_bkg])
    # Float literal so that integer-typed errors cannot truncate the weights
    weights = 1. / err

    # Add k-NN variable
    knnfeat = 'knn'
    orgfeat = VAR
    model_path = 'models/knn/{}_{}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)
    add_knn(data, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    knn = loadclf(model_path)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8

        # Short-hands
        vbins, vmin, vmax = AXIS[VARX]

        # Re-binned bin edges and centres
        fineBins = np.linspace(vmin, vmax, vbins * rebin + 1, endpoint=True)
        fineCentres = fineBins[:-1] + 0.5 * np.diff(fineBins)

        # Get predictions evaluated at re-binned bin centres
        if 'erf' in FIT:
            fit = func(fineCentres, knn[0], knn[1], knn[2])
            print("Check:  {}".format(
                func([1500, 2000], knn[0], knn[1], knn[2])))
        else:
            fit = knn.predict(fineCentres.reshape(-1, 1))

        # Fill ROOT "profile"
        profile_fit = ROOT.TH1F('profile_fit', "", len(fineBins) - 1,
                                fineBins.flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

        # Weighted second-degree polynomial fit to the measured percentiles
        poly = PolynomialFeatures(degree=2)
        X_poly = poly.fit_transform(x.reshape(-1, 1))
        reg = LinearRegression(fit_intercept=False)
        reg.fit(X_poly, percs, weights)
        coef = reg.coef_
        intercept = reg.intercept_
        print("COEFFICIENTS:  {} {}".format(coef, intercept))
        TCoef = ROOT.TVector3(coef[0], coef[1], coef[2])

        # Store coefficients and fitted profile
        outFile = ROOT.TFile.Open(
            "models/{}_jet_ungrtrk500_eff{}_stat{}_{}.root".format(
                FIT, EFF, MIN_STAT, MODEL), "RECREATE")
        outFile.cd()
        TCoef.Write()
        profile_fit.SetName("kNNfit")
        profile_fit.Write()
        outFile.Close()

        profile_meas2 = ROOT.TGraph(len(x), x, percs)

    # Plotting
    with Profile("Plotting"):
        plot(profile_meas2, profile_fit)

    # Plotting local selection efficiencies
    # NOTE: MC weights are scaled with lumi; no rescaling is applied here.

    # Neither the binning nor the pass masks depend on the sample
    nbins, xmin, xmax = AXIS[VARX]
    bins = np.linspace(xmin, xmax, nbins + 1, endpoint=True)
    msk_pass = data[knnfeat] > 0
    msk_pass_org = data[orgfeat] > 70

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):
        tag = 'sig' if sig else 'bkg'
        print("HERE:  {}".format(bins))

        # Create `profile` histograms. Names are unique per histogram and per
        # sample so that ROOT does not replace an existing object.
        profile_knn = ROOT.TH1F('profile_knn_' + tag, "", len(bins) - 1, bins)
        profile_org = ROOT.TH1F('profile_org_' + tag, "", len(bins) - 1, bins)

        # Compute inclusive efficiency in bins of `VARX`
        for i in range(nbins):
            msk_bin = msk & (data[VARX] > bins[i]) & (data[VARX] <= bins[i + 1])
            den = data.loc[msk_bin, 'TotalEventWeight'].values.sum()
            if den > 0:  # empty bins keep their default content of 0
                num = data.loc[msk_bin & msk_pass,
                               'TotalEventWeight'].values.sum()
                num_org = data.loc[msk_bin & msk_pass_org,
                                   'TotalEventWeight'].values.sum()
                profile_knn.SetBinContent(i + 1, num / den * 100.)
                profile_org.SetBinContent(i + 1, num_org / den * 100.)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.10)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile_knn.SetLineColor(rp.colours[1])
        profile_org.SetLineColor(rp.colours[2])
        profile_knn.SetMarkerStyle(24)

        # `profile_org` is drawn first and therefore provides the axes; style
        # both histograms so the axis settings are visible whichever is first
        yrange = (0., EFF * 3)
        for hist in (profile_org, profile_knn):
            hist.GetXaxis().SetTitle("#it{m}_{jj} [GeV]")
            hist.GetYaxis().SetTitle("Selection efficiency (%)")
            hist.GetYaxis().SetNdivisions(505)
            hist.GetXaxis().SetTitleOffset(1.4)
            hist.GetYaxis().SetTitleOffset(1.8)
            hist.GetXaxis().SetRangeUser(*XRANGE)
            hist.GetYaxis().SetRangeUser(*yrange)

        # Draw
        profile_org.Draw()
        profile_knn.Draw("same")

        # The legend has to be drawn after the histograms: drawing a histogram
        # without "same" clears the pad, which would remove the legend again
        leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)
        leg.AddEntry(profile_knn, "#it{n}_{trk}^{#varepsilon=%s%%} > 0" % (EFF), "l")
        leg.AddEntry(profile_org, "#it{n}_{trk} > 70", "l")
        leg.Draw()

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.pdf'.format(
            FIT, tag, VAR, EFF, MODEL + INPUT, MIN_STAT))
        c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.eps'.format(
            FIT, tag, VAR, EFF, MODEL + INPUT, MIN_STAT))
        del c

    return
def plot1D(*argv):
    """
    Method for delegating 1D plotting.

    Draws the original and the DDT-transformed profile of `variable`, the
    linear fit taken from `ddt`, and the fit range, then saves the canvas to
    `figures/ddt/ddt_<variable>.pdf`.

    Arguments (packed positionally in `argv`):
        graphs: Mapping with the graphs stored under `variable` and
            `variable + 'DDT'`.
        ddt: Fitted transform; `intercept_`, `offset_` and `coef_` are read.
        arr_x: Values whose minimum and maximum delimit the fitted line.
        variable: Name of the variable being plotted.
        fit_range: (low, high) pair marked with vertical lines.
    """

    # Unpack arguments
    graphs, ddt, arr_x, variable, fit_range = argv

    # Legend and axis labels: (original, transformed, y-axis), or None when
    # the variable is not one of the known ones
    if variable == VAR_TAU21:
        labels = ("Original, #tau_{21}",
                  "Transformed, #tau_{21}^{DDT}",
                  "#LT#tau_{21}#GT, #LT#tau_{21}^{DDT}#GT")
    elif variable == VAR_N2:
        labels = ("Original, N_{2}",
                  "Transformed, N_{2}^{DDT}",
                  "#LTN_{2}#GT, #LTN_{2}^{DDT}#GT")
    elif variable == VAR_DECDEEP:
        labels = ("Original, dec_deepWvsQCD",
                  "Transformed, dec_deepWvsQCD^{DDT}",
                  "#LTdec_deepWvsQCD#GT, #LTdec_deepWvsQCD^{DDT}#GT")
    elif variable == VAR_DEEP:
        labels = ("Original, deepWvsQCD",
                  "Transformed, deepWvsQCD^{DDT}",
                  "#LTdeepWvsQCD#GT, #LTdeepWvsQCD^{DDT}#GT")
    else:
        labels = None

    # Style
    ROOT.gStyle.SetTitleOffset(1.4, 'x')

    # Canvas
    c = rp.canvas(batch=True)

    # Setup
    bare_pad = c.pads()[0]._bare()
    bare_pad.cd()
    bare_pad.SetTopMargin(0.10)

    # Profiles
    if labels is not None:
        label_org, label_ddt, _ = labels
        col_org, col_ddt = rp.colours[4], rp.colours[1]
        c.graph(graphs[variable], label=label_org, linecolor=col_org,
                markercolor=col_org, markerstyle=24, legend_option='PE')
        c.graph(graphs[variable + 'DDT'], label=label_ddt, linecolor=col_ddt,
                markercolor=col_ddt, markerstyle=20, legend_option='PE')

    # Fit
    x_lo, x_hi = min(arr_x), max(arr_x)
    intercept = ddt.intercept_ + ddt.offset_
    coef = ddt.coef_
    line = [intercept + x_lo * coef, intercept + x_hi * coef]
    c.plot(line, bins=[x_lo, x_hi], color=rp.colours[-1], label='Linear fit',
           linewidth=1, linestyle=1, option='L')

    # Decorations
    c.xlabel("jet #rho^{DDT} = log[m^{2} / (p_{T} #times 1 GeV)]")
    if labels is not None:
        c.ylabel(labels[2])

    c.text(["#sqrt{s} = 13 TeV, Multijets"], qualifier=QUALIFIER, ATLAS=False)
    c.legend(width=0.25, xmin=0.57, ymax=0.86)

    c.xlim(0, 6.0)
    ymax = 0.8 if variable == VAR_N2 else 1.4
    c.ylim(0, ymax)

    c.latex("Fit range", sum(fit_range) / 2., 0.08, textsize=13,
            textcolor=ROOT.kGray + 2)

    # Fit parameters, stacked downwards from 70% of the y-axis range
    textopts = dict(align=11, textsize=14, textcolor=ROOT.kBlack)
    c.latex("Fit parameters:", 0.37, 0.7*ymax, **textopts)
    c.latex(" intercept = {:7.4f}".format(intercept[0]), 0.37, 0.65*ymax,
            **textopts)
    c.latex(" coef = {:7.4f}".format(coef[0]), 0.37, 0.6*ymax, **textopts)

    for edge, align in zip(fit_range[:2], ['BR', 'BL']):
        c.xline(edge, ymax=0.82, text_align=align, linecolor=ROOT.kGray + 2)

    # Save
    mkdir('figures/ddt/')
    c.save('figures/ddt/ddt_{}.pdf'.format(variable))
    return
def test(data, variable, bg_eff, signal_above=False):
    """
    Plot the measured and the k-NN fitted profile of `variable`, followed by
    2D maps of the local selection efficiency for signal and for background.

    Arguments:
        data: Data frame; the columns 'signal', 'weight_test', the `VARS`
            columns and (after `add_knn`) 'knn' are read.
        variable: Name of the variable the k-NN fit was performed for.
        bg_eff: Background efficiency, in percent, used in model and figure
            names and in the background colour stops.
        signal_above: If True, passing means `knn > 0`; otherwise `knn < 0`.

    NOTE: `args.output` is read from an `args` name that is not a parameter
    of this function, so it must exist in the enclosing (module) scope.
    """

    # Shout out to Cynthia Brewer and Mark Harrower
    # [http://colorbrewer2.org]. Palette is colorblind-safe.
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]

    red, green, blue = map(np.array, zip(*blues))
    nb_cols = len(blues)
    stops = np.linspace(0, 1, nb_cols, endpoint=True)
    ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                         NB_CONTOUR)

    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    with Profile("filling profile"):
        profile_meas, _ = fill_profile(data[msk_bkg], variable, bg_eff,
                                       signal_above=signal_above)

    # Add k-NN variable
    with Profile("adding variable"):
        knnfeat = 'knn'
        model_path = args.output + \
            '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff)
        add_knn(data, feat=variable, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    with Profile("loading model"):
        knn = loadclf(model_path)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        fine_edges, fine_centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            fine_edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            fine_centres[ax] = fine_edges[ax][:-1] + \
                0.5 * np.diff(fine_edges[ax])

        # Get predictions evaluated at re-binned bin centres
        grid_x, grid_y = np.meshgrid(fine_centres['x'], fine_centres['y'])
        grid_x, grid_y = standardise(grid_x, grid_y)

        X = np.vstack((grid_x.flatten(), grid_y.flatten())).T
        fit = knn.predict(X).reshape(grid_x.shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(fine_edges['x']) - 1,
                                fine_edges['x'].flatten('C'),
                                len(fine_edges['y']) - 1,
                                fine_edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

    # Plotting: first the measured, then the fitted profile
    for fit in [False, True]:
        plot(profile_fit if fit else profile_meas, fit, variable, bg_eff)

    # Cut direction is the same for every bin and sample
    if signal_above:
        msk_pass = data[knnfeat] > 0
    else:
        msk_pass = data[knnfeat] < 0

    # Plotting local selection efficiencies
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        if sig:
            print("working on signal")
            palette = blues
            stops = np.linspace(0, 1, len(palette), endpoint=True)
        else:
            print("working on bg")
            # Extra leading colour; the blue scale is squeezed into the range
            # from `bg_eff` percent to 1
            palette = [(255 / 255., 51 / 255., 4 / 255.)] + blues
            stops = np.array([0] + list(
                np.linspace(0, 1, len(palette) - 1, endpoint=True) *
                (1. - bg_eff / 100.) + bg_eff / 100.))

        red, green, blue = map(np.array, zip(*palette))
        nb_cols = len(palette)
        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1],
                        AXIS[var][2],
                        AXIS[var][0] + 1,
                        endpoint=True) for var in VARS
        ]

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        def efficiency(msk_bin):
            # Weighted fraction of this sample's events in the bin that pass
            selected = msk & msk_bin
            num = data.loc[selected & msk_pass, 'weight_test'].values.sum()
            den = data.loc[selected, 'weight_test'].values.sum()
            return num / den

        # Compute inclusive efficiency in bins of `VARY`
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            effs.append(efficiency((data[VARY] > low) & (data[VARY] < high)))

        # Fill profile
        with Profile("Fill profile"):
            for i, j in itertools.product(*map(range, shape)):

                # Combine the per-axis bin masks
                msk_bin = None
                for idx, axis_bins, var in zip([i, j], bins, VARS):
                    low, high = axis_bins[idx:idx + 2]
                    in_bin = (data[var] > low) & (data[var] <= high)
                    msk_bin = in_bin if msk_bin is None else msk_bin & in_bin

                # Set non-zero bin content
                if np.sum(msk & msk_bin):
                    profile.SetBinContent(i + 1, j + 1, efficiency(msk_bin))

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        zaxis = profile.GetZaxis()

        xaxis.SetTitle("Large-#it{R} jet " + latex(VARX, ROOT=True) +
                       " = log(m^{2}/p_{T}^{2})")
        yaxis.SetTitle("Large-#it{R} jet " + latex(VARY, ROOT=True) +
                       " [GeV]")
        zaxis.SetTitle("Selection efficiency for %s^{(%s%%)}" %
                       (latex(variable, ROOT=True), bg_eff))

        yaxis.SetNdivisions(505)
        zaxis.SetNdivisions(505)
        xaxis.SetTitleOffset(1.4)
        yaxis.SetTitleOffset(1.8)
        zaxis.SetTitleOffset(1.3)

        zaxis.SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies, one per row, right-aligned at the last x-bin
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            if ibin == 1:
                prefix = "#bar{#varepsilon}^{rel}_{%s} = " % (
                    'sig' if sig else 'bkg')
            else:
                prefix = ''
            tlatex.DrawLatex(xt, yaxis.GetBinCenter(ibin),
                             "%s%.1f%%" % (prefix, eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV", -4.5, BOUNDS[0].Eval(-4.5) + 30,
                align=21, angle=-37, textsize=13, textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV", -2.5, BOUNDS[1].Eval(-2.5) - 30,
                align=23, angle=-57, textsize=13, textcolor=ROOT.kGray + 3)

        # Save, both to the fixed location and below `args.output`
        mkdir('knn_fitter/figures/')
        c.save('knn_fitter/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))
        mkdir(args.output + '/figures/')
        c.save(args.output + '/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))

    return
def plot(*argv):
    """
    Method for delegating plotting.

    Draws, on two stacked pads, the values in `rejs` (top) and `jsds`
    (bottom) of every feature in bins of `var`, including the smoothed limit
    band built from `jsd_limits` on the bottom pad.

    Arguments (packed positionally in `argv`):
        data: Not used by this function.
        args: Namespace; only `args.show` is read.
        features: List of feature names.
        bins: Bin edges in `var`. NOTE: the last edge is floored in place.
        effs: Not used by this function.
        rejs: Mapping from feature to a sequence of (mean, std) pairs per bin.
        jsds: Mapping from feature to a sequence of (mean, std) pairs per bin.
        meanx: x-coordinates at which the markers are drawn.
        jsd_limits: Iterable of (x, y, stat. error, syst. error) tuples.
        masscut: Whether to add the mass-cut line to the canvas text.
        var: Binning variable, 'pt' or 'npv'.

    Returns:
        The canvas holding the plot.

    Raises:
        NotImplementedError: If `var` is neither 'pt' nor 'npv'.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut, var = argv

    # -- x-axis label; resolved first so that an unsupported variable is
    #    reported before anything is drawn
    if var == 'pt':
        xlabel = "Large-#it{R} jet p_{T} [GeV]"
    elif var == 'npv':
        xlabel = "Number of reconstructed vertices N_{PV}"
    else:
        raise NotImplementedError("Variable {} is not supported.".format(var))

    with TemporaryStyle() as style:

        # Set styles
        scale = 0.9
        style.SetTextSize(scale * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale * style.GetTitleSize(coord), coord)

        # Canvas
        c = rp.canvas(num_pads=2, fraction=0.55,
                      size=(int(800 * 600 / 857.), 600),
                      batch=not args.show)
        c.pads()[0]._bare().SetTopMargin(0.10)
        c.pads()[0]._bare().SetRightMargin(0.23)
        c.pads()[1]._bare().SetRightMargin(0.23)

        # To fix 30.5 --> 30 for NPV
        bins[-1] = np.floor(bins[-1])

        # Plots
        # -- References
        boxopts = dict(fillcolor=ROOT.kBlack, alpha=0.05,
                       linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins[0], bins[-1]], **boxopts)
        c.pads()[1].hist([1], bins=[bins[0], bins[-1]], **boxopts)

        # Bin centres and half-widths are the same for all features
        half_width = 0.5 * np.diff(bins)
        centres = np.array(bins[:-1]) + half_width

        for is_simple in [True, False]:
            for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]),
                                      enumerate(features)):

                # Consecutive feature pairs share a colour; wrap around the
                # palette like the other plotting methods do
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                opts = dict(
                    linecolor=colour,
                    markercolor=colour,
                    fillcolor=colour,
                    linestyle=1 + (ifeat % 2),
                    alpha=0.3,
                    option='E2',
                )

                mean_rej, std_rej = map(np.array, zip(*rejs[feat]))
                mean_jsd, std_jsd = map(np.array, zip(*jsds[feat]))

                # Error boxes
                npoints = len(centres)
                graph_rej = ROOT.TGraphErrors(npoints, centres, mean_rej,
                                              half_width, std_rej)
                graph_jsd = ROOT.TGraphErrors(npoints, centres, mean_jsd,
                                              half_width, std_jsd)

                c.pads()[0].hist(graph_rej, **opts)
                c.pads()[1].hist(graph_jsd, **opts)

                # Markers and lines
                opts['option'] = 'PE2L'
                opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                graph_rej = ROOT.TGraph(npoints, meanx, mean_rej)
                graph_jsd = ROOT.TGraph(npoints, meanx, mean_jsd)

                # MVA features get their legend entry on the top pad,
                # analytical ones on the bottom pad
                label = latex(feat, ROOT=True)
                c.pads()[0].hist(graph_rej,
                                 label=label if not is_simple else None,
                                 **opts)
                c.pads()[1].hist(graph_jsd,
                                 label=label if is_simple else None,
                                 **opts)

        # Draw class-specific legend
        width = 0.20
        c.pads()[0].legend(header='MVA:', width=width, xmin=0.79, ymax=0.92)
        c.pads()[1].legend(header='Analytical:', width=width, xmin=0.79,
                           ymax=0.975)

        # Meaningful limits on JSD
        x, y, ey_stat, ey_syst = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(x)
        x[0] = bins[0]
        x[-1] = bins[-1]
        ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))

        def as_float(arr):
            # Flat, float-typed copy of the array
            return arr.flatten('C').astype(float)

        gr_stat = ROOT.TGraphErrors(len(x), *[as_float(arr) for arr in (x, y, ex, ey_stat)])
        gr_comb = ROOT.TGraphErrors(len(x), *[as_float(arr) for arr in (x, y, ex, ey_comb)])
        smooth_tgrapherrors(gr_stat, ntimes=2)
        smooth_tgrapherrors(gr_comb, ntimes=2)
        c.pads()[1].graph(gr_comb, fillcolor=ROOT.kBlack, alpha=0.03,
                          option='3')
        c.pads()[1].graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1,
                          fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Label the band at its last point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr_comb.GetN() - 1
        gr_comb.GetPoint(idx, x_, y_)
        ey_ = gr_comb.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.pads()[1].latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit ",
                          x_, y_ + ey_, align=31, textsize=11, angle=0,
                          textcolor=ROOT.kGray + 2)

        # Decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)

        c.xlabel(xlabel)
        c.pads()[0].ylabel("1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
        c.pads()[1].ylabel("1/JSD @ #varepsilon_{sig}^{rel} = 50%")

        xmid = (bins[0] + bins[-1]) * 0.5
        c.pads()[0].latex("Random guessing", xmid, 2 * 0.9, align=23,
                          textsize=11, angle=0, textcolor=ROOT.kGray + 2)
        c.pads()[1].latex("Maximal sculpting", xmid, 1 * 0.8, align=23,
                          textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        c.text([], qualifier=QUALIFIER, xmin=0.15, ymax=0.93)
        c.text(["#sqrt{s} = 13 TeV, #it{W} jet tagging"] +
               (['m #in [60, 100] GeV'] if masscut else []),
               ATLAS=False, ymax=0.76)
        c.pads()[1].text(["Multijets"], ATLAS=False)

        c.pads()[0].ylim(1, 500)
        c.pads()[1].ylim(0.2, 2E+05)

        c.pads()[0].logy()
        c.pads()[1].logy()

    return c
def plot_distributions(data, var, bins):
    """
    Method for delegating plotting.

    For every mass bin in `MASS_BINS`, draws the KDE-smoothed, normalised
    distributions of `var` in the lowest mass bin, of `var` in the current
    mass bin and of `var + "CSS"` in the current mass bin, and saves the
    canvas to `figures/css/cssProfile_<var>_<index>.pdf`.

    Arguments:
        data: Data frame; the columns 'm', `var`, `var + "CSS"` and
            'weight_test' are read.
        var: Name of the base variable.
        bins: Binning passed on to the histogramming calls.
    """

    def mass_label(low, high):
        # Mass-range text, with a trailing ".0" stripped from the edges
        text = "#it{{m}} #in [{:.1f}, {:.1f}] GeV".format(low, high)
        return text.replace('.0', '')

    # Smoothed distribution of the first mass bin; reference for later bins
    h_D2lowmass = None

    for mass, (mass_down, mass_up) in enumerate(zip(MASS_BINS[:-1],
                                                    MASS_BINS[1:])):

        # Canvas
        c = rp.canvas(batch=True)

        # Fill histograms
        in_bin = (data['m'] >= mass_down) & (data['m'] < mass_up)
        w = data.loc[in_bin, 'weight_test'].values
        h_D2 = c.hist(data.loc[in_bin, var].values, bins=bins, weights=w,
                      display=False)
        h_D2CSS = c.hist(data.loc[in_bin, var + "CSS"].values, bins=bins,
                         weights=data.loc[in_bin, 'weight_test'].values,
                         display=False)

        # The fit takes the un-smoothed histogram of this bin and the
        # low-mass reference, so it has to run before the smoothing below
        if h_D2lowmass is None:
            profile_css = None
        else:
            sumChi2, bestOmega, profile_css, profile0rebin = fit(
                h_D2, 1.0, h_D2lowmass, "%.2f" % mass)
            normalise(profile_css, density=True)

        h_D2 = kde(h_D2)
        h_D2CSS = kde(h_D2CSS)

        normalise(h_D2, density=True)
        normalise(h_D2CSS, density=True)

        if h_D2lowmass is None:
            h_D2lowmass = h_D2.Clone('h_lowmass')

        # Draw histograms
        lowmassbin = mass_label(MASS_BINS[0], MASS_BINS[1])
        massbin = mass_label(MASS_BINS[mass], MASS_BINS[mass + 1])
        c.hist(h_D2lowmass,
               label=latex(var, ROOT=True) + ", {}".format(lowmassbin),
               linecolor=rp.colours[1], fillcolor=rp.colours[1], alpha=0.5,
               option='HISTL', legend_option='FL')
        c.hist(h_D2,
               label=latex(var, ROOT=True) + ", {}".format(massbin),
               linecolor=rp.colours[4], linestyle=2, option='HISTL')
        c.hist(h_D2CSS,
               label=latex(var + 'CSS', ROOT=True) + ", {}".format(massbin),
               linecolor=rp.colours[3], option='HISTL')

        # Disabled: draw reference histogram from fit.
        # if profile_css is not None:
        #     c.hist(profile_css, linecolor=ROOT.kBlack, linestyle=2,
        #            label='Transformed hist (CSS)')

        # Decorations
        c.xlabel(latex(var, ROOT=True) + ", " + latex(var + 'CSS', ROOT=True))
        c.ylabel("Number of jets p.d.f.")
        # Range is optimised for N2; other variables may need another one
        c.ylim(0, 5.2)
        c.legend(xmin=0.45, ymax=0.76, width=0.25)
        c.text(["#sqrt{s} = 13 TeV, Multijets", "KDE smoothed"],
               qualifier=QUALIFIER, ATLAS=False)
        c.pad()._xaxis().SetTitleOffset(1.3)
        c.pad()._yaxis().SetNdivisions(105)
        c.pad()._primitives[-1].Draw('SAME AXIS')

        # Save
        c.save('figures/css/cssProfile_{}_{}.pdf'.format(var, mass))

    return
def plot_full(*argv):
    """
    Method for delegating plotting.

    Draws a 2x2 grid of pads: the columns are the binning variables 'pt' and
    'npv', the top row shows the values in `rejs` and the bottom row those in
    `jsds`, including the smoothed limit band built from `jsd_limits`.

    Arguments (packed positionally in `argv`):
        data: Not used by this function.
        args: Namespace; only `args.show` is read.
        features: List of feature names.
        bins: Mapping from variable ('pt', 'npv') to bin edges. NOTE: the
            last 'npv' edge is floored in place.
        effs: Not used by this function.
        rejs: `rejs[var][feat]` is a sequence of (mean, std) pairs per bin.
        jsds: `jsds[var][feat]` is a sequence of (mean, std) pairs per bin.
        meanx: Mapping from variable to the x-coordinates of the markers.
        jsd_limits: Mapping from variable to an iterable of
            (x, y, stat. error, syst. error) tuples.
        masscut: Whether to add the mass-cut line to the canvas text.

    Returns:
        The canvas holding the plot.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    # One column of pads per binning variable, with its x-axis label
    columns = [
        ('pt', "Large-#it{R} jet p_{T} [GeV]"),
        ('npv', "Number of reconstructed vertices N_{PV}"),
    ]
    nb_col = len(columns)

    def as_float(arr):
        # Flat, float-typed copy of the array
        return arr.flatten('C').astype(float)

    with TemporaryStyle() as style:

        # Set styles
        scale = 1.0
        scale_axis = 0.7
        margin_squeeze = 0.035
        margin_vert = 0.20
        margin_hori = 0.35
        size = (800, 600)

        style.SetTextSize(scale_axis * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.05, 'x')
        style.SetTickLength(0.07 * (float(size[0]) / float(size[1])) *
                            (margin_hori / margin_vert), 'y')

        # Canvas
        c = rp.canvas(num_pads=(2, 2), size=size, batch=not args.show)

        # Margins: wide on the outside of the grid, squeezed where pads meet
        c.pads()[0]._bare().SetTopMargin(margin_vert)
        c.pads()[1]._bare().SetTopMargin(margin_vert)
        c.pads()[2]._bare().SetBottomMargin(margin_vert)
        c.pads()[3]._bare().SetBottomMargin(margin_vert)

        c.pads()[0]._bare().SetLeftMargin(margin_hori)
        c.pads()[2]._bare().SetLeftMargin(margin_hori)
        c.pads()[1]._bare().SetRightMargin(margin_hori)
        c.pads()[3]._bare().SetRightMargin(margin_hori)

        c.pads()[1]._bare().SetLeftMargin(margin_squeeze)
        c.pads()[3]._bare().SetLeftMargin(margin_squeeze)
        c.pads()[0]._bare().SetRightMargin(margin_squeeze)
        c.pads()[2]._bare().SetRightMargin(margin_squeeze)

        c.pads()[0]._bare().SetBottomMargin(margin_squeeze)
        c.pads()[1]._bare().SetBottomMargin(margin_squeeze)
        c.pads()[2]._bare().SetTopMargin(margin_squeeze)
        c.pads()[3]._bare().SetTopMargin(margin_squeeze)

        # To fix 30.5 --> 30 for NPV
        bins['npv'][-1] = np.floor(bins['npv'][-1])

        # Plots
        # -- References
        boxopts = dict(fillcolor=ROOT.kBlack, alpha=0.05,
                       linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins['pt'][0], bins['pt'][-1]], **boxopts)
        c.pads()[1].hist([2], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)
        c.pads()[2].hist([1], bins=[bins['pt'][0], bins['pt'][-1]], **boxopts)
        c.pads()[3].hist([1], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)

        for col, (var, xlabel) in enumerate(columns):
            ipad_rej = col + 0 * nb_col  # top row
            ipad_jsd = col + 1 * nb_col  # bottom row

            # Bin centres and half-widths are the same for all features
            half_width = 0.5 * np.diff(bins[var])
            centres = np.array(bins[var][:-1]) + half_width

            for is_simple in [True, False]:
                for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]),
                                          enumerate(features)):

                    # Consecutive feature pairs share a colour; wrap around
                    # the palette like the other plotting methods do
                    colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                    opts = dict(
                        linecolor=colour,
                        markercolor=colour,
                        fillcolor=colour,
                        linestyle=1 + (ifeat % 2),
                        alpha=0.3,
                        option='E2',
                    )

                    mean_rej, std_rej = map(np.array, zip(*rejs[var][feat]))
                    mean_jsd, std_jsd = map(np.array, zip(*jsds[var][feat]))

                    # Only _show_ mass-decorrelated features for `npv`: the
                    # others are pushed far outside the axis range
                    if (col == 1) and (ifeat % 2 == 0):
                        mean_rej *= -9999.
                        mean_jsd *= -9999.

                    # Error boxes
                    npoints = len(centres)
                    graph_rej = ROOT.TGraphErrors(npoints, centres, mean_rej,
                                                  half_width, std_rej)
                    graph_jsd = ROOT.TGraphErrors(npoints, centres, mean_jsd,
                                                  half_width, std_jsd)

                    c.pads()[ipad_rej].hist(graph_rej, **opts)
                    c.pads()[ipad_jsd].hist(graph_jsd, **opts)

                    # Markers and lines
                    opts['option'] = 'PE2L'
                    opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                    graph_rej = ROOT.TGraph(npoints, meanx[var], mean_rej)
                    graph_jsd = ROOT.TGraph(npoints, meanx[var], mean_jsd)

                    # MVA features get their legend entry on the top row,
                    # analytical ones on the bottom row
                    label = latex(feat, ROOT=True)
                    c.pads()[ipad_rej].hist(graph_rej,
                                            label=label if not is_simple else None,
                                            **opts)
                    c.pads()[ipad_jsd].hist(graph_jsd,
                                            label=label if is_simple else None,
                                            **opts)

            # Meaningful limits on JSD
            x, y, ey_stat, ey_syst = map(np.array, zip(*jsd_limits[var]))
            ex = np.zeros_like(x)
            x[0] = bins[var][0]
            x[-1] = bins[var][-1]
            ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
            gr_stat = ROOT.TGraphErrors(len(x), *[as_float(arr) for arr in (x, y, ex, ey_stat)])
            gr_comb = ROOT.TGraphErrors(len(x), *[as_float(arr) for arr in (x, y, ex, ey_comb)])
            smooth_tgrapherrors(gr_stat, ntimes=2)
            smooth_tgrapherrors(gr_comb, ntimes=2)
            c.pads()[ipad_jsd].graph(gr_comb, fillcolor=ROOT.kBlack,
                                     alpha=0.03, option='3')
            c.pads()[ipad_jsd].graph(gr_stat, linestyle=2,
                                     linecolor=ROOT.kGray + 1,
                                     fillcolor=ROOT.kBlack, alpha=0.03,
                                     option='L3')

            if col == 0:
                # Label the band at its last point (left column only)
                x_, y_ = ROOT.Double(0), ROOT.Double(0)
                idx = gr_comb.GetN() - 1
                gr_comb.GetPoint(idx, x_, y_)
                ey_ = gr_comb.GetErrorY(idx)
                x_, y_ = map(float, (x_, y_))
                c.pads()[ipad_jsd].latex(
                    "Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit ",
                    x_, y_ + 0.75 * ey_, align=31, textsize=11 * scale,
                    angle=0, textcolor=ROOT.kGray + 2)

            # Decorations
            # -- offsets: hide axis labels and titles on the inner edges
            c.pads()[0]._xaxis().SetLabelOffset(9999.)
            c.pads()[0]._xaxis().SetTitleOffset(9999.)
            c.pads()[1]._xaxis().SetLabelOffset(9999.)
            c.pads()[1]._xaxis().SetTitleOffset(9999.)

            c.pads()[2]._xaxis().SetTitleOffset(2.3)
            c.pads()[3]._xaxis().SetTitleOffset(2.3)

            c.pads()[1]._yaxis().SetLabelOffset(9999.)
            c.pads()[1]._yaxis().SetTitleOffset(9999.)
            c.pads()[3]._yaxis().SetLabelOffset(9999.)
            c.pads()[3]._yaxis().SetTitleOffset(9999.)

            # -- x-axis label
            c.pads()[ipad_jsd].xlabel(xlabel)

            if col == 0:
                pattern = "#splitline{#splitline{#splitline{%s}{}}{#splitline{}{}}}{#splitline{#splitline{}{}}{#splitline{}{}}}"
                c.pads()[ipad_rej].ylabel(pattern % "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
                c.pads()[ipad_jsd].ylabel(pattern % "1/JSD @ #varepsilon_{sig}^{rel} = 50%")

            xmid = (bins[var][0] + bins[var][-1]) * 0.5
            c.pads()[ipad_rej].latex("Random guessing", xmid, 2 * 0.9,
                                     align=23, textsize=11 * scale, angle=0,
                                     textcolor=ROOT.kGray + 2)
            c.pads()[ipad_jsd].latex("Maximal sculpting", xmid, 1 * 0.8,
                                     align=23, textsize=11 * scale, angle=0,
                                     textcolor=ROOT.kGray + 2)

            c.pads()[ipad_rej].ylim(1, 70)
            c.pads()[ipad_jsd].ylim(0.2, 7E+04)

            c.pads()[ipad_rej].logy()
            c.pads()[ipad_jsd].logy()

        # Draw class-specific legend in the right-most column, which has the
        # wide right-hand margin (stated explicitly instead of relying on the
        # loop variable still holding its last value)
        legend_col = nb_col - 1
        width = margin_hori - 0.03
        c.pads()[legend_col + 0 * nb_col].legend(
            header='MVA:', width=width,
            xmin=1. - margin_hori + 0.03, ymax=1. - margin_vert + 0.02)
        c.pads()[legend_col + 1 * nb_col].legend(
            header='Analytical:', width=width,
            xmin=1. - margin_hori + 0.03, ymax=1. - margin_squeeze + 0.02)
        c.pads()[legend_col + 0 * nb_col]._legends[-1].SetTextSize(style.GetLegendTextSize())
        c.pads()[legend_col + 1 * nb_col]._legends[-1].SetTextSize(style.GetLegendTextSize())

        # Common decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)

        c.text([], qualifier=QUALIFIER, xmin=margin_hori,
               ymax=1. - margin_vert + 0.03)

        c.pads()[1].text(["#sqrt{s} = 13 TeV, #it{W} jet tagging"] +
                         (['m #in [60, 100] GeV'] if masscut else []),
                         ATLAS=False, ymax=1. - margin_vert - 0.10)

        c.pads()[3].text(["Multijets"], ATLAS=False,
                         ymax=1. - margin_squeeze - 0.10)

        # Arrows: vertical text along the centre line of the canvas
        c._bare().cd()
        tlatex = ROOT.TLatex()
        tlatex.SetTextAngle(90)
        tlatex.SetTextAlign(22)
        tlatex.SetTextSize(11)
        tlatex.SetTextColor(ROOT.kGray + 2)
        offset = 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert))
        tlatex.DrawLatexNDC(0.5, 0. + offset, " Less sculpting #rightarrow")
        tlatex.DrawLatexNDC(0.5, 1. - offset, " Greater separation #rightarrow")

    return c
def main(args):
    """
    Fit the 99.5th weighted percentile of 'jet_ungrtrk500' on background as
    a function of 'dijetmass' with a k-nearest-neighbours regressor.

    The percentile is computed in 19 equal-width dijet-mass bins between 100
    and 8000; bins with 20 or fewer background events get a percentile of 0.
    The last bin is dropped and each remaining percentile is paired with the
    upper edge of its bin. The regressor is evaluated on a fine grid, drawn
    to `figures/distributions/percentile_test.pdf`, and the drawn object is
    written as "kNNfit" to a ROOT file under `figures/`.

    Arguments:
        args: Command-line arguments, passed through `initialise`; only
            `args.input` is read here.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data('data/' + args.input)

    EFF = 0.5
    VAR = 'jet_ungrtrk500'
    VARX = 'dijetmass'

    weight = 'weight'  # 'weight_test' / 'weight'

    bins_mjj = np.linspace(100, 8000, 20)
    fineBins = np.linspace(100, 8000, 7900)
    fineBinsRe = fineBins.reshape(-1, 1)

    is_bkg = data['signal'] == 0

    # Weighted percentile of `VAR` on background, per dijet-mass bin
    percs = []
    for low, high in zip(bins_mjj[:-1], bins_mjj[1:]):
        msk = (data[VARX] > low) & (data[VARX] <= high) & is_bkg
        if np.sum(msk) > 20:  # Ensure sufficient statistics for meaningful percentile
            percs.append(wpercentile(data=data.loc[msk, VAR].values,
                                     percents=100 - EFF,
                                     weights=data.loc[msk, weight].values))
        else:
            percs.append(0)

    print("Length of percs:  {} {}".format(len(percs), percs))

    # Drop the last bin and pair each remaining percentile with the upper
    # edge of its bin
    percs = percs[0:-1]
    bins_mjj = bins_mjj[0:-1]
    X = bins_mjj.reshape(-1, 1)
    X = X[1:len(bins_mjj)]

    print("{} {}".format(len(X), len(percs)))

    # Fit parameters. These are the values the regressor was actually built
    # with; the previously declared, unused values (2, 'uniform') and the
    # "uniform" legend label disagreed with them.
    knn_neighbors = 5
    knn_weights = 'distance'

    knn = KNeighborsRegressor(n_neighbors=knn_neighbors, weights=knn_weights)
    y_knn = knn.fit(X, percs).predict(fineBinsRe)

    c = rp.canvas(batch=True)
    knnFit = c.plot(y_knn, bins=fineBins, linecolor=ROOT.kRed + 2,
                    linewidth=2, linestyle=1,
                    label="knn fit, {}".format(knn_weights), option='L')

    c.save('figures/distributions/percentile_test.pdf')

    # Store the fit. The file is opened only now, after the canvas objects
    # exist, so that nothing else gets attached to it. The path comes from
    # the previously commented-out `TFile.Open` call, which is the only place
    # `outFile` was ever defined.
    outFile = ROOT.TFile.Open(
        "figures/knn_{}_eff{}_data.root".format(VAR, EFF), "RECREATE")
    outFile.cd()
    knnFit.SetName("kNNfit")
    knnFit.Write()
    outFile.Close()
def plot(*argv):
    """
    Method for delegating plotting.

    Draws the summary plot of background rejection (x-axis) versus 1/JSD
    (y-axis): one marker per feature, open markers and dashed connecting
    lines for the parameter scans, reference lines/boxes, and the smoothed
    band built from `jsd_limits`.

    Arguments (positional, unpacked from `argv`):
        data: Not used by this method.
        args: Object with a boolean `show` attribute.
        features: List of feature names to draw.
        scan_features: Dict mapping a base feature to a list of
            (feature, label) pairs of its parameter scan.
        points: List of (x, y, label) tuples. This list is not modified.
        jsd_limits: Iterable of (x, y, y-error) triplets.
        masscut: Whether a mass cut was applied.
        pt_range: (min, max) pT range, or None.

    Returns:
        The rootplotting canvas.
    """

    # Unpack arguments
    data, args, features, scan_features, points, jsd_limits, masscut, pt_range = argv

    # Look-up of the (first) point whose label matches a given feature. The
    # labels are collected once instead of once per look-up.
    point_labels = [point[2] for point in points]

    def find_point(feat):
        """ Return the (x, y, label) entry in `points` labelled `feat`. """
        return points[point_labels.index(feat)]

    with TemporaryStyle() as style:

        # Compute yaxis range
        ranges = int(pt_range is not None) + int(masscut)
        mult = 10. if ranges == 2 else (5. if ranges == 1 else 1.)

        # Define variable(s)
        axisrangex = (1.4, 100.)
        axisrangey = (0.3, 100000. * mult)
        aminx, amaxx = axisrangex
        aminy, amaxy = axisrangey

        # Styling
        scale = 0.95
        style.SetTitleOffset(1.8, 'x')
        style.SetTitleOffset(1.6, 'y')
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)

        # Canvas
        c = rp.canvas(batch=not args.show, size=(600, 600))

        # Reference lines
        nullopts = dict(linecolor=0, linewidth=0, linestyle=0, markerstyle=0,
                        markersize=0, fillstyle=0)
        lineopts = dict(linecolor=ROOT.kGray + 2, linewidth=1, option='L')
        boxopts = dict(fillcolor=ROOT.kBlack, alpha=0.05, linewidth=0, option='HIST')
        c.hist([aminy], bins=list(axisrangex), **nullopts)
        c.plot([1, amaxy], bins=[2, 2], **lineopts)
        c.plot([1, 1], bins=[2, amaxx], **lineopts)
        c.hist([amaxy], bins=[aminx, 2], **boxopts)
        c.hist([1], bins=[2, amaxx], **boxopts)

        # Meaningful limits on 1/JSD
        lim_x, lim_y, lim_ey = map(np.array, zip(*jsd_limits))
        lim_ex = np.zeros_like(lim_ey)
        gr = ROOT.TGraphErrors(len(lim_x), lim_x, lim_y, lim_ex, lim_ey)
        smooth_tgrapherrors(gr, ntimes=3)
        c.graph(gr, linestyle=2, linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Label the band at one of its (smoothed) points
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = 3
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit", x_, y_ + ey_, align=21, textsize=11,
                angle=-5, textcolor=ROOT.kGray + 2)

        # Markers
        for is_simple in [True, False]:

            # Split the legend into simple- and MVA taggers
            selected = [(ifeat, feat) for ifeat, feat in enumerate(features)
                        if is_simple == signal_low(feat)]
            for ifeat, feat in selected:

                # Coordinates, label
                x, y, label = find_point(feat)

                # Overwrite default name of parameter-scan classifier
                label = 'ANN' if label.startswith('ANN') else label
                label = 'uBoost' if label.startswith('uBoost') else label

                # Style
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                markerstyle = 20 + (ifeat % 2) * 4

                # Draw
                c.graph([y], bins=[x], markercolor=colour, markerstyle=markerstyle,
                        label='#scale[%.1f]{%s}' % (scale, latex(label, ROOT=True)),
                        option='P')
                pass

            # Draw class-specific legend
            width = 0.15
            c.legend(header=("Analytical:" if is_simple else "MVA:"),
                     width=width, xmin=0.60 + (width + 0.02) * (is_simple),
                     ymax=0.888)
            pass

        # Make legends transparent
        for leg in c.pads()[0]._legends:
            leg.SetFillStyle(0)
            pass

        # Markers, parametrised decorrelation
        for base_feat, group in scan_features.items():

            # Get index in list of features
            ifeat = features.index(base_feat)

            # Style
            colour = rp.colours[(ifeat // 2) % len(rp.colours)]
            markerstyle = 24

            for feat, label in group:
                x, y, _ = find_point(feat)

                # Draw
                c.graph([y], bins=[x], markercolor=colour,
                        markerstyle=markerstyle, option='P')
                if base_feat == 'NN':
                    c.latex(" " + label, x, y, textsize=11, align=12,
                            textcolor=ROOT.kGray + 2)
                else:
                    c.latex(label + " ", x, y, textsize=11, align=32,
                            textcolor=ROOT.kGray + 2)
                    pass
                pass

            # Connecting lines (scan)
            feats = [base_feat] + [entry[0] for entry in group]
            for feat1, feat2 in zip(feats[:-1], feats[1:]):
                x1, y1, _ = find_point(feat1)
                x2, y2, _ = find_point(feat2)
                c.graph([y1, y2], bins=[x1, x2], linecolor=colour,
                        linestyle=2, option='L')
                pass
            pass

        # Connecting lines (simple)
        # The second point is left out before pairing up neighbouring points.
        # This is done on a copy, so that the caller's list is left untouched.
        print("points: ")
        print(points)
        simple_points = points[:1] + points[2:]
        print(simple_points)
        for i in range(2):
            x1, y1, _ = simple_points[2 * i + 0]
            x2, y2, _ = simple_points[2 * i + 1]
            colour = rp.colours[i]
            c.graph([y1, y2], bins=[x1, x2], linecolor=colour,
                    linestyle=2, option='L')
            pass

        # Decorations
        c.xlabel("Background rejection, 1 / #varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
        c.ylabel("Mass-decorrelation, 1 / JSD @ #varepsilon_{sig}^{rel} = 50%")
        c.xlim(*axisrangex)
        c.ylim(*axisrangey)
        c.logx()
        c.logy()

        # Annotations along the reference lines, placed at the logarithmic
        # midpoints of the axes
        opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
        midpointx = np.power(10, 0.5 * np.log10(amaxx))
        midpointy = np.power(10, 0.5 * np.log10(amaxy))
        c.latex("No separation", 1.91, midpointy, angle=90, align=21, **opts_text)
        c.latex("Maximal sculpting", midpointx, 0.89, angle=0, align=23, **opts_text)
        c.latex(" Less sculpting #rightarrow", 2.1, midpointy, angle=90, align=23, **opts_text)
        c.latex(" Greater separation #rightarrow", midpointx, 1.1, angle=0, align=21, **opts_text)

        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)
        c.text(TEXT + \
               ["#it{W} jet tagging"] + (
                   ["p_{{T}} #in [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])] if pt_range is not None else []
               ) + (
                   ['Cut: m #in [60, 100] GeV'] if masscut else []
               ), xmin=0.26, ATLAS=None)
        pass

    return c
def plot(*argv):
    """
    Method for delegating plotting.

    Draws, for each feature, `jsd[feat]` against `effs` (divided by 100) on
    a logarithmic y-axis, together with a flat reference histogram at 1 and
    the smoothed band built from `jsd_limits`.

    Arguments (positional, unpacked from `argv`):
        args: Object with a boolean `show` attribute.
        data: Not used by this method.
        effs: Sequence of efficiencies; the first entry is skipped.
        jsd: Dict mapping feature name to a sequence of JSD values; the
            first entry of each sequence is skipped.
        jsd_limits: Iterable of (x, y, y-error) triplets.
        features: List of feature names to draw.
        pt_range: (min, max) pT range, or a false value for no range.

    Returns:
        The rootplotting canvas.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range = argv

    with TemporaryStyle() as style:

        # Style
        for offset, axis in [(1.5, 'x'), (2.0, 'y')]:
            style.SetTitleOffset(offset, axis)
            pass

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Flat reference histogram, with all bins set to one
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        nb_ref_bins = ref.GetXaxis().GetNbins()
        for ibin in range(1, nb_ref_bins + 1):
            ref.SetBinContent(ibin, 1)
            pass
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)

        # One curve per feature, with one legend per class of tagger
        width = 0.15
        for is_simple in (True, False):
            members = [(ifeat, feat) for ifeat, feat in enumerate(features)
                       if is_simple == signal_low(feat)]

            for ifeat, feat in members:
                parity = ifeat % 2
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                c.plot(jsd[feat][1:],
                       bins=np.array(effs[1:]) / 100.,
                       linecolor=colour,
                       markercolor=colour,
                       linestyle=1 + parity,
                       markerstyle=20 + parity * 4,
                       label=latex(feat, ROOT=True),
                       option='PL')
                pass

            header = "Analytical:" if is_simple else "MVA:"
            c.legend(header=header,
                     width=width * (1 + 0.8 * int(is_simple)),
                     xmin=0.42 + (width + 0.05) * (is_simple),
                     ymax=0.888)
            pass

        # Meaningful limits on JSD
        lim_x, lim_y, lim_ey = map(np.array, zip(*jsd_limits))
        lim_ex = np.zeros_like(lim_ey)
        gr = ROOT.TGraphErrors(len(lim_x), lim_x, lim_y, lim_ex, lim_ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr, linestyle=2, linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

        lines = ["#sqrt{s} = 13 TeV", "Dijets"]
        if pt_range:
            lines = lines + ["p_{T} [GeV] #in", " [{:.0f}, {:.0f}]".format(*pt_range)]
            pass
        c.text(lines, ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting", 0.065, 1.2, align=11, textsize=11,
                textcolor=ROOT.kGray + 2)

        c.xlim(0, 1)
        c.ymin(1E-05)
        c.padding(0.45)
        c.logy()

        for legend in c.pad()._legends:
            legend.SetMargin(0.5)
            pass

        # Label the band at the seventh-to-last of its points
        px, py = ROOT.Double(0), ROOT.Double(0)
        ipoint = gr.GetN() - 7
        gr.GetPoint(ipoint, px, py)
        err_y = gr.GetErrorY(ipoint)
        px, py = map(float, (px, py))
        c.latex("Statistical limit", px, py - err_y / 2., align=23,
                textsize=11, angle=12, textcolor=ROOT.kGray + 2)
        pass

    return c
def _pass_fraction(weights, msk_all, msk_pass):
    """
    Return the weighted fraction of the `msk_all` entries also in `msk_pass`.

    All three arguments are numpy arrays of equal length. The result is NaN
    if the summed weight of the `msk_all` entries is zero.
    """
    num = weights[msk_all & msk_pass].sum()
    den = weights[msk_all].sum()
    return num / den


def main(args):
    """
    Plot the measured and k-NN fitted profiles, and the local efficiencies.

    The measured profile is filled from the `signal != 1` part of the data
    and the fitted profile from the predictions of the stored k-NN regressor
    on a re-binned grid; both are passed to `plot`. Afterwards, separately
    for the `signal == 1` and the remaining jets, the weighted fraction of
    jets with `knn < 0` is drawn in bins of (`VARX`, `VARY`) and saved to
    'figures/knn/'.

    Arguments:
        args: Arguments forwarded to `initialise`; the initialised object
            must provide an `input` attribute.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # -------------------------------------------------------------------------
    ####
    #### # Initialise Keras backend
    #### initialise_backend(args)
    ####
    #### # Neural network-specific initialisation of the configuration dict
    #### initialise_config(args, cfg)
    ####
    #### # Keras import(s)
    #### from keras.models import load_model
    ####
    #### # NN
    #### from run.adversarial.common import add_nn
    #### with Profile("NN"):
    ####     classifier = load_model('models/adversarial/classifier/full/classifier.h5')
    ####     add_nn(data, classifier, 'NN')
    ####     pass
    # -------------------------------------------------------------------------

    # Fill measured profile
    profile_meas, _ = fill_profile(data[msk_bkg])

    # Add k-NN variable
    # NOTE(review): The path below formats `EFF` with '{}', whereas the path
    # passed to `loadclf` uses '{:.0f}'. The two only agree if `EFF`
    # formats identically both ways (e.g. an integer) -- confirm.
    knnfeat = 'knn'
    add_knn(data, newfeat=knnfeat, path='models/knn/knn_{}_{}.pkl.gz'.format(VAR, EFF))

    # Loading KNN classifier
    knn = loadclf('models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF))

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges  @TODO: Make standardised right away?
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])
            pass

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)
        pass

    # Plotting
    with Profile("Plotting"):
        for fit in [False, True]:

            # Select correct profile
            profile = profile_fit if fit else profile_meas

            # Plot
            plot(profile, fit)
            pass
        pass

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Colour palette shared by both samples
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]

    # -- Arrays shared by all selections below. Plain numpy arrays are used
    #    throughout, so that masks are always combined position-by-position
    #    and never depend on pandas index alignment.
    weights = data['weight_test'].values
    msk_pass = (data[knnfeat] < 0).values
    values = [data[var].values for var in VARS]
    values_y = data[VARY].values

    # -- Binning
    shape = (AXIS[VARX][0], AXIS[VARY][0])
    bins = [
        np.linspace(AXIS[var][1], AXIS[var][2], AXIS[var][0] + 1, endpoint=True)
        for var in VARS
    ]

    # -- Compute signal efficiency
    for sig, msk_sample in zip([True, False], [msk_sig.values, msk_bkg.values]):

        # Colour gradient
        if sig:
            rgbs = blues
            stops = np.linspace(0, 1, len(rgbs), endpoint=True)
        else:
            # Extra colour for the range below the target efficiency
            rgbs = [(255 / 255., 51 / 255., 4 / 255.)] + blues
            stops = np.array([0] + list(
                np.linspace(0, 1, len(rgbs) - 1, endpoint=True)
                * (1. - EFF / 100.) + EFF / 100.))
            pass
        red, green, blue = map(np.array, zip(*rgbs))
        nb_cols = len(rgbs)

        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`. The upper bin edge
        # is included, consistently with the two-dimensional bins below.
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            msk_row = msk_sample & (values_y > low) & (values_y <= high)
            effs.append(_pass_fraction(weights, msk_row, msk_pass))
            pass

        # Fill profile
        for i, j in itertools.product(*map(range, shape)):

            # Mask of jets in the current sample and (x, y) bin
            msk_cell = msk_sample
            for idx, var_edges, var_values in zip([i, j], bins, values):
                low, high = var_edges[idx:idx + 2]
                msk_cell = msk_cell & (var_values > low) & (var_values <= high)
                pass

            # Set non-zero bin content
            if msk_cell.any():
                eff = _pass_fraction(weights, msk_cell, msk_pass)
                profile.SetBinContent(i + 1, j + 1, eff)
                pass
            pass

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " + latex(VARX, ROOT=True) + " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " + latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" % (latex(VAR, ROOT=True), EFF))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        zrange = (0., 1.)
        if zrange:
            profile.GetZaxis().SetRangeUser(*zrange)
            pass
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            # Only the first row carries the symbol; the rest show the number
            if ibin == 1:
                prefix = "#bar{#varepsilon}^{rel}_{%s} = " % ('sig' if sig else 'bkg')
            else:
                prefix = ''
                pass
            tlatex.DrawLatex(xt, yt, "%s%.1f%%" % (prefix, eff * 100.))
            pass

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV", -4.5, BOUNDS[0].Eval(-4.5) + 30, align=21,
                angle=-37, textsize=13, textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV", -2.5, BOUNDS[1].Eval(-2.5) - 30, align=23,
                angle=-57, textsize=13, textcolor=ROOT.kGray + 3)

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', VAR, EFF))
        pass

    return
def plot(data, urs, classifiers):
    """
    Common method to perform tests on named uBoost/Adaboost classifier.

    For each non-AdaBoost classifier, the weighted log-loss is computed per
    training epoch on the training and the test part of `data`, for both
    that classifier and the first entry in `classifiers`, and the four
    resulting curves are saved to
    'figures/loss_uboost__alpha<alpha>.pdf'.

    Arguments:
        data: DataFrame with columns 'train', 'signal', 'weight_adv', and
            one column '<name>__<epoch>' per classifier and epoch.
        urs: Sequence of uniforming rates, parallel to `classifiers`.
        classifiers: Sequence of (title, name) pairs. The first entry is
            assumed to be the AdaBoost classifier.
    """

    # Plotting learning process
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    with Profile("Plotting learning process"):

        # Get training/test split masks
        msk_train = data['train'] == 1
        msk_test = data['train'] == 0

        # Get target and weight arrays. These are the same for all
        # classifiers, and are therefore computed only once.
        y_train = data.loc[msk_train, 'signal'].values.flatten()
        y_test = data.loc[msk_test, 'signal'].values.flatten()
        w_train = data.loc[msk_train, 'weight_adv'].values.flatten()
        w_test = data.loc[msk_test, 'weight_adv'].values.flatten()

        # Name of the reference classifier
        name_ab = classifiers[0][1] if len(classifiers) else None  # Assuming `AdaBoost` is first classifier

        for alpha, (title, name) in zip(urs, classifiers):

            # Compare by value; an identity test (`is`) against a string
            # literal is only correct by accident of string interning.
            if title == 'AdaBoost':
                continue
            print("=== {} {}".format(name, title))

            # Compute log-loss for each epoch
            ll_ab_train, ll_ab_test = list(), list()
            ll_ub_train, ll_ub_test = list(), list()

            nb_epochs = sum(1 for col in data.columns if col.startswith(name))
            x = np.arange(nb_epochs)

            for epoch in range(nb_epochs):

                # -- Get column names for current epoch
                col_ab = '{:s}__{:d}'.format(name_ab, epoch)
                col_ub = '{:s}__{:d}'.format(name, epoch)

                # -- Get classifier variables for current epoch
                p_ab_train = data.loc[msk_train, col_ab]
                p_ab_test = data.loc[msk_test, col_ab]
                p_ub_train = data.loc[msk_train, col_ub]
                p_ub_test = data.loc[msk_test, col_ub]

                # -- Compute log-loss for current epoch
                ll_ab_train.append(log_loss(y_train, p_ab_train, sample_weight=w_train))
                ll_ab_test.append(log_loss(y_test, p_ab_test, sample_weight=w_test))
                ll_ub_train.append(log_loss(y_train, p_ub_train, sample_weight=w_train))
                ll_ub_test.append(log_loss(y_test, p_ub_test, sample_weight=w_test))
                pass

            # Plot log-loss curves
            c = rp.canvas(batch=True)

            # -- Common plotting options
            opts = dict(linewidth=2, legend_option='L')
            c.graph(ll_ab_train, bins=x, linecolor=rp.colours[5], linestyle=1,
                    option='AL', label='AdaBoost', **opts)
            c.graph(ll_ab_test, bins=x, linecolor=rp.colours[5], linestyle=2,
                    option='L', **opts)
            c.graph(ll_ub_train, bins=x, linecolor=rp.colours[1], linestyle=1,
                    option='L', label='uBoost', **opts)
            c.graph(ll_ub_test, bins=x, linecolor=rp.colours[1], linestyle=2,
                    option='L', **opts)

            # -- Decorations
            c.pad()._yaxis().SetNdivisions(505)
            c.xlabel("Training epoch")
            c.ylabel("BDT classifier loss")
            c.xlim(0, len(x))
            c.ylim(0.3, 1.4)
            c.legend(width=0.28)
            c.legend(header='Dataset:',
                     categories=[('Training', {'linestyle': 1}),
                                 ('Testing', {'linestyle': 2})],
                     width=0.28, ymax=0.69)

            for leg in c.pad()._legends:
                leg.SetFillStyle(0)
                pass

            c.text(["#sqrt{s} = 13 TeV",
                    "#it{W} jet tagging",
                    "Uniforming rate #alpha = {:3.1f}".format(alpha)],
                   qualifier="Simulation Internal")

            # -- Save
            c.save('figures/loss_uboost__alpha{:4.2f}'.format(alpha).replace('.', 'p') + '.pdf')
            pass
        pass

    return
def _kl_terms(p, m):
    """
    Return the per-bin contributions to KL(p || m), in bits.

    Bins with `p == 0` contribute exactly zero (the 0 * log(0) := 0
    convention), instead of the NaN that the plain formula yields there.
    """
    p = np.asarray(p, dtype=float)
    m = np.asarray(m, dtype=float)
    terms = np.zeros_like(p)
    filled = p != 0
    terms[filled] = -p[filled] * np.log2(m[filled] / p[filled])
    return terms


def main(args):
    """
    Illustrate the JSD mass-decorrelation metric for a cut on `Tau21`.

    For each of two pT bins, the `signal != 1` jets are split by a cut on
    `Tau21` at the weighted median of the `signal == 1` jets. The
    normalised mass distributions of passing (P) and failing (F) jets, their
    average (M), the per-bin KL(P || M) and KL(F || M) terms, and the
    per-bin and cumulative JSD are drawn on a four-pad canvas and saved to
    'figures/'.

    Arguments:
        args: Object with an `input` attribute, giving the path prefix of
            the 'data.h5' file.

    Returns:
        0
    """

    # ...

    # Load data
    data_, _, _ = load_data(args.input + 'data.h5', train=True)

    for pt_bin in [(200., 500.), (500., 1000.)]:

        # Impose pT-cut
        data = data_[(data_['pt'] >= pt_bin[0]) & (data_['pt'] < pt_bin[1])]

        var = 'Tau21'
        msk_sig = (data['signal'] == 1).values
        x = data[var].values
        m = data['m'].values
        w = data['weight_test'].values

        # Get cut value. The weights have to be restricted to the same jets
        # as the values; passing all weights mis-pairs values and weights.
        cut = wpercentile(x[msk_sig], 50., weights=w[msk_sig])
        print("Cut value: {:.2f}".format(cut))

        # Discard signal
        x = x[~msk_sig]
        m = m[~msk_sig]
        w = w[~msk_sig]

        # Get pass mask
        msk_pass = x < cut
        print("Background efficiency: {:.1f}%".format(
            100. * w[msk_pass].sum() / w.sum()))

        # Canvas
        offset = 0.06
        margin = 0.3
        # @NOTE
        #   A = Height of pad 0
        #   B = Height of pads 1,2
        #   C = Height of pad 3
        # -->
        #   A = 0.5
        #
        #   (1. - 2 * offset) * B = (1. - 2*offset - margin) * C
        # ==>
        #   B = C * (1. - 2*offset - margin) / (1. - 2 * offset)
        # ==>
        #   B = C * (1 - margin / (1. - 2 * offset))
        #
        #   A + 2 * B + C = 1
        # ==>
        #   A + 2 * C * (1 - margin / (1. - 2 * offset)) + C = 1
        # ==>
        #   C = (1 - A) / (1 + 2 * (1 - margin / (1. - 2 * offset)))
        A = 0.5
        C = (1 - A) / (1 + 2 * (1 - margin / (1. - 2 * offset)))
        B = C * (1 - margin / (1. - 2 * offset))

        c = rp.canvas(batch=True, num_pads=4, fraction=(A, B, B, C),
                      size=(600, 700))

        # Set pad margins
        c.pad(0)._bare().SetBottomMargin(offset)
        c.pad(1)._bare().SetTopMargin(offset)
        c.pad(1)._bare().SetBottomMargin(offset)
        c.pad(2)._bare().SetTopMargin(offset)
        c.pad(2)._bare().SetBottomMargin(offset)
        c.pad(3)._bare().SetTopMargin(offset)
        c.pad(3)._bare().SetBottomMargin(offset + margin)

        # Styling
        HISTSTYLE[True]['label'] = 'Passing cut, #it{P}'
        HISTSTYLE[False]['label'] = 'Failing cut, #it{F}'

        # Histograms
        F = c.hist(m[~msk_pass], bins=MASSBINS, weights=w[~msk_pass],
                   normalise=True, **HISTSTYLE[False])
        P = c.hist(m[msk_pass], bins=MASSBINS, weights=w[msk_pass],
                   normalise=True, **HISTSTYLE[True])

        P, F = map(root_numpy.hist2array, [P, F])
        M = (P + F) / 2
        c.hist(M, bins=MASSBINS, normalise=True, linewidth=3,
               linecolor=ROOT.kViolet, linestyle=2, label='Average, #it{M}')

        # Compute divergences. Empty bins contribute zero, so that they do
        # not turn the cumulative sum into NaN.
        KL_PM = _kl_terms(P, M)
        KL_FM = _kl_terms(F, M)
        JSD = (KL_PM + KL_FM) / 2.
        JSDsum = np.cumsum(JSD)

        opts = dict(bins=MASSBINS, fillcolor=ROOT.kGray, alpha=0.5)

        # Draw divergences
        c.pad(1).hist(KL_PM, **opts)
        c.pad(1).ylim(-0.12, 0.05)
        c.pad(1).yline(0.)

        c.pad(2).hist(KL_FM, **opts)
        c.pad(2).ylim(-0.05, 0.12)
        c.pad(2).yline(0.)

        c.pad(3).hist(JSD, **opts)
        c.pad(3).ylim(0., 0.03)
        c.pad(3).yline(0.)

        o = rp.overlay(c.pad(3), color=ROOT.kViolet, ndiv=502)
        o.hist(JSDsum, bins=MASSBINS, linecolor=ROOT.kViolet)
        o.label("#sum_{i #leq n} JSD(P #parallel F)")
        o.lim(0, 0.2)

        # Styling axes
        c.pad(0)._xaxis().SetTitleOffset(999.)
        c.pad(1)._xaxis().SetTitleOffset(999.)
        c.pad(2)._xaxis().SetTitleOffset(999.)
        c.pad(3)._xaxis().SetTitleOffset(5.)
        c.pad(0)._xaxis().SetLabelOffset(999.)
        c.pad(1)._xaxis().SetLabelOffset(999.)
        c.pad(2)._xaxis().SetLabelOffset(999.)

        c.pad(0)._yaxis().SetNdivisions(505)
        c.pad(1)._yaxis().SetNdivisions(502)
        c.pad(2)._yaxis().SetNdivisions(502)
        c.pad(3)._yaxis().SetNdivisions(502)

        c.pad(0).ylim(0, 0.20)
        c.pad(0).cd()
        c.pad(0)._get_first_primitive().Draw('SAME AXIS')

        # Decorations
        c.text(TEXT + [
            "Multijets, training dataset",
            "Cut on {:s} at #varepsilon_{{sig}}^{{rel}} = 50%".format(
                latex(var, ROOT=True)),
            "p_{{T}} #in [{:.0f}, {:.0f}] GeV".format(*pt_bin)
        ], qualifier='Simulation Internal')
        c.legend(width=0.25)
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Fraction of jets")
        c.pad(1).ylabel('KL(P #parallel M)')
        c.pad(2).ylabel('KL(F #parallel M)')
        c.pad(3).ylabel('JSD(P #parallel F)')

        # Save
        c.save('figures/massdecorrelationmetric_{:s}__pT{:.0f}_{:.0f}GeV.pdf'.
               format(var, *pt_bin))
        pass

    return 0
def plot_adversarial_training_loss(
        lambda_reg,
        num_folds,
        pretrain_epochs,
        H_prior=None,
        basedir='models/adversarial/combined/crossval/'):
    """
    Plot the classifier, adversary, and combined losses for the adversarial
    training of the jet classifier.

    The training histories are read from JSON files in `basedir`, averaged
    over all files found, and drawn on a three-pad canvas (classifier,
    adversary, combined) with bands given by the standard deviation.
    Nothing is plotted if no history files are found.

    Arguments:
        lambda_reg: Regularisation parameter, used to locate the history
            files and to compute the combined loss.
        num_folds: Number of cross-validation folds. If false (None or 0), a
            single, non-cross-validated history file is looked for instead.
        pretrain_epochs: Number of epochs shaded as the pre-training period.
        H_prior: Reference value drawn in the adversary pad, and used for
            the "Ideal" line in the combined pad. If None, these reference
            lines are left out.
        basedir: Directory containing the history files.
    """

    # Check(s)
    if not basedir.endswith('/'):
        basedir += '/'
        pass

    # Define variable(s)
    # -- Number of decimals needed to write out `lambda_reg`. The logarithm
    #    is undefined for non-positive values, which get no decimals.
    if lambda_reg > 0:
        digits = int(np.ceil(max(-np.log10(lambda_reg), 0)))
    else:
        digits = 0
        pass
    lambda_str = '{l:.{d:d}f}'.format(d=digits, l=lambda_reg).replace('.', 'p')

    # Get paths to all cross-validation adversarially trained classifiers
    if num_folds:
        paths = sorted(
            glob.glob(basedir + 'history__combined_lambda{}__*of{}.json'.format(
                lambda_str, num_folds)))
    else:
        paths = glob.glob(basedir + 'history__combined_lambda{}.json'.format(lambda_str))
        pass

    print("Found {} paths.".format(len(paths)))
    if len(paths) == 0:
        return

    # Store losses
    keys = ['train_comb', 'train_clf', 'train_adv',
            'val_comb', 'val_clf', 'val_adv']
    losses = {key: list() for key in keys}
    nb_epochs = None
    for path in paths:
        with open(path, 'r') as f:
            d = json.load(f)
            pass

        # Loop loss classes
        for name, prefix in zip(['train', 'val'], ['', 'val_']):

            # Read both losses before storing any of them, so that the lists
            # stay in step if one of them is missing
            try:
                loss_clf = np.array(d[prefix + 'classifier_loss'])
                loss_adv = np.array(d[prefix + 'adversary_loss'])
            except KeyError:
                continue  # No validation

            # Classifier
            loss_clf[loss_clf > 7.0] = np.nan
            losses[name + '_clf'].append(loss_clf)

            # Adversary
            losses[name + '_adv'].append(loss_adv)

            # Combined
            losses[name + '_comb'].append(loss_clf - lambda_reg * loss_adv)

            nb_epochs = len(loss_adv)
            pass
        pass

    if nb_epochs is None:
        print("No losses could be read from the history files.")
        return

    # Plot results
    c = rp.canvas(batch=True, num_pads=3, ratio=False, size=(600, 800))
    bins = np.arange(nb_epochs)
    histbins = np.arange(nb_epochs + 1) - 0.5

    # Axes
    for idx in range(3):
        c.pads()[idx].hist([0], bins=[0, len(bins) - 1], linewidth=0,
                           linestyle=0)  # Force correct x-axis
        pass

    # Plots
    categories = list()
    for ityp, typ in enumerate(['val', 'train']):
        for igrp, grp in enumerate(['clf', 'adv', 'comb']):
            key = '{}_{}'.format(typ, grp)
            colour = rp.colours[1 if typ == 'train' else 4]

            # Create histogram, unless there is nothing to show (no validation)
            if losses[key]:
                loss_mean = np.nanmean(losses[key], axis=0)
                loss_std = np.nanstd(losses[key], axis=0)
                hist = ROOT.TH1F(key, "", len(histbins) - 1, histbins)
                for ibin in range(len(loss_mean)):
                    hist.SetBinContent(ibin + 1, loss_mean[ibin])
                    hist.SetBinError(ibin + 1, loss_std[ibin])
                    pass

                c.pads()[igrp].hist(hist, fillcolor=colour, linestyle=ityp + 1,
                                    linewidth=0, alpha=0.3, option='LE3')
                c.pads()[igrp].hist(hist, fillcolor=0, fillstyle=0,
                                    linecolor=colour, linestyle=ityp + 1,
                                    linewidth=3, option='HISTL')
                pass

            if igrp == 0:
                categories += [('Training' if typ == 'train' else 'Validation',
                                {'linestyle': ityp + 1,
                                 'linewidth': 3,
                                 'fillcolor': colour,
                                 'alpha': 0.3,
                                 'linecolor': colour,
                                 'option': 'FL'})]
                pass
            pass
        pass

    # Formatting pads
    margin = 0.2
    ymins, ymaxs = list(), list()
    nb_pads = len(c.pads())

    # Get reference-line value: the first bin of the first loss curve drawn
    # in the classifier pad (the zeroth primitive is the axis histogram)
    clf_opt_val = c.pads()[0]._primitives[1].GetBinContent(1)

    for ipad, pad in enumerate(c.pads()):
        tpad = pad._bare()  # ROOT.TPad
        f = ipad / float(nb_pads - 1)
        tpad.SetLeftMargin(0.20)
        tpad.SetBottomMargin(f * margin)
        tpad.SetTopMargin((1 - f) * margin)
        pad._xaxis().SetNdivisions(505)
        pad._yaxis().SetNdivisions(505)
        if ipad < nb_pads - 1:  # Not bottom pad
            pad._xaxis().SetLabelOffset(9999.)
            pad._xaxis().SetTitleOffset(9999.)
        else:
            pad._xaxis().SetTitleOffset(3.5)
            pass

        # Values which the y-axis range has to accommodate
        lows, highs = list(), list()
        for hist in pad._primitives:
            if not isinstance(hist, ROOT.TGraph):
                lows.append(get_min(hist))
                highs.append(get_max(hist))
                pass
            pass

        # -- Reference line in this pad, if any. Without `H_prior`, only the
        #    classifier pad has one.
        if ipad == 0:
            ref = clf_opt_val
        elif H_prior is None:
            ref = None
        elif ipad == 1:
            ref = H_prior
        else:
            ref = clf_opt_val - lambda_reg * H_prior
            pass

        if ref is not None:
            lows.append(ref)
            highs.append(ref)
            pass

        ymin = min(lows)
        ymax = max(highs)

        ydiff = ymax - ymin
        ymin -= ydiff * 0.2
        ymax += ydiff * (0.7 if ipad == 0 else (0.7 if ipad == 1 else 0.2))
        if ipad == 0:
            ymax *= 1.2
            pass

        pad.ylim(ymin, ymax)

        ymins.append(ymin)
        ymaxs.append(ymax)
        pass

    c._bare().Update()

    # Pre-training boxes
    boxes = list()
    for ipad, pad in enumerate(c.pads()):
        pad._bare().cd()
        boxes.append(ROOT.TBox(0, ymins[ipad], pretrain_epochs, ymaxs[ipad]))
        boxes[-1].SetFillColorAlpha(ROOT.kBlack, 0.05)
        boxes[-1].Draw("SAME")
        pass

    # Vertical lines
    for ipad in range(nb_pads):
        align = 'TR' if ipad < 2 else 'BR'
        c.pads()[ipad].xline(pretrain_epochs,
                             ymin=ymins[ipad], ymax=ymaxs[ipad],
                             text=' Adv. pre-training ' if ipad == 0 else None,
                             text_align=align, linestyle=1,
                             linecolor=ROOT.kGray + 2)
        pass

    # Horizontal lines
    c.pads()[0].yline(clf_opt_val)
    if H_prior is not None:
        c.pads()[1].yline(H_prior)
        c.pads()[2].yline(clf_opt_val - lambda_reg * (H_prior))
        pass

    opts = dict(align=31, textcolor=ROOT.kGray + 2, textsize=14)
    c.pads()[0].latex("Stand-alone NN ", bins[-1] * 0.98,
                      clf_opt_val + (ymaxs[0] - ymins[0]) * 0.03, **opts)

    if H_prior is not None:
        c.pads()[1].latex("#it{H}(prior) ", bins[-1] * 0.98,
                          H_prior + (ymaxs[1] - ymins[1]) * 0.03, **opts)

        opts['align'] = 33
        c.pads()[2].latex(
            "Ideal ", bins[-1] * 0.98,
            clf_opt_val - lambda_reg * (H_prior) - (ymaxs[2] - ymins[2]) * 0.03,
            **opts)
        pass

    # Decorations
    ROOT.gStyle.SetTitleOffset(2.0, 'y')
    c.xlabel("Training epoch")
    c.pads()[0].ylabel("#it{L}_{clf.}")
    c.pads()[1].ylabel("#it{L}_{adv.}")
    c.pads()[2].ylabel("#it{L}_{clf.} #minus #lambda #it{L}_{adv.}")
    for pad in c.pads():
        pad.xlim(0, max(bins) - 1)
        pass

    c.pads()[0].text([], xmin=0.2, ymax=0.85, qualifier=QUALIFIER)
    c.pads()[1].text([
        "#sqrt{s} = 13 TeV", "#it{W} jet tagging",
        "Adversarial training (#lambda = %s)" % (lambda_str.replace('p', '.'))
    ], ATLAS=False, ymax=0.70, xmin=0.27)
    c.pads()[0].legend(xmin=0.60, ymax=0.70, categories=categories)

    # Save. The suffix follows the same test on `num_folds` as was used for
    # finding the history files above.
    mkdir('figures/')
    c.save('figures/loss_adversarial_lambda{}_{}.pdf'.format(
        lambda_str, 'cv' if num_folds else 'full'))
    return
def plot (profile, fit):
    """
    Method for delegating plotting.

    Draws `profile` as markers with `fit` overlaid as a line, adds a legend
    describing the input sample and the type of fit, and saves the canvas
    as PDF and EPS in 'figures/knn/'.

    Arguments:
        profile: ROOT object holding the measured values; drawn with
            option "AP".
        fit: ROOT object holding the fitted values; drawn with option
            "SAME".
    """

    # Legend texts
    sample_names = {
        'data': "CR Data",
        'mcCR': "CR MC",
        'mc':   "Full MC",
    }
    fit_names = [  # Checked in order; the first key contained in `FIT` wins
        ('knn',   "k-NN fit "),
        ('poly2', "2. order polynomial fit "),
        ('poly3', "3. order polynomial fit "),
        ('erf',   "Error function fit "),
    ]

    # rootplotting
    c = rp.canvas(batch=True)
    pad = c.pads()[0]._bare()
    pad.cd()
    pad.SetRightMargin(0.20)
    pad.SetLeftMargin(0.15)
    pad.SetTopMargin(0.10)

    # Styling
    profile.SetMarkerColor(4)
    profile.SetMarkerStyle(20)
    fit.SetLineColor(2)
    fit.SetMarkerColor(4)
    fit.SetMarkerStyle(20)

    profile.GetXaxis().SetTitle("#it{m}_{jj} [GeV]")
    profile.GetYaxis().SetTitle("#it{P}^{#varepsilon=%s%%}" % (EFF))

    profile.GetYaxis().SetNdivisions(505)
    profile.GetXaxis().SetTitleOffset(1.4)
    profile.GetYaxis().SetTitleOffset(1.4)
    profile.GetXaxis().SetRangeUser(*XRANGE)

    if YRANGE:
        profile.GetYaxis().SetRangeUser(*YRANGE)
        pass

    # Draw
    profile.Draw("AP")
    fit.Draw("SAME")

    # Legend
    leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)
    if INPUT in sample_names:
        leg.AddEntry(profile, sample_names[INPUT], "p")
        pass

    # -- Fall back on the raw `FIT` name for unknown types of fit, rather
    #    than failing on an unbound variable
    fitLegend = "{} fit ".format(FIT)
    for key, text in fit_names:
        if key in FIT:
            fitLegend = text
            break
        pass

    if MODEL in sample_names:
        fitLegend += "to " + sample_names[MODEL]
        pass

    leg.AddEntry(fit, fitLegend, "l")
    leg.Draw()

    # Save
    mkdir('figures/knn/')
    for extension in ['pdf', 'eps']:
        c.save('figures/knn/{}_profile_{:s}_{}_{}_stat{}.{}'.format(
            FIT, VAR, EFF, MODEL + INPUT, MIN_STAT, extension))
        pass

    del c
    pass
def plot_classifier_training_loss(
        num_folds, basedir='models/adversarial/classifier/crossval/'):
    """
    Plot the classifier training loss.

    The training histories of all cross-validation folds found in `basedir`
    are averaged per epoch, and the training and validation losses are
    drawn with bands given by the standard deviation across folds. The
    figure is saved as 'figures/loss_classifier.pdf'. Nothing is plotted if
    no history files are found.

    Arguments:
        num_folds: Number of cross-validation folds, used to locate the
            history files.
        basedir: Directory containing the history files.
    """

    # Check(s)
    if not basedir.endswith('/'):
        basedir += '/'
        pass

    # Get paths to classifier training losses. `basedir` is guaranteed to
    # end with a slash at this point, so none is added here.
    pattern = basedir + 'history__crossval_classifier__*of{}.json'.format(num_folds)
    paths = sorted(glob.glob(pattern))

    if len(paths) == 0:
        print("No models found for classifier CV study.")
        return

    # Read losses from files
    losses = {'train': list(), 'val': list()}
    for path in paths:
        with open(path, 'r') as f:
            d = json.load(f)
            pass

        loss = np.array(d['val_loss'])
        msk_outlier = np.abs(loss - 0.72) < 0.02
        print("Outliers: {}".format(loss[msk_outlier]))
        loss[msk_outlier] = np.nan  # @FIXME: This probably isn't completely kosher
        losses['val'].append(loss)

        loss = np.array(d['loss'])
        losses['train'].append(loss)
        pass

    # Define variable(s)
    # NOTE(review): The bin edges are offset by +0.5, so that bin `i` is
    # centred on `i + 1`. Confirm that this one-epoch shift with respect to
    # `bins` is intended.
    nb_epochs = len(loss)
    bins = np.arange(nb_epochs)
    histbins = np.arange(nb_epochs + 1) + 0.5

    # Canvas
    c = rp.canvas(batch=True)

    # Force correct x-axis. This needs to be drawn only once, before the
    # first loss curve.
    c.hist([0], bins=[0, max(bins)], linewidth=0, linestyle=0)

    # Plots
    categories = list()
    for name, key, colour, linestyle in zip(['Validation', 'Training'],
                                            ['val', 'train'],
                                            [rp.colours[4], rp.colours[1]],
                                            [1, 2]):

        # Histograms
        loss_mean = np.nanmean(losses[key], axis=0)
        loss_std = np.nanstd(losses[key], axis=0)
        hist = ROOT.TH1F(key + '_loss', "", len(histbins) - 1, histbins)
        for idx in range(len(loss_mean)):
            hist.SetBinContent(idx + 1, loss_mean[idx])
            hist.SetBinError(idx + 1, loss_std[idx])
            pass

        c.hist(hist, fillcolor=colour, alpha=0.3, option='LE3')
        c.hist(hist, linecolor=colour, linewidth=3, linestyle=linestyle,
               option='HISTL')

        categories += [(name, {'linestyle': linestyle,
                               'linewidth': 3,
                               'linecolor': colour,
                               'fillcolor': colour,
                               'alpha': 0.3,
                               'option': 'FL'})]
        pass

    # Decorations
    c.pads()[0]._yaxis().SetNdivisions(505)
    c.xlabel("Training epoch")
    c.ylabel("Cross-validation classifier loss, L_{clf}")
    c.xlim(0, max(bins))
    c.ylim(0.3, 0.5)
    c.legend(categories=categories, width=0.25)
    c.text(TEXT + ["#it{W} jet tagging", "Neural network (NN) classifier"],
           qualifier=QUALIFIER)

    # Save
    mkdir('figures/')
    c.save('figures/loss_classifier.pdf')
    return
def _feature_opts (ifeat):
    """
    Return the base drawing options for the feature at position `ifeat`.

    Consecutive pairs of features share a colour and are told apart by line
    style. The colour index wraps around the `rp.colours` palette so that a
    long feature list cannot index past its end.
    """
    colour = rp.colours[(ifeat // 2) % len(rp.colours)]
    return dict(
        linecolor   = colour,
        markercolor = colour,
        fillcolor   = colour,
        linestyle   = 1 + (ifeat % 2),
        alpha       = 0.3,
        option      = 'E2',
    )


def plot_individual (*argv):
    """
    Method for delegating plotting.

    For each combination of variable (`'pt'`, `'npv'`, `None`) and metric
    (`'rej'`, `'jsd'`) one canvas is drawn and saved to
    `figures/robustness__<var>_<metric>[_masscut].pdf`. For `var = None` the
    canvas only holds a legend and is saved as
    `figures/robustness__legend_<mva|analytical>[_masscut].pdf`.

    Arguments (positional, unpacked from `argv` in this order):
        data: Not used by this function.
        args: Namespace; only `args.show` is read (canvas batch mode).
        features: Sequence of feature names to draw.
        bins: Dict of bin edges, keyed by variable name. NB: the last `'npv'`
            edge is floored *in place*.
        effs: Not used by this function.
        rejs: `rejs[var][feat]` is a sequence of (mean, std) pairs.
        jsds: `jsds[var][feat]` is a sequence of (mean, std) pairs.
        meanx: `meanx[var]` holds the x-positions used for the markers.
        jsd_limits: `jsd_limits[var]` is a sequence of
            (x, y, stat. error, syst. error) tuples.
        masscut: Whether a mass cut was applied; affects text and file name.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    # To fix 30.5 --> 30 for NPV
    bins['npv'][-1] = np.floor(bins['npv'][-1])

    # Loop combinations
    for var, metric in itertools.product(['pt', 'npv', None], ['rej', 'jsd']):

        with TemporaryStyle() as style:

            # Set styles
            scale          = 1.0
            scale_axis     = 0.7
            margin_squeeze = 0.07
            margin_vert    = 0.15
            margin_hori    = 0.17
            size = (350, 300)

            style.SetTextSize(scale_axis * style.GetTextSize())
            for coord in ['x', 'y', 'z']:
                style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
                style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
            style.SetTitleOffset(1.8, 'y')
            style.SetLegendTextSize(style.GetLegendTextSize() * scale)
            style.SetTickLength(0.05, 'x')
            style.SetTickLength(0.05, 'y')

            # Canvas (narrow one for the legend-only case)
            c = rp.canvas(size=size if var is not None else (150, 300), batch=not args.show)

            # Margins
            tpad = c.pad()._bare()
            tpad.SetBottomMargin(margin_vert    if var is not None else 0.49)
            tpad.SetLeftMargin  (margin_hori    if var is not None else 0.49)
            tpad.SetRightMargin (margin_squeeze if var is not None else 0.49)
            tpad.SetTopMargin   (margin_vert    if var is not None else 0.49)

            # Plots
            if var is not None:

                # -- References
                boxopts = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
                c.hist([2 if metric == 'rej' else 1], bins=[bins[var][0], bins[var][-1]], **boxopts)

                # -- Features; the two classes are drawn one after the other
                for is_simple in [True, False]:
                    selected = [(ifeat, feat) for ifeat, feat in enumerate(features)
                                if is_simple == signal_low(feat)]

                    for ifeat, feat in selected:
                        opts = _feature_opts(ifeat)

                        mean_rej, std_rej = map(np.array, zip(*rejs[var][feat]))  # @TEMP
                        mean_jsd, std_jsd = map(np.array, zip(*jsds[var][feat]))

                        # Only _show_ mass-decorrelated features for `npv`.
                        # The others are scaled to negative values, i.e.
                        # outside the positive y-axis range set below.
                        if (var == 'npv') and (ifeat % 2 == 0):
                            mean_rej *= -9999.
                            mean_jsd *= -9999.

                        # Error boxes
                        x    = np.array(bins[var][:-1]) + 0.5 * np.diff(bins[var])
                        xerr = 0.5 * np.diff(bins[var])
                        graph_rej = ROOT.TGraphErrors(len(x), x, mean_rej, xerr, std_rej)
                        graph_jsd = ROOT.TGraphErrors(len(x), x, mean_jsd, xerr, std_jsd)

                        if metric == 'rej':
                            c.hist(graph_rej, **opts)
                        else:
                            c.hist(graph_jsd, **opts)

                        # Markers and lines
                        opts['option']      = 'PE2L'
                        opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                        graph_rej = ROOT.TGraph(len(x), meanx[var], mean_rej)
                        graph_jsd = ROOT.TGraph(len(x), meanx[var], mean_jsd)

                        if metric == 'rej':
                            c.hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                        else:
                            c.hist(graph_jsd, label=latex(feat, ROOT=True) if is_simple else None, **opts)

                # Meaningful limits on JSD
                if metric == 'jsd':
                    x, y, ey_stat, ey_syst = map(np.array, zip(*jsd_limits[var]))
                    ex = np.zeros_like(x)

                    # Stretch the first and last points to the axis edges
                    x[0]  = bins[var][0]
                    x[-1] = bins[var][-1]

                    # Statistical and systematic errors added in quadrature
                    ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))

                    # Flattened float arrays, as passed to the TGraphErrors
                    # constructor
                    as_doubles = lambda arr: arr.flatten('C').astype(float)
                    gr_stat = ROOT.TGraphErrors(len(x), *[as_doubles(arr) for arr in (x, y, ex, ey_stat)])
                    gr_comb = ROOT.TGraphErrors(len(x), *[as_doubles(arr) for arr in (x, y, ex, ey_comb)])

                    smooth_tgrapherrors(gr_stat, ntimes=2)
                    smooth_tgrapherrors(gr_comb, ntimes=2)

                    c.graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
                    c.graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

                    # Read back the (smoothed) point next to which the label
                    # is placed
                    x_, y_ = ROOT.Double(0), ROOT.Double(0)
                    idx = (gr_comb.GetN() - 1) if var == 'pt' else (gr_comb.GetN() // 2)
                    gr_comb.GetPoint(idx, x_, y_)
                    ey_ = gr_comb.GetErrorY(idx)
                    x_, y_ = map(float, (x_, y_))

                    if var == 'pt':
                        c.latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit ",
                                x_, y_ - 1.0 * ey_,
                                align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)

                # Decorations
                # -- offsets
                #c.pads()[2]._xaxis().SetTitleOffset(2.3)

                # -- x-axis label
                if var == 'pt':
                    xlabel = "Large-#it{R} jet p_{T} [GeV]"
                elif var == 'npv':
                    xlabel = "Number of reconstructed vertices N_{PV}"
                else:
                    raise NotImplementedError("Variable {} is not supported.".format(var))
                c.xlabel(xlabel)

                # -- y-axis label
                if metric == 'rej':
                    ylabel = "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
                elif metric == 'jsd':
                    ylabel = "1/JSD @ #varepsilon_{sig}^{rel} = 50%"
                else:
                    raise NotImplementedError("Metric {} is not supported.".format(metric))
                c.ylabel(ylabel)

                # -- reference label and y-axis range
                xmid = (bins[var][0] + bins[var][-1]) * 0.5
                if metric == 'rej':
                    c.latex("Random guessing", xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(1, 100)  # 500
                else:
                    c.latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(0.2, 7E+04)  # 2E+05

                c.logy()

                # Common decorations
                c.pad()._xaxis().SetNdivisions(504)

                c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

                c.text( ["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                        (['m #in  [60, 100] GeV'] if masscut else []) + \
                        (['Multijets'] if metric == 'jsd' else []),
                        ATLAS=False,
                        ymax=0.40 if (masscut and (var == 'pt') and (metric == 'rej')) else None)
                        #, ymax=1. - margin_vert - 0.10)

            else:

                # Draw dummy histograms, only to populate the legend
                for is_simple in [True, False]:
                    selected = [(ifeat, feat) for ifeat, feat in enumerate(features)
                                if is_simple == signal_low(feat)]

                    for ifeat, feat in selected:
                        opts = _feature_opts(ifeat)
                        opts['option']      = 'PE2L'
                        opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                        label = latex(feat, ROOT=True) if is_simple == (metric == 'jsd') else None
                        c.hist([0.5], bins=[0,1], label=label, **opts)

                # "Remove" axes
                pad   = c.pad()
                tpad  = pad._bare()
                white = ROOT.kWhite
                pad._xaxis().SetLabelOffset(9999.)
                pad._xaxis().SetTitleOffset(9999.)
                pad._yaxis().SetLabelOffset(9999.)
                pad._yaxis().SetTitleOffset(9999.)
                pad._xaxis().SetAxisColor  (white)  # Remove "double ticks"
                pad._yaxis().SetAxisColor  (white)  # Remove "double ticks"
                tpad.SetFillColor          (white)
                tpad.SetFrameFillColor     (white)
                c._bare().SetFillColor     (white)
                c._bare().SetFrameFillColor(white)

            # Draw class-specific legend (legend-only canvases)
            width = 0.90 #margin_hori - 0.03
            if var is None:
                header = 'MVA:' if metric == 'rej' else 'Analytical:'
                c.legend(header=header, width=width, xmin=0.05, ymax=1. - margin_vert + 0.02)  # xmin = margin_hori + 0.03
                c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())

            # Save
            c.save('figures/robustness__{}_{}{}.pdf'.format(
                var    if var is not None else 'legend',
                metric if var is not None else ('mva' if metric == 'rej' else 'analytical'),
                '_masscut' if masscut else ''))

    return
def main(args): # Definitions histstyle = dict(**HISTSTYLE) # Initialise args, cfg = initialise(args) # Load data #data = np.zeros(1, 95213009, 10) data, features, _ = load_data( 'data/djr_LCTopo_2.h5') # + args.input) #, test=True) # #data2, features, _ = load_data('data/djr_LCTopo_2.h5') # + args.input) #, test=True) # #data = np.concatenate((data1, data2)) #f1 = h5py.File('data/djr_LCTopo_1.h5', 'r') #f2 = h5py.File('data/djr_LCTopo_2.h5', 'r') knnCut = 0 ntrkCut = 50 emfracCut = 0.65 scale = 139 * 1000000 # (inverse nanobarn) signal_to_plot = 7 sigDict = { 0: 'All Models', 1: 'Model A, m = 2 TeV', 2: 'Model A, m = 1 TeV', 3: 'Model A, m = 1.5 TeV', 4: 'Model A, m = 2.5 TeV', 5: 'Model B, m = 1 TeV', 6: 'Model B, m = 1.5 TeV', 7: 'Model B, m = 2 TeV', 8: 'Model B, m = 2.5 TeV', 9: 'Model C, m = 1 TeV', 10: 'Model C, m = 1.5 TeV', 11: 'Model C, m = 2 TeV', 12: 'Model C, m = 2.5 TeV', 13: 'Model D, m = 1 TeV', 14: 'Model D, m = 1.5 TeV', 15: 'Model D, m = 2 TeV', 16: 'Model D, m = 2.5 TeV', } outHistFile = ROOT.TFile.Open( "figures/mjjHistograms_kNN{}_eff{}.root".format(knnCut, kNN_eff), "RECREATE") histstyle[True]['label'] = 'Multijets' histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[signal_to_plot]) # Add knn variables #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] base_var = 'jet_ungrtrk500' kNN_var = base_var.replace('jet', 'knn') #base_vars = ['lead_'+base_var, 'sub_'+base_var] #kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var] print data.shape with Profile("Add variables"): #for i in range(len(base_var)): print "k-NN base variable: {} (cp. 
{})".format(base_var, kNN_var) add_knn(data, newfeat='lead_' + kNN_var, path='models/knn/{}_{}_{}_{}.pkl.gz'.format( FIT, base_var, kNN_eff, sigModel)) add_knn(data, newfeat='sub_' + kNN_var, path='models/knn/{}_{}_{}_{}.pkl.gz'.format( FIT, base_var, kNN_eff, sigModel)) #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel) """ base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] kNN_var = [var.replace('jet', 'knn') for var in base_var] with Profile("Add variables"): from run.knn.common import add_knn, MODEL, VAR as kNN_basevar, EFF as kNN_eff print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var) for i in range(len(base_var)): add_knn(data, newfeat=kNN_var[i], path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL)) print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL) """ weight = 'weight' # 'weight_test' / 'weight' bins_pt = np.linspace(450, 3500, 40) bins_mjj = np.linspace(0, 8000, 80) # Useful masks msk_bkg = data['signal'] == 0 if signal_to_plot == 0: msk_sig = data['signal'] == 1 else: msk_sig = data['sigType'] == signal_to_plot #msk_weight = data['weight']<0.2 msk_knn = (data['lead_knn_ungrtrk500'] > knnCut) & (data['sub_knn_ungrtrk500'] > knnCut) msk_ungr = (data['lead_jet_ungrtrk500'] > ntrkCut) & (data['sub_jet_ungrtrk500'] > ntrkCut) msk_emfrac = (data['lead_jet_EMFrac'] < emfracCut) & (data['sub_jet_EMFrac'] < emfracCut) msk_knn_1 = (data['lead_knn_ungrtrk500'] > knnCut) msk_ungr_1 = (data['lead_jet_ungrtrk500'] > ntrkCut) #msk_knn = (data['knn_ungrtrk500']>knnCut) #msk_ungr = (data['jet_ungrtrk500']>90.0) msk_ntrkBkg = msk_ungr & msk_emfrac & msk_bkg #& msk_weight #& msk_pt & msk_m & msk_eta msk_ntrkSig = msk_ungr & msk_emfrac & msk_sig #& msk_pt & msk_m & msk_eta msk_knnBkg = msk_knn & msk_bkg msk_knnSig = msk_knn & msk_sig msk_ntrkBkg1 = msk_ungr_1 & msk_bkg #& 
msk_weight #& msk_pt & msk_m & msk_eta msk_ntrkSig1 = msk_ungr_1 & msk_sig #& msk_pt & msk_m & msk_eta msk_knnBkg1 = msk_knn_1 & msk_bkg #& msk_weight #& msk_pt & msk_m & msk_eta msk_knnSig1 = msk_knn_1 & msk_sig #& msk_pt & msk_m & msk_eta msk_inclBkg = msk_bkg #& msk_weight #& msk_pt & msk_m & msk_eta msk_inclSig = msk_sig #& msk_pt & msk_m & msk_eta # Mjj dist with cut on ntrk, ungrtrk compared to inclusive selection c = rp.canvas(batch=True) hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_inclBkg, weight].values, label="Multijets, Inclusive", normalise=True, linecolor=ROOT.kGreen + 2, linewidth=3) hist_knnBkg = c.hist( data.loc[msk_knnBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_knnBkg, weight].values, label="Multijets, n_{{trk}}^{{#epsilon}}>{}".format(knnCut), normalise=True, linecolor=ROOT.kMagenta + 2, linestyle=2, linewidth=3) hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_ntrkBkg, weight].values, label="Multijets, n_{{trk}}>{}".format(ntrkCut), normalise=True, linecolor=ROOT.kOrange + 2, linestyle=2, linewidth=3) #hist_CRBkg = c.hist(data.loc[msk_CR_bkg, 'dijetmass'].values, bins=bins_mjj, weights=scale*data.loc[msk_CR_bkg, weight].values, label="CR Bkg, C<20", normalise=True, linecolor=ROOT.kGray+2, linestyle=2) c.legend(width=0.4, xmin=0.5, ymax=0.9) c.ylabel("Fraction of jets") c.xlabel("m_{jj} [GeV]") c.logy() #c.ylim(0.00005, 5) #c.save('figures/distributions/mjj_Bkg_CR20.pdf'.format(knnCut)) #c.save('figures/distributions/mjj_Bkg_CR20.eps'.format(knnCut)) c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.pdf'.format( ntrkCut, knnCut, FIT)) c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.eps'.format( ntrkCut, knnCut, FIT)) del c c = rp.canvas(batch=True) hist_Sig = c.hist(data.loc[msk_sig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_sig, weight].values, label="Model A, m = 2 
TeV, inclusive", normalise=True, linecolor=ROOT.kGreen + 2) hist_knnSig = c.hist( data.loc[msk_knnSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig, weight].values, label="Model A, m = 2 TeV, #it{{n}}_{{trk}}^{{#epsilon}}>{}".format( knnCut), normalise=True, linecolor=ROOT.kMagenta + 2, linestyle=2) hist_ntrkSig = c.hist( data.loc[msk_ntrkSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkSig, weight].values, label="Model A, m = 2 TeV, #it{{n}}_{{trk}}>{}".format(ntrkCut), normalise=True, linecolor=ROOT.kOrange + 2, linestyle=2) #hist_CRSig = c.hist(data.loc[msk_CR_sig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_CR_sig, weight].values, label="Sig, CR", normalise=True, linecolor=ROOT.kGray+2, linestyle=2) c.legend(width=0.4, xmin=0.5, ymax=0.9) c.ylabel("Fraction of jets") c.xlabel("m_{jj} [GeV]") c.logy() #c.ylim(0.00005, 5) c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.pdf'.format( ntrkCut, knnCut, FIT)) c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.eps'.format( ntrkCut, knnCut, FIT)) del c c = rp.canvas(batch=True) hist_knnSig = c.hist( data.loc[msk_knnSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig, weight].values, label="Model A, m = 2 TeV, knn_ntrk>{}".format(knnCut), normalise=False, linecolor=ROOT.kBlue + 1, linestyle=1) hist_knnBkg = c.hist(data.loc[msk_knnBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_knnBkg, weight].values, label="Multijets, knn_ntrk>{}".format(knnCut), normalise=False, linecolor=ROOT.kMagenta + 2, linestyle=2) hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_ntrkBkg, weight].values, label="Multijets, ntrk>{}".format(ntrkCut), normalise=False, linecolor=ROOT.kOrange + 2, linestyle=2) c.legend(width=0.4, xmin=0.3, ymax=0.9) c.ylabel("Number of events") c.xlabel("m_{jj} [GeV]") c.logy() #c.ylim(0.00005, 5) 
c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.pdf'.format( knnCut, FIT)) c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.eps'.format( knnCut, FIT)) bins_mjj = np.linspace(0, 10000, 50) # Unscaled histograms for calculating efficiencies hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_inclBkg, weight].values, normalise=False) hist_inclSig = c.hist(data.loc[msk_inclSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_inclSig, weight].values, normalise=False) hist_ntrkSig = c.hist(data.loc[msk_ntrkSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkSig, weight].values, normalise=False) hist_knnSig = c.hist(data.loc[msk_knnSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig, weight].values, normalise=False) hist_ntrkSig1 = c.hist(data.loc[msk_ntrkSig1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkSig1, weight].values, normalise=False) hist_ntrkBkg1 = c.hist(data.loc[msk_ntrkBkg1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkBkg1, weight].values, normalise=False) hist_knnBkg1 = c.hist(data.loc[msk_knnBkg1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnBkg1, weight].values, normalise=False) hist_knnSig1 = c.hist(data.loc[msk_knnSig1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig1, weight].values, normalise=False) print "Bkg inclusive integral: ", hist_inclBkg.GetEffectiveEntries() print "Sig inclusive integral: ", hist_inclSig.GetEffectiveEntries() print "Bkg pass kNN eff entries / integral: ", hist_knnBkg.GetEffectiveEntries( ), hist_knnBkg.Integral() print "Sig pass kNN eff entries / integral: ", hist_knnSig.GetEffectiveEntries( ), hist_knnSig.Integral() print "Bkg pass ntrk eff entries / integral: ", hist_ntrkBkg.GetEffectiveEntries( ), hist_ntrkBkg.Integral() print "Sig pass ntrk eff entries / integral: ", hist_ntrkSig.GetEffectiveEntries( ), hist_ntrkSig.Integral() print "Bkg Eff. 
knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnBkg.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnSig.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries() print "Bkg Eff. knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnBkg.Integral() / hist_inclBkg.Integral() print "Sig Eff. knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnSig.Integral() / hist_inclSig.Integral() print "Bkg Eff. ntrk>{}, eff. entries: ".format( ntrkCut), 100 * hist_ntrkBkg.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. ntrk>{}, eff. entries: ".format( ntrkCut), 100 * hist_ntrkSig.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries( ) #, hist_ntrkSig.GetEffectiveEntries() print "Bkg Eff. 1 jet knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnBkg1.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. 1 jet knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnSig1.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries() print "Bkg Eff. 1 jet knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnBkg1.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. 1 jet knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnSig1.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries() outHistFile.cd() hist_knnBkg.SetName("bkg_knn") hist_knnSig.SetName("sig_knn") hist_knnBkg.Write() hist_knnSig.Write() outHistFile.Close() # Mjj dist for CR compared to inclusive selection """