def wrapper(*args, **kwargs):
    """
    Run the wrapped study `f` and optionally save and/or show its canvas.

    All positional and keyword arguments are forwarded to `f`, which must
    return a tuple `(canvas, args, path)`. `args` is expected to expose the
    boolean attributes `save` and `show`; `canvas` must provide `save(path)`
    and `show()`.

    When saving, a path ending in a short extension (fewer than four
    characters after the last dot of the file name) is written in three
    formats, `.eps`, `.pdf` and `.C`; any other path is saved verbatim.
    """
    # Run study
    canvas, study_args, path = f(*args, **kwargs)

    # Save
    if study_args.save:
        # Only create the directory if the path actually has one; a bare
        # file name would otherwise lead to `mkdir('')`.
        directory = path.rpartition('/')[0]
        if directory:
            mkdir(directory)

        # The extension is only meaningful if the last dot belongs to the
        # file name itself, not to one of the parent directories.
        base, dot, suffix = path.rpartition('.')
        has_short_ext = bool(dot) and '/' not in suffix and len(suffix) < 4
        if has_short_ext:
            for ext in ('.eps', '.pdf', '.C'):
                canvas.save(base + ext)
        else:
            canvas.save(path)

    # Show
    if study_args.show:
        canvas.show()

    return
def distribution(data_, args, feat, pt_range, mass_range, title=None):
    """
    Perform study of substructure variable distributions.

    Saves plot
    `figures/distribution/distribution_[feat][__pT..][__mass..].pdf`.

    Arguments:
        data_: Pandas data frame from which to read data. The columns
            `feat`, 'weight_test' and, when the corresponding range is given,
            'pt' and 'm' are read.
        args: Namespace holding command-line arguments; passed on to `plot`
            and returned unchanged.
        feat: Feature for which to plot signal- and background distributions.
        pt_range: Pair (min, max) for an exclusive selection on 'pt', or
            None for no selection.
        mass_range: Pair (min, max) for an exclusive selection on 'm', or
            None for no selection.
        title: Unused.

    Returns:
        Tuple of (canvas, args, path to the saved figure).
    """

    # Select data
    data = data_
    if pt_range is not None:
        data = data[(data['pt'] > pt_range[0]) & (data['pt'] < pt_range[1])]
    if mass_range is not None:
        data = data[(data['m'] > mass_range[0]) & (data['m'] < mass_range[1])]

    # Define bins. Known features get a fixed axis range; only for the
    # remaining ones are the weighted 1st/99th percentiles needed.
    feat_lower = feat.lower()
    if feat == 'D2-k#minusNN':
        print("distribution: kNN feature '{}'".format(feat))
        xmin, xmax = -1., 2.
    elif feat_lower.startswith('d2'):
        print("distribution: D2 feature '{}'".format(feat))
        xmin, xmax = 0., 3.
    elif 'tau21' in feat_lower:
        xmin, xmax = 0., 1.
    else:
        values = data[feat].values
        weights = data['weight_test'].values
        xmin = wpercentile(values, 1, weights=weights)
        xmax = wpercentile(values, 99, weights=weights)

    # Snap to nearest multiple in appropriate direction
    snap = 0.5
    xmin = np.floor(xmin / snap) * snap
    xmax = np.ceil(xmax / snap) * snap

    bins = np.linspace(xmin, xmax, 50 + 1, endpoint=True)

    # Perform plotting
    c = plot(args, data, feat, bins, pt_range, mass_range)

    # Output
    mkdir('figures/distribution/')
    pt_tag = ''
    if pt_range is not None:
        pt_tag = '__pT{:.0f}_{:.0f}'.format(pt_range[0], pt_range[1])
    mass_tag = ''
    if mass_range is not None:
        mass_tag = '__mass{:.0f}_{:.0f}'.format(mass_range[0], mass_range[1])
    path = 'figures/distribution/distribution_{}{}{}.pdf'.format(
        standardise(feat), pt_tag, mass_tag)

    # Save here unconditionally; the figure is written regardless of what the
    # caller does with the returned canvas and path.
    c.save(path=path)

    return c, args, path
def plot2D (*argv):
    """
    Draw the 2D DDT figure: signal/background contours together with the
    linear DDT fit and the LDA decision boundary, and save it to
    `figures/ddt/ddt_[variable]_2d.pdf`.

    Positional arguments, in order:
        data, ddt, lda, contours, binsx, binsy, variable
    """

    # Unpack arguments (`data` is accepted but not used here)
    data, ddt, lda, contours, binsx, binsy, variable = argv

    # End points in x shared by both straight lines
    x_lo, x_hi = 1.5, 5.0

    with TemporaryStyle() as style:

        # Style
        style.SetNumberContours(10)

        # Canvas
        canvas = rp.canvas(batch=True)

        # Axes: invisible histogram which only fixes the x-axis range
        canvas.hist([binsy[0]], bins=[binsx[0], binsx[-1]],
                    linestyle=0, linewidth=0)

        # Plotting contours; index 0 is background, index 1 is signal
        for sig, name in enumerate(("Background", "Signal")):
            canvas.hist2d(contours[sig],
                          linecolor=rp.colours[1 + 3 * sig],
                          label=name,
                          option='CONT3',
                          legend_option='L')

        # Linear fit
        fit_intercept = ddt.intercept_ + ddt.offset_
        fit_slope = ddt.coef_
        fit_line = [fit_intercept + x_lo * fit_slope,
                    fit_intercept + x_hi * fit_slope]
        canvas.plot(fit_line, bins=[x_lo, x_hi], color=rp.colours[-1],
                    label='DDT transform fit', linewidth=1, linestyle=1,
                    option='L')

        # LDA decision boundary
        lda_line = [lda.intercept_ + x_lo * lda.coef_,
                    lda.intercept_ + x_hi * lda.coef_]
        canvas.plot(lda_line, bins=[x_lo, x_hi], label='LDA boundary',
                    linewidth=1, linestyle=2, option='L')

        # Decorations
        canvas.text(["#sqrt{s} = 13 TeV"], qualifier=QUALIFIER, ATLAS=False)
        canvas.legend()
        canvas.ylim(binsy[0], binsy[-1])
        canvas.xlabel("Large-#it{R} jet " + latex('rhoDDT', ROOT=True))

        # Pick the y-axis symbol for the variable; unknown variables get no
        # y-axis label at all.
        if variable == VAR_TAU21:
            symbol = '#tau_{21}'
        elif variable == VAR_N2:
            symbol = 'N_{2}'
        elif variable == VAR_DECDEEP:
            symbol = 'dec_deepWvsQCD'
        elif variable == VAR_DEEP:
            symbol = 'deepWvsQCD'
        else:
            symbol = None
        if symbol is not None:
            canvas.ylabel("Large-#it{R} jet " + latex(symbol, ROOT=True))

        # Save
        mkdir('figures/ddt')
        canvas.save('figures/ddt/ddt_{}_2d.pdf'.format(variable))

    return
def plot(*argv):
    """
    Plot the progress of a Bayesian optimisation and save it to
    `figures/optimisation/optimisation_[experiment].pdf`.

    Positional arguments, in order:
        experiment: Name used in the output file name.
        means: Sequence of evaluation means; entries above the y-axis
            maximum are flagged with a marker near the top of the frame.
        graph: Graph of the evaluations, drawn as points.
        idx_improvements: Index into `best_mean`/`bins` selecting the steps
            at which the best result improved.
        best_mean: Array of the running best result.
        bins: Array of x-axis positions for `best_mean`.
    """

    # Unpack arguments
    experiment, means, graph, idx_improvements, best_mean, bins = argv

    # Plot results
    c = rp.canvas(batch=True)
    ymax = 1.0  # 1.5
    ymin = 0.3

    # Out-of-bounds evaluations: x is the step index, y sits just below the
    # top of the visible range.
    oobx = [step for step, mean in enumerate(means) if mean > ymax]
    ooby = np.ones_like(oobx) * 0.96 * (ymax - ymin) + ymin

    # Plots
    c.graph(graph, markercolor=rp.colours[1], linecolor=rp.colours[1],
            markersize=0.7, option='AP', label='Evaluations',
            legend_option='PE')
    if len(ooby):
        # Nothing to draw if every evaluation is within bounds.
        c.graph(ooby, bins=oobx, markercolor=rp.colours[1], markerstyle=22,
                option='P')
    c.graph(best_mean, bins=bins, linecolor=rp.colours[5], linewidth=2,
            option='L', label='Best result')
    c.graph(best_mean[idx_improvements], bins=bins[idx_improvements],
            markercolor=rp.colours[5], markersize=0.5, option='P')

    # Decorations
    c.pad()._yaxis().SetNdivisions(505)
    c.xlabel("Bayesian optimisation step")
    c.ylabel("Cross-validation optimisation metric, L_{clf}^{val}")
    c.xlim(0, len(bins))
    c.ylim(ymin, ymax)
    c.legend(width=0.22, ymax=0.816)
    c.text(["#sqrt{s} = 13 TeV", "Neural network (NN) classifier"],
           qualifier=QUALIFIER)

    # Save
    mkdir('figures/optimisation/')
    c.save('figures/optimisation/optimisation_{}.pdf'.format(experiment))
    return
def plot(profile, fit):
    """
    Draw a 2D profile as a colour map and save it as PDF and EPS under
    `figures/knn/`.

    Arguments:
        profile: 2D histogram to draw; its axes are styled in place.
        fit: Truthy if `profile` is the k-NN fitted profile, falsy if it is
            the measured one. Only affects the z-axis title and file name.
    """

    # rootplotting
    canvas = rp.canvas(batch=True)
    tpad = canvas.pads()[0]._bare()
    tpad.cd()
    tpad.SetRightMargin(0.20)
    tpad.SetLeftMargin(0.15)
    tpad.SetTopMargin(0.10)

    # Styling, axis by axis
    xaxis = profile.GetXaxis()
    yaxis = profile.GetYaxis()
    zaxis = profile.GetZaxis()

    kind = "#it{k}-NN fitted" if fit else "Measured"
    xaxis.SetTitle(latex(VARX, ROOT=True) + " [GeV]")
    yaxis.SetTitle(latex(VARY, ROOT=True) + " [GeV]")
    zaxis.SetTitle("%s %s^{(%s%%)}" % (kind, latex(VAR, ROOT=True), EFF))

    yaxis.SetNdivisions(505)
    zaxis.SetNdivisions(505)

    xaxis.SetTitleOffset(1.4)
    yaxis.SetTitleOffset(1.8)
    zaxis.SetTitleOffset(1.3)

    if ZRANGE:
        zaxis.SetRangeUser(*ZRANGE)
    profile.SetContour(NB_CONTOUR)

    # Draw
    profile.Draw('COLZ')
    for bound in (BOUNDS[0], BOUNDS[1]):
        bound.DrawCopy("SAME")

    # Decorations
    canvas.text(["#sqrt{s} = 13 TeV", "Multijets"],
                ATLAS=False, textcolor=ROOT.kWhite)

    # Save, once per output format
    mkdir('figures/knn/')
    tag = 'fit' if fit else 'profile'
    for pattern in ('figures/knn/knn_{}_{:s}_{}_{}.pdf',
                    'figures/knn/knn_{}_{:s}_{}_{}.eps'):
        canvas.save(pattern.format(tag, VAR, EFF, MODEL))
def wrapper(*args, **kwargs):
    """
    Run the wrapped study `f` and optionally save and/or show its canvas.

    All positional and keyword arguments are forwarded to `f`, which must
    return a tuple `(canvas, args, path)`. `args` is expected to expose the
    boolean attributes `save` and `show`; `canvas` must provide `save(path)`
    and `show()`.
    """
    # Run study
    canvas, study_args, path = f(*args, **kwargs)

    # Save
    if study_args.save:
        # Only create the directory if the path actually has one; a bare
        # file name would otherwise lead to `mkdir('')`.
        directory = path.rpartition('/')[0]
        if directory:
            mkdir(directory)
        canvas.save(path)

    # Show
    if study_args.show:
        canvas.show()

    return
def plot1D (*argv):
    """
    Draw the 1D DDT figure: the original and transformed tau21 profiles
    together with the linear fit, and save it to `figures/ddt/ddt.pdf`.

    Positional arguments, in order:
        graphs: Mapping with the entries 'Tau21' and 'Tau21DDT'.
        ddt: Fitted transform exposing `intercept_`, `offset_` and `coef_`.
        arr_x: Values whose minimum and maximum define the drawn fit range.
    """

    # Unpack arguments
    graphs, ddt, arr_x = argv

    # Style
    # NOTE(review): this modifies the global ROOT style and is not restored.
    ROOT.gStyle.SetTitleOffset(1.4, 'x')

    # Canvas
    c = rp.canvas(batch=True)

    # Setup
    pad = c.pads()[0]._bare()
    pad.cd()
    # NOTE(review): the original set the top margin twice in a row; possibly
    # a different margin was intended for the second call -- confirm.
    pad.SetTopMargin(0.10)

    # Profiles
    c.graph(graphs['Tau21'], label="Original, #tau_{21}",
            linecolor=rp.colours[4], markercolor=rp.colours[4],
            markerstyle=24, legend_option='PE')
    c.graph(graphs['Tau21DDT'], label="Transformed, #tau_{21}^{DDT}",
            linecolor=rp.colours[1], markercolor=rp.colours[1],
            markerstyle=20, legend_option='PE')

    # Fit
    x1, x2 = min(arr_x), max(arr_x)
    intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
    y1 = intercept + x1 * coef
    y2 = intercept + x2 * coef
    c.plot([y1, y2], bins=[x1, x2], color=rp.colours[-1],
           label='Linear fit', linewidth=1, linestyle=1, option='L')

    # Decorations
    c.xlabel("Large-#it{R} jet #rho^{DDT} = log(m^{2}/ p_{T} / 1 GeV)")
    c.ylabel("#LT#tau_{21}#GT, #LT#tau_{21}^{DDT}#GT")

    c.text(["#sqrt{s} = 13 TeV, Multijets"], qualifier=QUALIFIER)
    c.legend(width=0.25, xmin=0.57,
             ymax=None if "Internal" in QUALIFIER else 0.85)

    c.ylim(0, 1.4)
    c.latex("Fit range", sum(FIT_RANGE) / 2., 0.08, textsize=13,
            textcolor=ROOT.kGray + 2)
    c.xline(FIT_RANGE[0], ymax=0.82, text_align='BR',
            linecolor=ROOT.kGray + 2)
    c.xline(FIT_RANGE[1], ymax=0.82, text_align='BL',
            linecolor=ROOT.kGray + 2)

    # Save
    mkdir('figures/ddt/')
    c.save('figures/ddt/ddt.pdf')
    return
def save_hdf5 (data, path, name='dataset', gzip=True):
    """
    Write an array to a single dataset of a freshly created HDF5 file.

    Arguments:
        data: Numpy recarray to be saved to file.
        path: Path to HDF5 save file; an existing file is overwritten.
        name: Name of dataset in which to store the data.
        gzip: Whether to apply gzip compression to the dataset.
    """

    # Everything before the last '/' is the target directory; it is only
    # created when the path actually contains one.
    parent = path.rpartition('/')[0]
    if parent:
        mkdir(parent)

    # Save array to HDF5 file
    compression = "gzip" if gzip else None
    with h5py.File(path, 'w') as handle:
        handle.create_dataset(name, data=data, compression=compression)

    return
def save_patch(patch, filename):
    """
    Print a patch and dump it to a JSON file.

    The patch is written with 4-space indentation and sorted keys. The
    target directory is created first if `filename` contains one.

    Arguments:
        patch: JSON-serialisable object to be saved.
        filename: Path of the JSON file to write; an existing file is
            overwritten.
    """

    # @TEMP: Debug
    rule = "- " * 40
    print(rule)
    print("Saving the following patch to '{}':".format(filename))
    print(patch)
    print(rule)

    # Make sure target directory exists. A bare file name has no directory
    # part, in which case there is nothing to create.
    directory = filename.rpartition('/')[0]
    if directory:
        mkdir(directory)

    # Dump patch to JSON file
    with open(filename, 'w') as f:
        json.dump(patch, f, indent=4, sort_keys=True)

    return
def main(args):
    """
    Produce the k-NN profile figures and the local selection-efficiency
    maps for signal and background.

    Steps:
        1. Load the data and fill the measured background profile.
        2. Add the k-NN variable to the data and load the k-NN model.
        3. Evaluate the model on a re-binned grid to get the fitted profile.
        4. Plot the measured and the fitted profile.
        5. For signal and background separately, plot the efficiency of the
           cut `knn < 0` in bins of (VARX, VARY) and save it under
           `figures/knn/`.

    Arguments:
        args: Command-line arguments, passed through `initialise`.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    profile_meas, _ = fill_profile(data[msk_bkg])

    # Add k-NN variable. The same model file is used for adding the variable
    # and for loading the classifier, so format the path exactly once.
    knnfeat = 'knn'
    model_path = 'models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF)
    add_knn(data, newfeat=knnfeat, path=model_path)

    # Loading KNN classifier
    knn = loadclf(model_path)

    # Events passing the cut; independent of bin and of signal/background
    msk_pass = data[knnfeat] < 0

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges  @TODO: Make standardised right away?
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

    # Plotting
    with Profile("Plotting"):
        for fit in [False, True]:

            # Select correct profile
            profile = profile_fit if fit else profile_meas

            # Plot
            plot(profile, fit)

    # Colour palettes. Background uses the same blue gradient as signal, but
    # with an extra highlight colour covering the range [0, EFF%].
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]
    highlight = (255 / 255., 51 / 255., 4 / 255.)

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        if sig:
            rgbs = blues
            stops = np.linspace(0, 1, len(rgbs), endpoint=True)
        else:
            rgbs = [highlight] + blues
            stops = np.array([0] + list(
                np.linspace(0, 1, len(rgbs) - 1, endpoint=True) *
                (1. - EFF / 100.) + EFF / 100.))
        nb_cols = len(rgbs)
        red, green, blue = [np.array(channel) for channel in zip(*rgbs)]

        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green,
                                             blue, NB_CONTOUR)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1], AXIS[var][2], AXIS[var][0] + 1,
                        endpoint=True)
            for var in VARS
        ]

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            in_bin = msk & (data[VARY] > low) & (data[VARY] < high)
            num = data.loc[in_bin & msk_pass, 'weight_test'].values.sum()
            den = data.loc[in_bin, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        for i, j in itertools.product(*map(range, shape)):

            # Select events of this category within the (i, j) bin: lower
            # edge exclusive, upper edge inclusive, in every dimension.
            in_bin = msk
            for var, var_edges, idx in zip(VARS, bins, (i, j)):
                in_bin = in_bin & (data[var] > var_edges[idx]) \
                                & (data[var] <= var_edges[idx + 1])

            # Set non-zero bin content
            if np.sum(in_bin):
                num = data.loc[in_bin & msk_pass, 'weight_test'].values.sum()
                den = data.loc[in_bin, 'weight_test'].values.sum()
                profile.SetBinContent(i + 1, j + 1, num / den)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(VAR, ROOT=True), EFF))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        # Only the first row carries the symbol; the others just the number.
        prefix = "#bar{#varepsilon}^{rel}_{%s} = " % ('sig' if sig else 'bkg')
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            tlatex.DrawLatex(
                xt, yt,
                "%s%.1f%%" % (prefix if ibin == 1 else '', eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV", -4.5, BOUNDS[0].Eval(-4.5) + 30, align=21,
                angle=-37, textsize=13, textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV", -2.5, BOUNDS[1].Eval(-2.5) - 30, align=23,
                angle=-57, textsize=13, textcolor=ROOT.kGray + 3)

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', VAR, EFF))

    return
def plot_classifier_training_loss(
        num_folds, basedir='models/adversarial/classifier/crossval/'):
    """
    Plot the classifier training loss.

    Reads the cross-validation training histories
    `history__crossval_classifier__*of[num_folds].json` in `basedir`, and
    draws the mean and standard deviation across folds of the training and
    validation loss per epoch. The figure is saved to
    `figures/loss_classifier.pdf`.

    Arguments:
        num_folds: Number of cross-validation folds; used in the file name
            pattern.
        basedir: Directory holding the history files.

    Returns:
        None. Returns early, without plotting, if no history file is found.
    """

    # Check(s)
    if not basedir.endswith('/'):
        basedir += '/'

    # Get paths to classifier training losses
    pattern = 'history__crossval_classifier__*of{}.json'.format(num_folds)
    paths = sorted(glob.glob(basedir + pattern))

    if not paths:
        print("No models found for classifier CV study.")
        return

    # Read losses from files
    losses = {'train': list(), 'val': list()}
    for path in paths:
        with open(path, 'r') as f:
            d = json.load(f)

        # Float dtype is required for the NaN assignment below
        loss = np.array(d['val_loss'], dtype=float)
        outliers = np.abs(loss - 0.72) < 0.02
        print("Outliers: {}".format(loss[outliers]))
        # @FIXME: This probably isn't completely kosher
        loss[outliers] = np.nan
        losses['val'].append(loss)

        loss = np.array(d['loss'])
        losses['train'].append(loss)

    # Define variable(s); the number of epochs is taken from the last file
    bins = np.arange(len(loss))
    histbins = np.arange(len(loss) + 1) + 0.5

    # Canvas
    c = rp.canvas(batch=True)

    # Plots
    categories = list()
    for name, key, colour, linestyle in zip(['Validation', 'Training'],
                                            ['val', 'train'],
                                            [rp.colours[4], rp.colours[1]],
                                            [1, 2]):

        # Histograms: NaN-aware mean and spread across folds
        loss_mean = np.nanmean(losses[key], axis=0)
        loss_std = np.nanstd(losses[key], axis=0)

        hist = ROOT.TH1F(key + '_loss', "", len(histbins) - 1, histbins)
        for idx, (mean, std) in enumerate(zip(loss_mean, loss_std)):
            hist.SetBinContent(idx + 1, mean)
            hist.SetBinError(idx + 1, std)

        c.hist([0], bins=[0, max(bins)], linewidth=0,
               linestyle=0)  # Force correct x-axis
        c.hist(hist, fillcolor=colour, alpha=0.3, option='LE3')
        c.hist(hist, linecolor=colour, linewidth=3, linestyle=linestyle,
               option='HISTL')

        categories += [(name, {
            'linestyle': linestyle,
            'linewidth': 3,
            'linecolor': colour,
            'fillcolor': colour,
            'alpha': 0.3,
            'option': 'FL'
        })]

    # Decorations
    c.pads()[0]._yaxis().SetNdivisions(505)
    c.xlabel("Training epoch")
    c.ylabel("Cross-validation classifier loss, L_{clf}")
    c.xlim(0, max(bins))
    c.ylim(0.3, 0.5)
    c.legend(categories=categories, width=0.25)  # ..., xmin=0.475
    c.text(TEXT + ["#it{W} jet tagging", "Neural network (NN) classifier"],
           qualifier=QUALIFIER)

    # Save
    mkdir('figures/')
    c.save('figures/loss_classifier.pdf')
    return
def plot_adversarial_training_loss(
        lambda_reg,
        num_folds,
        pretrain_epochs,
        H_prior=None,
        basedir='models/adversarial/combined/crossval/'):
    """
    Plot the classifier, adversary, and combined losses for the adversarial
    training of the jet classifier.

    The three losses are drawn in three vertically stacked pads, as mean and
    standard deviation across the history files found. The figure is saved
    to `figures/loss_adversarial_lambda[lambda]_[full|cv].pdf`.

    Arguments:
        lambda_reg: Regularisation parameter; the combined loss is
            `L_clf - lambda_reg * L_adv`.
        num_folds: Number of cross-validation folds. If falsy, the single
            non-cross-validation history file is read instead.
        pretrain_epochs: Number of adversary pre-training epochs, shown as a
            shaded box and a vertical line.
        H_prior: Reference value for the adversary loss. If None, the
            corresponding reference lines and labels are left out.
        basedir: Directory holding the history files.

    Returns:
        None. Returns early, without plotting, if no history file is found.
    """

    # Check(s)
    if not basedir.endswith('/'):
        basedir += '/'

    # Define variable(s): lambda formatted with as many decimals as needed,
    # using 'p' instead of the decimal point.
    digits = int(np.ceil(max(-np.log10(lambda_reg), 0)))
    lambda_str = '{l:.{d:d}f}'.format(d=digits,
                                      l=lambda_reg).replace('.', 'p')

    # Get paths to all cross-validation adversarially trained classifiers
    if num_folds:
        paths = sorted(
            glob.glob(basedir +
                      'history__combined_lambda{}__*of{}.json'.format(
                          lambda_str, num_folds)))
    else:
        paths = glob.glob(basedir +
                          'history__combined_lambda{}.json'.format(lambda_str))

    print("Found {} paths.".format(len(paths)))
    if not paths:
        return

    # Store losses
    keys = [
        'train_comb', 'train_clf', 'train_adv', 'val_comb', 'val_clf',
        'val_adv'
    ]
    losses = {key: list() for key in keys}
    for path in paths:
        with open(path, 'r') as f:
            d = json.load(f)

        # Loop loss classes
        for name, prefix in zip(['train', 'val'], ['', 'val_']):
            try:
                # Classifier; float dtype is required for the NaN assignment
                loss = np.array(d[prefix + 'classifier_loss'], dtype=float)
                loss[loss > 7.0] = np.nan
                losses[name + '_clf'].append(loss)

                # Adversary
                loss = np.array(d[prefix + 'adversary_loss'])
                losses[name + '_adv'].append(loss)

                # Combined
                losses[name + '_comb'].append(losses[name + '_clf'][-1] -
                                              lambda_reg *
                                              losses[name + '_adv'][-1])
            except KeyError:
                pass  # No validation

    # Plot results
    c = rp.canvas(batch=True, num_pads=3, ratio=False, size=(600, 800))
    bins = np.arange(len(loss))
    histbins = np.arange(len(loss) + 1) - 0.5

    # Axes
    for idx in range(3):
        c.pads()[idx].hist([0], bins=[0, len(bins) - 1], linewidth=0,
                           linestyle=0)  # Force correct x-axis

    # Plots
    categories = list()
    for ityp, typ in enumerate(['val', 'train']):
        for igrp, grp in enumerate(['clf', 'adv', 'comb']):
            key = '{}_{}'.format(typ, grp)
            colour = rp.colours[1 if typ == 'train' else 4]

            # Create histogram
            try:
                loss_mean = np.nanmean(losses[key], axis=0)
                loss_std = np.nanstd(losses[key], axis=0)
                hist = ROOT.TH1F(key, "", len(histbins) - 1, histbins)
                for ibin in range(len(loss_mean)):
                    hist.SetBinContent(ibin + 1, loss_mean[ibin])
                    hist.SetBinError(ibin + 1, loss_std[ibin])

                c.pads()[igrp].hist(hist, fillcolor=colour,
                                    linestyle=ityp + 1, linewidth=0,
                                    alpha=0.3, option='LE3')
                c.pads()[igrp].hist(hist, fillcolor=0, fillstyle=0,
                                    linecolor=colour, linestyle=ityp + 1,
                                    linewidth=3, option='HISTL')
            except TypeError:
                pass  # No validation

            if igrp == 0:
                categories += [('Training' if typ == 'train' else 'Validation',
                                {
                                    'linestyle': ityp + 1,
                                    'linewidth': 3,
                                    'fillcolor': colour,
                                    'alpha': 0.3,
                                    'linecolor': colour,
                                    'option': 'FL'
                                })]

    # Formatting pads
    margin = 0.2
    ymins, ymaxs = list(), list()
    clf_opt_val = None
    num_pads = len(c.pads())
    for ipad, pad in enumerate(c.pads()):
        tpad = pad._bare()  # ROOT.TPad
        # Position of the pad in the stack: 0 for the top, 1 for the bottom
        frac = ipad / float(num_pads - 1)
        tpad.SetLeftMargin(0.20)
        tpad.SetBottomMargin(frac * margin)
        tpad.SetTopMargin((1 - frac) * margin)
        pad._xaxis().SetNdivisions(505)
        pad._yaxis().SetNdivisions(505)
        if ipad < num_pads - 1:  # Not bottom pad
            pad._xaxis().SetLabelOffset(9999.)
            pad._xaxis().SetTitleOffset(9999.)
        else:
            pad._xaxis().SetTitleOffset(3.5)

        lows, highs = list(), list()
        for hist in pad._primitives:
            if not isinstance(hist, ROOT.TGraph):
                lows.append(get_min(hist))
                highs.append(get_max(hist))

        # Get reference-line value
        clf_opt_val = clf_opt_val or \
            c.pads()[0]._primitives[1].GetBinContent(1)
        if ipad == 0:
            ref = clf_opt_val
        elif H_prior is None:
            # No adversary reference available: the range is determined by
            # the histograms alone.
            ref = None
        elif ipad == 1:
            ref = H_prior
        else:
            ref = clf_opt_val - lambda_reg * H_prior

        # Make sure the reference line, if any, is within the y-axis range
        if ref is not None:
            lows.append(ref)
            highs.append(ref)
        ymin = min(lows)
        ymax = max(highs)

        ydiff = ymax - ymin
        ymin -= ydiff * 0.2
        ymax += ydiff * (0.7 if ipad == 0 else (0.7 if ipad == 1 else 0.2))

        if ipad == 0:
            # ymin = 0.25
            ymax *= 1.2

        pad.ylim(ymin, ymax)

        ymins.append(ymin)
        ymaxs.append(ymax)

    c._bare().Update()

    # Pre-training boxes; kept in a list so that they stay alive until the
    # canvas has been saved.
    boxes = list()
    for ipad, pad in enumerate(c.pads()):
        pad._bare().cd()
        boxes.append(ROOT.TBox(0, ymins[ipad], pretrain_epochs, ymaxs[ipad]))
        boxes[-1].SetFillColorAlpha(ROOT.kBlack, 0.05)
        boxes[-1].Draw("SAME")

    # Vertical lines
    for ipad in range(num_pads):
        align = 'TR' if ipad < 2 else 'BR'
        c.pads()[ipad].xline(
            pretrain_epochs,
            ymin=ymins[ipad],
            ymax=ymaxs[ipad],
            text=' Adv. pre-training ' if ipad == 0 else None,
            text_align=align,
            linestyle=1,
            linecolor=ROOT.kGray + 2)

    # Horizontal lines
    c.pads()[0].yline(clf_opt_val)
    if H_prior is not None:
        c.pads()[1].yline(H_prior)
        c.pads()[2].yline(clf_opt_val - lambda_reg * (H_prior))

    opts = dict(align=31, textcolor=ROOT.kGray + 2, textsize=14)
    c.pads()[0].latex("Stand-alone NN ", bins[-1] * 0.98,
                      clf_opt_val + (ymaxs[0] - ymins[0]) * 0.03, **opts)

    if H_prior is not None:
        c.pads()[1].latex("#it{H}(prior) ", bins[-1] * 0.98,
                          H_prior + (ymaxs[1] - ymins[1]) * 0.03, **opts)

        opts['align'] = 33
        c.pads()[2].latex(
            "Ideal ", bins[-1] * 0.98,
            clf_opt_val - lambda_reg * (H_prior) -
            (ymaxs[2] - ymins[2]) * 0.03, **opts)

    # Decorations
    ROOT.gStyle.SetTitleOffset(2.0, 'y')  # 2.2
    c.xlabel("Training epoch")
    c.pads()[0].ylabel("#it{L}_{clf.}")
    c.pads()[1].ylabel("#it{L}_{adv.}")
    c.pads()[2].ylabel("#it{L}_{clf.} #minus #lambda #it{L}_{adv.}")
    for pad in c.pads():
        pad.xlim(0, max(bins) - 1)

    c.pads()[0].text([], xmin=0.2, ymax=0.85, qualifier=QUALIFIER)
    c.pads()[1].text([
        "#sqrt{s} = 13 TeV", "#it{W} jet tagging",
        "Adversarial training (#lambda = %s)" %
        (lambda_str.replace('p', '.'))
    ], ATLAS=False, ymax=0.70, xmin=0.27)
    c.pads()[0].legend(xmin=0.60, ymax=0.70, categories=categories)

    # Save
    # NOTE(review): the file lookup above treats any falsy `num_folds` as
    # "full", whereas the name below only does so for None -- confirm.
    mkdir('figures/')
    c.save('figures/loss_adversarial_lambda{}_{}.pdf'.format(
        lambda_str, 'full' if num_folds is None else 'cv'))
    return
def test(data, variable, bg_eff, signal_above=False):
    """
    Produce the k-NN profile figures and the local selection-efficiency
    maps for signal and background, for a given variable and background
    efficiency.

    Arguments:
        data: Pandas data frame; the columns 'signal', 'weight_test', VARX
            and VARY are read, and the k-NN variable 'knn' is added by
            `add_knn`.
        variable: Name of the variable which the k-NN model was fitted to.
        bg_eff: Background efficiency, in percent, of the k-NN model.
        signal_above: If True, events pass the cut for `knn > 0`; otherwise
            for `knn < 0`.

    NOTE(review): reads `args.output` although `args` is not a parameter, so
    it has to be available as a module-level name -- confirm.
    """

    # Shout out to Cynthia Brewer and Mark Harrower
    # [http://colorbrewer2.org]. Palette is colorblind-safe.
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]
    # Extra colour used for background efficiencies below `bg_eff`
    highlight = (255 / 255., 51 / 255., 4 / 255.)

    def set_palette(rgbs, stops):
        # Install a gradient colour table from a list of RGB triplets.
        red, green, blue = [np.array(channel) for channel in zip(*rgbs)]
        ROOT.TColor.CreateGradientColorTable(len(rgbs), stops, red, green,
                                             blue, NB_CONTOUR)

    set_palette(blues, np.linspace(0, 1, len(blues), endpoint=True))

    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    with Profile("filling profile"):
        profile_meas, _ = fill_profile(data[msk_bkg], variable, bg_eff,
                                       signal_above=signal_above)

    # Add k-NN variable
    with Profile("adding variable"):
        knnfeat = 'knn'
        add_knn(data, feat=variable, newfeat=knnfeat,
                path=args.output +
                '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff))

    # Loading KNN classifier
    with Profile("loading model"):
        knn = loadclf(
            args.output +
            '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff))

    # Events passing the cut; independent of bin and of signal/background
    if signal_above:
        msk_pass = data[knnfeat] > 0  # ensure correct cut direction
    else:
        msk_pass = data[knnfeat] < 0

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

    # Plotting
    for fit in [False, True]:

        # Select correct profile
        profile = profile_fit if fit else profile_meas

        # Plot
        plot(profile, fit, variable, bg_eff)

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):
        if sig:
            print("working on signal")
        else:
            print("working on bg")

        if sig:
            set_palette(blues, np.linspace(0, 1, len(blues), endpoint=True))
        else:
            # The highlight colour covers [0, bg_eff%]; the blue gradient is
            # squeezed into the remaining range.
            stops = np.array([0] + list(
                np.linspace(0, 1, len(blues), endpoint=True) *
                (1. - bg_eff / 100.) + bg_eff / 100.))
            set_palette([highlight] + blues, stops)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1], AXIS[var][2], AXIS[var][0] + 1,
                        endpoint=True)
            for var in VARS
        ]

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            in_bin = msk & (data[VARY] > low) & (data[VARY] < high)
            num = data.loc[in_bin & msk_pass, 'weight_test'].values.sum()
            den = data.loc[in_bin, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        with Profile("Fill profile"):
            for i, j in itertools.product(*map(range, shape)):

                # Select events of this category within the (i, j) bin:
                # lower edge exclusive, upper edge inclusive.
                in_bin = msk
                for var, var_edges, idx in zip(VARS, bins, (i, j)):
                    in_bin = in_bin & (data[var] > var_edges[idx]) \
                                    & (data[var] <= var_edges[idx + 1])

                # Set non-zero bin content
                if np.sum(in_bin):
                    num = data.loc[in_bin & msk_pass,
                                   'weight_test'].values.sum()
                    den = data.loc[in_bin, 'weight_test'].values.sum()
                    profile.SetBinContent(i + 1, j + 1, num / den)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(variable, ROOT=True), bg_eff))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        # Only the first row carries the symbol; the others just the number.
        prefix = "#bar{#varepsilon}^{rel}_{%s} = " % ('sig' if sig else 'bkg')
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            tlatex.DrawLatex(
                xt, yt,
                "%s%.1f%%" % (prefix if ibin == 1 else '', eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV", -4.5, BOUNDS[0].Eval(-4.5) + 30, align=21,
                angle=-37, textsize=13, textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV", -2.5, BOUNDS[1].Eval(-2.5) - 30, align=23,
                angle=-57, textsize=13, textcolor=ROOT.kGray + 3)

        # Save, both to the fixed location and to the output directory
        mkdir('knn_fitter/figures/')
        c.save('knn_fitter/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))
        mkdir(args.output + '/figures/')
        c.save(args.output + '/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))

    return
def plot(*argv):
    """
    Draw the progress of a Bayesian optimisation and save the figure to
    `figures/optimisation/optimisation_[experiment].pdf`.

    Positional arguments, in order:
        experiment, means, graph, idx_improvements, best_mean, bins

    The y-axis range and the labels depend on whether `experiment` is
    'classifier' or not.
    """

    # Unpack arguments
    experiment, means, graph, idx_improvements, best_mean, bins = argv
    is_classifier = (experiment == 'classifier')

    # Plot results
    canvas = rp.canvas(batch=True)

    # Visible y-axis range
    if is_classifier:
        ymin, ymax = 0.3, 1.0  # 1.5
    else:
        ymin, ymax = 0.0, 1500.0

    # Evaluations above the visible range are flagged by a marker placed
    # close to the top of the frame, at the step where they occurred.
    oobx = [step for step, mean in enumerate(means) if mean > ymax]
    ooby = np.ones_like(oobx) * 0.96 * (ymax - ymin) + ymin

    # Plots
    markersize = 0.8
    eval_colour = rp.colours[1]
    best_colour = rp.colours[5]

    canvas.graph(graph, markercolor=eval_colour, linecolor=eval_colour,
                 markerstyle=20, markersize=markersize, option='AP',
                 label='Evaluations', legend_option='PE')
    if len(ooby):
        canvas.graph(ooby, bins=oobx, markercolor=eval_colour,
                     markerstyle=22, option='P')
    canvas.graph(best_mean, bins=bins, linecolor=best_colour, linewidth=2,
                 option='L')
    canvas.graph(best_mean[idx_improvements], bins=bins[idx_improvements],
                 markercolor=best_colour, markerstyle=24,
                 markersize=markersize, option='P')

    # Decorations
    canvas.pad()._yaxis().SetNdivisions(505)
    canvas.xlabel("Bayesian optimisation step")

    if is_classifier:
        metric = "L_{clf}^{val}"
    else:
        metric = '1/#varepsilon_{bkg}^{rel} + #lambda/JSD'
    canvas.ylabel("Cross-val. optimisation metric, " + metric)

    canvas.xlim(0, len(bins))
    canvas.ylim(ymin, ymax)

    best_style = {
        'linecolor': best_colour,
        'linewidth': 2,
        'markercolor': best_colour,
        'markerstyle': 24,
        'option': 'LP',
    }
    canvas.legend(width=0.22, ymax=0.816,
                  categories=[('Best result', best_style)])

    if is_classifier:
        description = ["Neural network (NN) classifier"]
    else:
        description = ["Adversarial neural network (ANN)", "classifier"]
    canvas.text(["#sqrt{s} = 13 TeV"] + description, qualifier=QUALIFIER)

    # Save
    mkdir('figures/optimisation/')
    canvas.save('figures/optimisation/optimisation_{}.pdf'.format(experiment))
    return
def main (args): # Initialise args, cfg = initialise(args) # Load data data, _, _ = load_data('data/' + args.input) #, test=True) msk_sig = data['signal'] == 1 msk_bkg = ~msk_sig # ------------------------------------------------------------------------- #### #### # Initialise Keras backend #### initialise_backend(args) #### #### # Neural network-specific initialisation of the configuration dict #### initialise_config(args, cfg) #### #### # Keras import(s) #### from keras.models import load_model #### #### # NN #### from run.adversarial.common import add_nn #### with Profile("NN"): #### classifier = load_model('models/adversarial/classifier/full/classifier.h5') #### add_nn(data, classifier, 'NN') #### pass # ------------------------------------------------------------------------- # Fill measured profile profile_meas, (x,percs, err) = fill_profile_1D(data[msk_bkg]) weights = 1/err # Add k-NN variable knnfeat = 'knn' orgfeat = VAR add_knn(data, newfeat=knnfeat, path='models/knn/{}_{}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)) # Loading KNN classifier knn = loadclf('models/knn/{}_{:s}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)) #knn = loadclf('models/knn/{}_{:s}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)) X = x.reshape(-1,1) # Filling fitted profile with Profile("Filling fitted profile"): rebin = 8 # Short-hands vbins, vmin, vmax = AXIS[VARX] # Re-binned bin edges @TODO: Make standardised right away? 
# edges = np.interp(np.linspace(0, vbins, vbins * rebin + 1, endpoint=True), # range(vbins + 1), # np.linspace(vmin, vmax, vbins + 1, endpoint=True)) fineBins = np.linspace(vmin, vmax, vbins*rebin + 1, endpoint=True) orgBins = np.linspace(vmin, vmax, vbins + 1, endpoint=True) # Re-binned bin centres fineCentres = fineBins[:-1] + 0.5 * np.diff(fineBins) orgCentres = orgBins[:-1] + 0.5 * np.diff(orgBins) pass # Get predictions evaluated at re-binned bin centres if 'erf' in FIT: fit = func(fineCentres, knn[0], knn[1], knn[2]) print "Check: ", func([1500, 2000], knn[0], knn[1], knn[2]) else: fit = knn.predict(fineCentres.reshape(-1,1)) #centres.reshape(-1,1)) # Fill ROOT "profile" profile_fit = ROOT.TH1F('profile_fit', "", len(fineBins) - 1, fineBins.flatten('C')) root_numpy.array2hist(fit, profile_fit) knn1 = PolynomialFeatures(degree=2) X_poly = knn1.fit_transform(X) reg = LinearRegression(fit_intercept=False) #fit_intercept=False) reg.fit(X_poly, percs, weights) score = round(reg.score(X_poly, percs), 4) coef = reg.coef_ intercept = reg.intercept_ print "COEFFICIENTS: ", coef, intercept TCoef = ROOT.TVector3(coef[0], coef[1], coef[2]) outFile = ROOT.TFile.Open("models/{}_jet_ungrtrk500_eff{}_stat{}_{}.root".format(FIT, EFF, MIN_STAT, MODEL),"RECREATE") outFile.cd() TCoef.Write() profile_fit.SetName("kNNfit") profile_fit.Write() outFile.Close() # profile_meas2 = ROOT.TH1F('profile_meas', "", len(x) - 1, x.flatten('C')) # root_numpy.array2hist(percs, profile_meas2) profile_meas2 = ROOT.TGraph(len(x), x, percs) pass # Plotting with Profile("Plotting"): # Plot plot(profile_meas2, profile_fit) pass # Plotting local selection efficiencies for D2-kNN < 0 # -- Compute signal efficiency # MC weights are scaled with lumi. This is just for better comparison #if INPUT =="mc": # data.loc[:,'TotalEventWeight'] /= 139000000. 
for sig, msk in zip([True, False], [msk_sig, msk_bkg]): # Define arrays shape = AXIS[VARX][0] bins = np.linspace(AXIS[VARX][1], AXIS[VARX][2], AXIS[VARX][0]+ 1, endpoint=True) #bins = np.linspace(AXIS[VARX][1], 4000, 40, endpoint=True) #bins = np.append(bins, [4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000]) print "HERE: ", bins #x, y = (np.zeros(shape) for _ in range(2)) # Create `profile` histogram profile_knn = ROOT.TH1F('profile', "", len(bins) - 1, bins ) #.flatten('C') ) profile_org = ROOT.TH1F('profile', "", len(bins) - 1, bins ) #.flatten('C') ) # Compute inclusive efficiency in bins of `VARX` effs = list() for i in range(shape): msk_bin = (data[VARX] > bins[i]) & (data[VARX] <= bins[i+1]) msk_pass = data[knnfeat] > 0 # <? msk_pass_org = data[orgfeat] > 70 # <? num = data.loc[msk & msk_bin & msk_pass, 'TotalEventWeight'].values.sum() num_org = data.loc[msk & msk_bin & msk_pass_org, 'TotalEventWeight'].values.sum() den = data.loc[msk & msk_bin,'TotalEventWeight'].values.sum() if den > 0: eff = num/den *100. eff_org = num_org/den *100. 
profile_knn.SetBinContent(i + 1, eff) profile_org.SetBinContent(i + 1, eff_org) effs.append(eff) #else: #print i, "Density = 0" pass c = rp.canvas(batch=True) leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85) leg.AddEntry(profile_knn, "#it{n}_{trk}^{#varepsilon=%s%%} > 0" % ( EFF), "l") leg.AddEntry(profile_org, "#it{n}_{trk} > 70", "l") leg.Draw() pad = c.pads()[0]._bare() pad.cd() pad.SetRightMargin(0.10) pad.SetLeftMargin(0.15) pad.SetTopMargin(0.10) # Styling profile_knn.SetLineColor(rp.colours[1]) profile_org.SetLineColor(rp.colours[2]) profile_knn.SetMarkerStyle(24) profile_knn.GetXaxis().SetTitle( "#it{m}_{jj} [GeV]" ) #latex(VARX, ROOT=True) + "[GeV]") #+ " = log(m^{2}/p_{T}^{2})") #profile.GetXaxis().SetTitle("Large-#it{R} jet " + latex(VARX, ROOT=True))# + " = log(m^{2}/p_{T}^{2})") profile_org.GetYaxis().SetTitle("Selection efficiency (%)") # for #it{n}_{trk}^{#varepsilon=%s%%}>0" % ( EFF)) profile_knn.GetYaxis().SetNdivisions(505) #profile_knn.GetXaxis().SetNdivisions(505) profile_knn.GetXaxis().SetTitleOffset(1.4) profile_knn.GetYaxis().SetTitleOffset(1.8) profile_knn.GetXaxis().SetRangeUser(*XRANGE) profile_org.GetXaxis().SetRangeUser(*XRANGE) yrange = (0., EFF*3) #2.0 percent if yrange: profile_knn.GetYaxis().SetRangeUser(*yrange) profile_org.GetYaxis().SetRangeUser(*yrange) pass # Draw profile_org.Draw() profile_knn.Draw("same") # Save mkdir('figures/knn/') c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.pdf'.format(FIT, 'sig' if sig else 'bkg', VAR, EFF, MODEL+INPUT, MIN_STAT)) #c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.png'.format(FIT, 'sig' if sig else 'bkg', VAR, EFF, MODEL, MIN_STAT)) c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.eps'.format(FIT, 'sig' if sig else 'bkg', VAR, EFF, MODEL+INPUT, MIN_STAT)) del c pass return
def plot (profile, fit):
    """
    Draw the measured profile together with its fit and save the figure.

    Saves `figures/knn/[FIT]_profile_[VAR]_[EFF]_[MODEL+INPUT]_stat[MIN_STAT]`
    as both `.pdf` and `.eps`.

    Arguments:
        profile: ROOT object holding the measured profile. It is drawn first
            (option "AP") and carries the axis titles and ranges.
        fit: ROOT object holding the fit, drawn on top with option "SAME".
    """

    # Legend texts for the sample names used by `INPUT` and `MODEL`
    sample_labels = {
        'data': "CR Data",
        'mcCR': "CR MC",
        'mc': "Full MC",
    }

    # Legend texts per fit type; the first key contained in `FIT` wins
    fit_labels = (
        ('knn', "k-NN fit "),
        ('poly2', "2. order polynomial fit "),
        ('poly3', "3. order polynomial fit "),
        ('erf', "Error function fit "),
    )

    # rootplotting
    c = rp.canvas(batch=True)
    pad = c.pads()[0]._bare()
    pad.cd()
    pad.SetRightMargin(0.20)
    pad.SetLeftMargin(0.15)
    pad.SetTopMargin(0.10)

    # Styling
    profile.SetMarkerColor(4)
    profile.SetMarkerStyle(20)
    fit.SetLineColor(2)
    fit.SetMarkerColor(4)
    fit.SetMarkerStyle(20)

    xaxis = profile.GetXaxis()
    yaxis = profile.GetYaxis()
    xaxis.SetTitle("#it{m}_{jj} [GeV]")
    yaxis.SetTitle("#it{P}^{#varepsilon=%s%%}" % (EFF))
    yaxis.SetNdivisions(505)
    xaxis.SetTitleOffset(1.4)
    yaxis.SetTitleOffset(1.4)
    xaxis.SetRangeUser(*XRANGE)
    if YRANGE:
        yaxis.SetRangeUser(*YRANGE)
        pass

    # Draw
    profile.Draw("AP")
    fit.Draw("SAME")

    # Legend
    leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)
    if INPUT in sample_labels:
        leg.AddEntry(profile, sample_labels[INPUT], "p")
        pass

    # Fall back to a generic text for an unrecognised `FIT`, rather than
    # failing on an unbound variable right before the figure is saved
    fitLegend = next((text for key, text in fit_labels if key in FIT), "Fit ")
    if MODEL in sample_labels:
        fitLegend += "to " + sample_labels[MODEL]
        pass
    leg.AddEntry(fit, fitLegend, "l")
    leg.Draw()

    # Save
    mkdir('figures/knn/')
    base = 'figures/knn/{}_profile_{:s}_{}_{}_stat{}'.format(
        FIT, VAR, EFF, MODEL + INPUT, MIN_STAT)
    c.save(base + '.pdf')
    c.save(base + '.eps')

    del c
    pass
def perform_optimisation(var, bins, data): """ ... """ # Fill 2D substructure profile profile2d = fill_2d_profile(data, var, bins, "m", MASS_BINS) # Get 1D profile for lowest mass bin profile0 = profile2d.ProjectionY("%s_lowMass" % profile2d.GetName(), 1, 1) profile0 = kde(profile0) normalise(profile0, density=True) # Perform the optimisation bestShapeVal = 0 bestSumChi2 = 1e20 for shapeVal in SHAPEVAL_RANGE: print "Shape value: ", shapeVal sumChi2 = 0. # Each mass bin needs to be optimized over omega for mass in range(len(MASS_BINS) - 1): print " Mass bin: ", mass # Get 1D profile for current mass bin profile = profile2d.ProjectionY( "%s_bin_%i" % (profile2d.GetName(), mass), mass + 1, mass + 1) # Fit current profile to low-mass profile chi2, bestOmega, _, _ = fit(profile, shapeVal, profile0, "%.2f" % mass) # Accumulate chi2 sumChi2 += chi2 pass # Update chi2 for current `shapeVal` print "-- sumChi2: {} (cp. {})".format(sumChi2, bestSumChi2) if sumChi2 < bestSumChi2: bestSumChi2 = sumChi2 bestShapeVal = shapeVal pass pass # Saving CSS transforms with Profile("Saving CSS transform"): # Ensure model directory exists mkdir('models/css/') mkdir( 'figures/css/' ) ## put in by me because errors were eturned when saving the pdfs # Get the optimal, measured `omega`s for each mass-bin bestOmegas = list() for mass in range(len(MASS_BINS) - 1): profile = profile2d.ProjectionY( "%s_bin_%i_final" % (profile2d.GetName(), mass), mass + 1, mass + 1) sumChi2, bestOmega, profile_css, profile0rebin = fit( profile, bestShapeVal, profile0, "%.2f" % mass) # Test-plot distributions used for fitting! 
# -- Canvas c = rp.canvas(batch=True) # -- Plot profile = kde(profile) normalise(profile, density=True) lowmassbin = "#it{{m}} #in [{:.1f}, {:.1f}] GeV".format( MASS_BINS[0], MASS_BINS[1]).replace('.0', '') massbin = "#it{{m}} #in [{:.1f}, {:.1f}] GeV".format( MASS_BINS[mass], MASS_BINS[mass + 1]).replace('.0', '') c.hist(profile0rebin, label=latex(var, ROOT=True) + ", {}".format(lowmassbin), linecolor=rp.colours[1], fillcolor=rp.colours[1], alpha=0.5, option='HISTL', legend_option='FL') c.hist(profile, label=latex(var, ROOT=True) + ", {}".format(massbin), linecolor=rp.colours[4], linestyle=2, option='HISTL') c.hist(profile_css, label=latex(var + 'CSS', ROOT=True) + ", {}".format(massbin), linecolor=rp.colours[3], option='HISTL') # -- Decorations c.xlabel( latex(var, ROOT=True) + ", " + latex(var + 'CSS', ROOT=True)) c.ylabel("Number of jets p.d.f.") c.legend(xmin=0.45, ymax=0.76, width=0.25) c.text(["#sqrt{s} = 13 TeV, Multijets", "KDE smoothed"], qualifier=QUALIFIER, ATLAS=False) c.pad()._xaxis().SetTitleOffset(1.3) c.pad()._yaxis().SetNdivisions(105) c.pad()._primitives[-1].Draw('SAME AXIS') c.padding(0.50) # -- Save c.save('figures/css/css_test_{}_mass{}.pdf'.format(var, mass)) # Store best-fit omega in array print mass, bestOmega bestOmegas.append(bestOmega) pass # Fit best omega vs. 
mass x = MASS_BINS[:-1] + 0.5 * np.diff(MASS_BINS) y = np.array(bestOmegas) h = ROOT.TH1F('hfit', "", len(MASS_BINS) - 1, MASS_BINS) root_numpy.array2hist(y, h) for ibin in range(1, len(x) + 1): h.SetBinError( ibin, 0.02) # Just some value to ensure equal errors on all points pass m0 = 0.5 * (MASS_BINS[0] + MASS_BINS[1]) f = ROOT.TF1( "fit", "[0] * (1./{m0} - 1./x) + [1] * TMath::Log(x/{m0})".format(m0=m0), m0, 300) f.SetLineColor(rp.colours[4]) f.SetLineStyle(2) h.Fit(f) # Write out the optimal configuration for each mass bin for mass in range(len(MASS_BINS) - 1): profile = profile2d.ProjectionY( "%s_bin_%i_final" % (profile2d.GetName(), mass), mass + 1, mass + 1) profile = kde(profile) normalise(profile, density=True) bestOmegaFitted_ = f.Eval( h.GetBinCenter(mass + 1)) + np.finfo(float).eps bestOmegaFitted = max(bestOmegaFitted_, 1E-04) #bestOmegaFitted = h.GetBinContent(mass + 1) print "bestOmegaFitted[{}] = {} --> {}".format( mass, bestOmegaFitted_, bestOmegaFitted) F, Ginv = get_css_fns(bestShapeVal, bestOmegaFitted, profile, "") # Save classifier saveclf(F, 'models/css/css_%s_F_%i.pkl.gz' % (var, mass)) saveclf(Ginv, 'models/css/css_%s_Ginv_%i.pkl.gz' % (var, mass)) pass # Plot best omega vs. mass # -- Canvas c = rp.canvas(batch=True) # -- Plots #c.hist(bestOmegas, bins=MASS_BINS, linecolor=rp.colours[1]) c.hist(h, linecolor=rp.colours[1], option='HIST', label="Measured") f.Draw('SAME') # -- Decorations c.xlabel("Large-#it{R} jet mass [GeV]") c.ylabel("Best-fit #Omega_{D}") c.text([ "#sqrt{s} = 13 TeV, Multijets", "CSS applied to {}".format( latex(var, ROOT=True)), "Best-fit #alpha = {:.1f}".format(bestShapeVal) ], qualifier=QUALIFIER, ATLAS=False) c.legend(categories=[('Functional fit', { 'linewidth': 2, 'linestyle': 2, 'linecolor': rp.colours[4] })]) # Save c.save('figures/css/cssBestOmega_{}.pdf'.format(var)) pass return 0
def main (args):
    """
    Plot the jet-mass distribution inclusively and in bins of pT.

    For each of 18 equal-width pT bins between 200 and 2000, one figure
    `figures/temp_mass_pT[low]_[high].pdf` is saved, showing the mass
    distribution (a) for all jets with `weight_adv`, (b) for jets in the pT
    bin with `weight_adv`, and (c) for jets in the pT bin with `weight_test`.

    NOTE(review): the function returns right after this loop, so the
    inclusive and 2D distribution plots further down never run. They are
    kept unchanged; confirm whether the early return is still intended.

    Arguments:
        args: Command-line arguments, forwarded to `initialise`.
    """

    # Definitions. Copy the inner style dicts as well, so that setting the
    # labels below does not modify the shared module-level `HISTSTYLE`.
    histstyle = {key: dict(HISTSTYLE[key]) for key in HISTSTYLE.keys()}
    histstyle[True]['label'] = 'Inclusive'

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5', background=True, train=True)

    pt_edges = np.linspace(200, 2000, 18 + 1, endpoint=True)
    pt_bins = zip(pt_edges[:-1], pt_edges[1:])
    bins = np.linspace(50, 300, (300 - 50) // 10 + 1, endpoint=True)

    mass = data['m'].values
    weight_adv = data['weight_adv'].values
    weight_test = data['weight_test'].values

    for pt_bin in pt_bins:
        histstyle[False]['label'] = 'p_{{T}} #in [{:.0f}, {:.0f}] GeV'.format(*pt_bin)

        # Canvas
        c = rp.canvas(batch=True)

        # Plots. The keyword is `weights`, as in all other `c.hist` calls.
        msk = ((data['pt'] > pt_bin[0]) & (data['pt'] < pt_bin[1])).values
        c.hist(mass, bins=bins, weights=weight_adv, normalise=True, **histstyle[True])
        c.hist(mass[msk], bins=bins, weights=weight_adv[msk], normalise=True, **histstyle[False])
        c.hist(mass[msk], bins=bins, weights=weight_test[msk], normalise=True,
               label="Testing weight", linewidth=2, linecolor=ROOT.kGreen)

        # Decorations
        c.legend()
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Fraction of jets")

        # Save
        c.save('figures/temp_mass_pT{:.0f}_{:.0f}.pdf'.format(*pt_bin))
        pass

    return

    # -------------------------------------------------------------------------
    # NOTE(review): everything below is unreachable due to the `return` above.
    # -------------------------------------------------------------------------

    data['logm'] = pd.Series(np.log(data['m']), index=data.index)

    # Check variable distributions
    axes = {
        'pt': (45, 200, 2000),
        'm': (50, 50, 300),
        'rho': (50, -8, 0),
        'logm': (50, np.log(50), np.log(300)),
    }
    weight = 'weight_adv'  # 'weight_test' / 'weight'
    pt_range = (200., 2000.)
    msk_pt = (data['pt'] > pt_range[0]) & (data['pt'] < pt_range[1])
    for var in axes:

        # Canvas
        c = rp.canvas(num_pads=2, batch=True)

        # Plot
        bins = np.linspace(axes[var][1], axes[var][2], axes[var][0] + 1, endpoint=True)
        for adv in [0, 1]:
            msk = data['signal'] == 0  # @TEMP signal
            msk &= msk_pt
            opts = dict(normalise=True, **HISTSTYLE[adv])  # @TEMP signal
            opts['label'] = 'adv' if adv else 'test'
            if adv:
                h1 = c.hist(data.loc[msk, var].values, bins=bins, weights=data.loc[msk, weight].values, **opts)
            else:
                h2 = c.hist(data.loc[msk, var].values, bins=bins, weights=data.loc[msk, 'weight_test'].values, **opts)
                pass
            pass

        # Ratio
        c.pads()[1].ylim(0, 2)
        c.ratio_plot((h1, h2), oob=True)

        # Decorations
        c.legend()
        c.xlabel(latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.pads()[1].ylabel("adv/test")
        c.text(TEXT + ['p_{{T}} #in [{:.0f}, {:.0f}] GeV'.format(pt_range[0], pt_range[1])], qualifier=QUALIFIER)

        # Save
        mkdir('figures/distributions')
        c.save('figures/distributions/incl_{}.pdf'.format(var))
        pass

    # 2D histograms
    msk = data['signal'] == 0
    axisvars = sorted(list(axes))
    for i, varx in enumerate(axisvars):
        for vary in axisvars[i + 1:]:

            # Canvas
            c = ROOT.TCanvas()
            c.SetRightMargin(0.20)

            # Create, fill histogram
            h2 = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))
            root_numpy.fill_hist(h2, data.loc[msk, [varx, vary]].values, 100. * data.loc[msk, weight].values)

            # Draw
            h2.Draw("COLZ")

            # Decorations
            h2.GetXaxis().SetTitle(latex(varx, ROOT=True))
            h2.GetYaxis().SetTitle(latex(vary, ROOT=True))
            c.SetLogz()

            # Save
            c.SaveAs('figures/distributions/2d_{}_{}.pdf'.format(varx, vary))
            pass
        pass

    return
def plot1D (*argv):
    """
    Draw the original and DDT-transformed profiles with the linear fit.

    Saves plot `figures/ddt/ddt_[variable].pdf`

    Arguments (positional, in this order):
        graphs: Mapping from variable name to the graph to draw; read at
            `variable` and at `variable + 'DDT'`.
        ddt: Fitted linear model; `intercept_`, `offset_` and `coef_` are
            read, and the first element of intercept and coefficient is
            printed on the figure.
        arr_x: x-values whose minimum and maximum delimit the drawn fit line.
        variable: Name of the substructure variable. Graphs and y-axis label
            are only drawn for `VAR_TAU21`, `VAR_N2`, `VAR_DECDEEP` and
            `VAR_DEEP`.
        fit_range: Pair of x-values marked by vertical lines as fit range.
    """

    # Unpack arguments
    graphs, ddt, arr_x, variable, fit_range = argv

    # ROOT-formatted name of each supported variable; all labels below are
    # built from it
    names = (
        (VAR_TAU21, "#tau_{21}"),
        (VAR_N2, "N_{2}"),
        (VAR_DECDEEP, "dec_deepWvsQCD"),
        (VAR_DEEP, "deepWvsQCD"),
    )
    name = next((text for key, text in names if variable == key), None)

    # Style
    ROOT.gStyle.SetTitleOffset(1.4, 'x')

    # Canvas
    c = rp.canvas(batch=True)

    # Setup
    pad = c.pads()[0]._bare()
    pad.cd()
    pad.SetTopMargin(0.10)

    # Profiles
    if name is not None:
        c.graph(graphs[variable],
                label="Original, " + name,
                linecolor=rp.colours[4],
                markercolor=rp.colours[4],
                markerstyle=24,
                legend_option='PE')
        c.graph(graphs[variable + 'DDT'],
                label="Transformed, " + name + "^{DDT}",
                linecolor=rp.colours[1],
                markercolor=rp.colours[1],
                markerstyle=20,
                legend_option='PE')
        pass

    # Fit
    x1, x2 = min(arr_x), max(arr_x)
    intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
    y1 = intercept + x1 * coef
    y2 = intercept + x2 * coef
    c.plot([y1, y2], bins=[x1, x2], color=rp.colours[-1], label='Linear fit',
           linewidth=1, linestyle=1, option='L')

    # Decorations
    c.xlabel("jet #rho^{DDT} = log[m^{2} / (p_{T} #times 1 GeV)]")
    if name is not None:
        c.ylabel("#LT" + name + "#GT, #LT" + name + "^{DDT}#GT")
        pass

    c.text(["#sqrt{s} = 13 TeV, Multijets"], qualifier=QUALIFIER, ATLAS=False)
    c.legend(width=0.25, xmin=0.57, ymax=0.86)

    ymax = 0.8 if variable == VAR_N2 else 1.4
    c.xlim(0, 6.0)
    c.ylim(0, ymax)

    c.latex("Fit range", sum(fit_range) / 2., 0.08, textsize=13,
            textcolor=ROOT.kGray + 2)
    c.latex("Fit parameters:", 0.37, 0.7*ymax, align=11, textsize=14,
            textcolor=ROOT.kBlack)
    c.latex(" intercept = {:7.4f}".format(intercept[0]), 0.37, 0.65*ymax,
            align=11, textsize=14, textcolor=ROOT.kBlack)
    c.latex(" coef = {:7.4f}".format(coef[0]), 0.37, 0.6*ymax, align=11,
            textsize=14, textcolor=ROOT.kBlack)
    c.xline(fit_range[0], ymax=0.82, text_align='BR', linecolor=ROOT.kGray + 2)
    c.xline(fit_range[1], ymax=0.82, text_align='BL', linecolor=ROOT.kGray + 2)

    # Save
    mkdir('figures/ddt/')
    c.save('figures/ddt/ddt_{}.pdf'.format(variable))
    return
def main(args):
    """
    Train one uBoost classifier per uniforming rate and pickle each of them.

    Loads `data_1M_10M.h5` from `args.input`, keeps the rows with
    `train == 1`, fits the classifiers in parallel threads using
    `weight_adv` as sample weight and `signal` as target, and writes each
    classifier as a gzipped pickle to `models/uboost/`.

    Arguments:
        args: Command-line arguments, forwarded to `initialise`.

    Returns:
        0, always.
    """

    # Initialising
    # --------------------------------------------------------------------------
    args, cfg = initialise(args)

    # Loading data
    # --------------------------------------------------------------------------
    data, features, _ = load_data(args.input + 'data_1M_10M.h5')
    data = data[data['train'] == 1]

    # Reduce size of data. `DataFrame.drop` returns a new frame, so the
    # result has to be assigned for the columns to actually be removed. The
    # uniform features configured for uBoost are kept as well, since the
    # classifier reads them from `X`.
    uniform_features = cfg['uBoost'].get('uniform_features') or []
    if not isinstance(uniform_features, (list, tuple)):
        uniform_features = [uniform_features]
        pass
    keep = set(features) | set(uniform_features) | {'m', 'signal', 'weight_adv'}
    data = data.drop([feat for feat in data.columns if feat not in keep], axis=1)

    cfg['uBoost']['train_features'] = features
    cfg['uBoost']['random_state'] = SEED
    cfg['DecisionTreeClassifier']['random_state'] = SEED

    # Arrays
    X = data
    w = np.array(data['weight_adv']).flatten()
    y = np.array(data['signal']).flatten()

    # Fit uBoost classifier
    # --------------------------------------------------------------------------
    with Profile("Fitting uBoost classifier"):

        # @NOTE: There might be an issue with the sample weights, because the
        #     local efficiencies computed using kNN do not seem to take the
        #     sample weights into account.
        #
        #     See:
        #       https://github.com/arogozhnikov/hep_ml/blob/master/hep_ml/uboost.py#L247-L248
        #     and
        #       https://github.com/arogozhnikov/hep_ml/blob/master/hep_ml/metrics_utils.py#L159-L176
        #     with `divided_weights` not set.
        #
        #     `sample_weight` seems to be used only as a starting point for
        #     the boosting, and so not for the efficiency calculation. If
        #     this is indeed the case, it would be possible to simply sample
        #     MC events by their weight, and use `sample_weight = 1` for all
        #     samples passed to uBoost.
        #
        # @NOTE: I have gotten less sure of the above, so probably no panic.

        def train_uBoost(X, y, w, cfg, uniforming_rate):
            """
            Fit and return a uBoost classifier for one `uniforming_rate`.
            """

            # Create base classifier
            base_tree = DecisionTreeClassifier(**cfg['DecisionTreeClassifier'])

            # Update training configuration on a copy, so the jobs do not
            # overwrite each other's uniforming rate
            these_cfg = dict(**cfg['uBoost'])
            these_cfg['uniforming_rate'] = uniforming_rate

            # Create and fit uBoost classifier
            uboost = uBoostBDT(base_estimator=base_tree, **these_cfg)
            uboost.fit(X, y, sample_weight=w)

            return uboost

        uniforming_rates = [0.0, 0.01, 0.1, 0.3, 0.5, 1.0]

        n_jobs = min(7, len(uniforming_rates))

        jobs = [
            delayed(train_uBoost, check_pickle=False)(X, y, w, cfg, uniforming_rate)
            for uniforming_rate in uniforming_rates
        ]

        result = Parallel(n_jobs=n_jobs, backend="threading")(jobs)
        pass

    # Saving classifiers
    # --------------------------------------------------------------------------
    # Ensure model directory exists
    mkdir('models/uboost/')
    suffix_te = "te_{:d}".format(int(cfg['uBoost']['target_efficiency'] * 100))

    for uboost, uniforming_rate in zip(result, uniforming_rates):
        with Profile("Saving classifiers"):

            # E.g. a rate of 0.5 becomes "ur_0p50"
            suffix_ur = "ur_{:s}".format(("%.2f" % uniforming_rate).replace('.', 'p'))

            # Save uBoost classifier
            path = 'models/uboost/uboost_{}_{}_rel21_fixed_def_cfg_1000boost.pkl.gz'.format(
                suffix_ur, suffix_te)
            with gzip.open(path, 'wb') as f:
                pickle.dump(uboost, f)
                pass
            pass
        pass

    return 0
def main(args):
    """
    Compare training and testing weights in distributions of mass and pT.

    For each variable ('m', 'pt'), each pT selection (inclusive, then 18
    equal-width bins between 200 and 2000) and both linear and logarithmic
    y-axis, one figure `figures/weighting_[var][_pT..][_log].pdf` is saved,
    showing the distribution with `weight_test` and with `weight_adv`.

    NOTE(review): the function returns right after this loop, so the
    inclusive and 2D distribution plots further down never run. They are
    kept unchanged; confirm whether the early return is still intended.

    Arguments:
        args: Command-line arguments, forwarded to `initialise`.
    """

    # Definitions. Copy the inner style dicts as well, so that setting the
    # labels does not modify the shared module-level `HISTSTYLE`.
    histstyle = {key: dict(HISTSTYLE[key]) for key in HISTSTYLE.keys()}
    histstyle[True]['label'] = 'Training weight'
    histstyle[False]['label'] = 'Testing weight'

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5', background=True, train=True)

    # `None` stands for the inclusive selection
    pt_edges = np.linspace(200, 2000, 18 + 1, endpoint=True)
    pt_bins = [None] + list(zip(pt_edges[:-1], pt_edges[1:]))

    binning = {
        'm': np.linspace(50, 300, (300 - 50) // 10 + 1, endpoint=True),
        'pt': np.linspace(200, 2000, (2000 - 200) // 50 + 1, endpoint=True),
    }

    for var, pt_bin, log in itertools.product(['m', 'pt'], pt_bins, [True, False]):
        bins = binning[var]

        # Canvas
        c = rp.canvas(batch=True)

        # Selection
        if pt_bin is not None:
            msk = ((data['pt'] > pt_bin[0]) & (data['pt'] < pt_bin[1])).values
        else:
            msk = np.ones(data.shape[0], dtype=bool)
            pass

        # Plots
        values = data[var].values[msk]
        c.hist(values, bins=bins, weights=data['weight_test'].values[msk], normalise=True, **histstyle[False])
        c.hist(values, bins=bins, weights=data['weight_adv'].values[msk], normalise=True, **histstyle[True])

        # Decorations
        c.text(TEXT + ["Multijets", "Training dataset"] +
               (['p_{{T}} #in [{:.0f}, {:.0f}] GeV'.format(*pt_bin)] if pt_bin is not None else []),
               qualifier='Simulation Internal')
        c.legend()
        c.xlabel("Large-#it{{R}} jet {:s} [GeV]".format('mass' if var == 'm' else 'p_{T}'))
        c.ylabel("Fraction of jets")
        if log:
            c.logy()
            pass

        # Save
        c.save('figures/weighting_{}{:s}{}.pdf'.format(
            'mass' if var == 'm' else var,
            '_pT{:.0f}_{:.0f}'.format(*pt_bin) if pt_bin is not None else '',
            '_log' if log else ''))
        pass

    return

    # -------------------------------------------------------------------------
    # NOTE(review): everything below is unreachable due to the `return` above.
    # -------------------------------------------------------------------------

    data['logm'] = pd.Series(np.log(data['m']), index=data.index)

    # Check variable distributions
    axes = {
        'pt': (45, 200, 2000),
        'm': (50, 50, 300),
        'rho': (50, -8, 0),
        'logm': (50, np.log(50), np.log(300)),
    }
    weight = 'weight_adv'  # 'weight_test' / 'weight'
    pt_range = (200., 2000.)
    msk_pt = (data['pt'] > pt_range[0]) & (data['pt'] < pt_range[1])
    for var in axes:

        # Canvas
        c = rp.canvas(num_pads=2, batch=True)

        # Plot
        bins = np.linspace(axes[var][1], axes[var][2], axes[var][0] + 1, endpoint=True)
        for adv in [0, 1]:
            msk = data['signal'] == 0  # @TEMP signal
            msk &= msk_pt
            opts = dict(normalise=True, **HISTSTYLE[adv])  # @TEMP signal
            opts['label'] = 'adv' if adv else 'test'
            if adv:
                h1 = c.hist(data.loc[msk, var].values, bins=bins, weights=data.loc[msk, weight].values, **opts)
            else:
                h2 = c.hist(data.loc[msk, var].values, bins=bins, weights=data.loc[msk, 'weight_test'].values, **opts)
                pass
            pass

        # Ratio
        c.pads()[1].ylim(0, 2)
        c.ratio_plot((h1, h2), oob=True)

        # Decorations
        c.legend()
        c.xlabel(latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.pads()[1].ylabel("adv/test")
        c.text(TEXT + ['p_{{T}} #in [{:.0f}, {:.0f}] GeV'.format(pt_range[0], pt_range[1])], qualifier=QUALIFIER)

        # Save
        mkdir('figures/distributions')
        c.save('figures/distributions/incl_{}.pdf'.format(var))
        pass

    # 2D histograms
    msk = data['signal'] == 0
    axisvars = sorted(list(axes))
    for i, varx in enumerate(axisvars):
        for vary in axisvars[i + 1:]:

            # Canvas
            c = ROOT.TCanvas()
            c.SetRightMargin(0.20)

            # Create, fill histogram
            h2 = ROOT.TH2F('{}_{}'.format(varx, vary), "", *(axes[varx] + axes[vary]))
            root_numpy.fill_hist(h2, data.loc[msk, [varx, vary]].values, 100. * data.loc[msk, weight].values)

            # Draw
            h2.Draw("COLZ")

            # Decorations
            h2.GetXaxis().SetTitle(latex(varx, ROOT=True))
            h2.GetYaxis().SetTitle(latex(vary, ROOT=True))
            c.SetLogz()

            # Save
            c.SaveAs('figures/distributions/2d_{}_{}.pdf'.format(varx, vary))
            pass
        pass

    return