def plot_score(self):
    """Plot the DNN score of the train and test samples for every k-fold.

    For each fold, the scores of jets labelled 1 (top) and 0 (anti-top)
    are drawn as normalised histograms with the ratio plot enabled
    ("Test/Train"): training scores as outlines, testing scores as
    filled areas.  One figure is saved per fold.

    For each fold the fraction of anti-top test jets whose score is at or
    above the ``self.percentile`` percentile of the top test scores is
    appended to ``self.rejections``.  After all folds, the mean and
    standard deviation of ``self.rejections`` are stored in
    ``self.rejection`` under the keys ``'mean'`` and ``'std'``.
    """
    # Strip directory and extension from the input file name; it is the key
    # used to look up the process label drawn on the figure.
    filename = self.metadata['file'].split('/')[-1].split('.')[0].rstrip('\n')

    top_color = 'r'
    tbar_color = 'b'

    # The feature arrays (X) are not needed here, but they are still zipped
    # so the number of folds iterated over is unchanged.
    folds = zip(self.train_data['X'], self.train_data['Y'],
                self.test_data['X'], self.test_data['Y'])

    # Plot all k-fold cross-validation results
    for i, (_, train_Y, _, test_Y) in enumerate(folds):
        fold = str(i)

        hist = HepPlotter("histogram", 1)

        hist.ratio_plot = True
        hist.y_ratio_label = "Test/Train"
        hist.normed = True
        hist.binning = [0.05 * j for j in range(21)]  # 20 bins on [0, 1]
        hist.stacked = False
        hist.logplot = False
        hist.x_label = "DNN Score"
        hist.y_label = "Events"
        hist.format = self.image_format
        hist.label_size = 14
        hist.saveAs = self.output + "/hist_DNNscore_kfold{0}_{1}".format(
            i, self.date)
        hist.ATLASlabel = 'top left'
        hist.ATLASlabelStatus = 'Simulation Internal'
        hist.numLegendColumns = 1
        hist.extra_text.Add(self.processlabel_args[filename]['label'],
                            coords=[0.03, 0.80],
                            fontsize=14)

        hist.initialize()

        # Split the scores of this fold by truth label.
        top_train_scores = self.train_scores[i][train_Y == 1]
        tbar_train_scores = self.train_scores[i][train_Y == 0]
        top_test_scores = self.test_scores[i][test_Y == 1]
        tbar_test_scores = self.test_scores[i][test_Y == 0]

        # (name stem, legend stem, colour, train scores, test scores)
        samples = [
            ('top', 'Large-R Jet (top)', top_color,
             top_train_scores, top_test_scores),
            ('tbar', 'Large-R Jet (anti-top)', tbar_color,
             tbar_train_scores, tbar_test_scores),
        ]

        ## Train: outlines, used as the ratio denominator
        for stem, legend, colour, train_scores, _ in samples:
            hist.Add(train_scores,
                     name='score_' + stem + '_train_' + fold,
                     linecolor=colour,
                     color=colour,
                     linewidth=2,
                     draw='step',
                     label=legend + ' Train ' + fold,
                     ratio_den=True,
                     ratio_num=False,
                     ratio_partner='score_' + stem + '_test_' + fold)

        ## Test: filled areas, used as the ratio numerator
        for stem, legend, colour, _, test_scores in samples:
            hist.Add(test_scores,
                     name='score_' + stem + '_test_' + fold,
                     linecolor=colour,
                     color=colour,
                     draw='stepfilled',
                     label=legend + ' Test ' + fold,
                     alpha=0.5,
                     linewidth=0,
                     ratio_num=True,
                     ratio_den=False,
                     ratio_partner='score_' + stem + '_train_' + fold)

        hist.execute()
        hist.savefig()

        ## Calculation of the rejection
        ## use percentile (set elsewhere on the instance) to pick the score
        ## threshold from the top test scores
        eff_value = np.percentile(top_test_scores, self.percentile)
        tbar_wrong = tbar_test_scores[tbar_test_scores >= eff_value]
        rejection = float(len(tbar_wrong)) / len(tbar_test_scores)
        # NOTE(review): self.rejections is only appended to here, never reset;
        # calling this method twice would accumulate entries -- confirm it is
        # (re)initialised by the caller.
        self.rejections.append(rejection)

    # Summary over every entry collected so far.
    self.rejection = {
        'mean': np.mean(self.rejections),
        'std': np.std(self.rejections)
    }

    return
def plot_features(self):
    """Plot the input features and their correlation matrices.

    Two sets of figures are produced:

    1. One histogram per entry of ``self.features2plot`` comparing top and
       anti-top.  Names starting with ``t_`` / ``tbar_`` are read from the
       separate ``t_<feature>`` and ``tbar_<feature>`` columns of
       ``self.df``; any other name is read from a single column, with the
       ``target`` column selecting the top and anti-top rows.  A feature
       is plotted once even if it is listed under several prefixes.
    2. One correlation matrix of ``self.features`` for the top rows and
       one for the anti-top rows.
    """
    self.msg_svc.INFO(
        "DL : Plotting features comparing top quarks and anti-quarks. ")

    target = self.df['target']
    top = self.df.loc[target == self.metadata['t_target']]
    tbar = self.df.loc[target == self.metadata['tbar_target']]

    # Strip directory and extension from the input file name; it is the key
    # used to look up the process label drawn on every figure.
    filename = self.metadata['file'].split('/')[-1].split('.')[0].rstrip('\n')
    process_label = self.processlabel_args[filename]['label']

    processed_features = set()

    for feature in self.features2plot:

        if feature.startswith('t_'):
            # specific top properties in dataframe
            feature = feature[2:]
            eventlevel = True
        elif feature.startswith('tbar_'):
            # specific tbar properties in dataframe
            feature = feature[5:]
            eventlevel = True
        else:
            # single object in dataframe (use 'target' to distinguish)
            eventlevel = False

        # 't_x' and 'tbar_x' reduce to the same name: draw it only once.
        if feature in processed_features:
            continue
        processed_features.add(feature)

        x_label = self.text_dicts[feature]['label']
        if 'btag' in feature:
            # b-tagging labels carry a placeholder for the working point
            x_label = x_label.format(self.metadata['btag_wkpt'])

        hist = HepPlotter("histogram", 1)

        hist.ratio_plot = False
        hist.binning = self.text_dicts[feature]['bins']
        hist.stacked = False
        hist.logplot = False
        hist.x_label = x_label
        hist.y_label = "Events"
        hist.format = self.image_format
        hist.saveAs = self.output + "/hist_" + feature + "_" + self.date
        hist.ATLASlabel = 'top left'
        hist.ATLASlabelStatus = 'Simulation Internal'
        hist.numLegendColumns = 1
        hist.extra_text.Add(process_label, coords=[0.03, 0.80])

        hist.initialize()

        # Scale factor applied to selected features before plotting
        # (presumably MeV -> GeV -- TODO confirm).
        multiply = 1.
        if feature.endswith('_m_ttbar') or feature == 'pt':
            multiply = 1e-3

        if eventlevel:
            # NOTE(review): `multiply` is not applied on this branch, so a
            # feature such as 't_pt' is plotted unscaled -- confirm whether
            # that is intended.
            top_values = self.df['t_' + feature]
            tbar_values = self.df['tbar_' + feature]
        else:
            top_values = top[feature].multiply(multiply)
            tbar_values = tbar[feature].multiply(multiply)

        hist.Add(top_values,
                 name=feature + '_top',
                 linecolor='r',
                 color='r',
                 draw='step',
                 label='Large-R Jet (top)')
        hist.Add(tbar_values,
                 name=feature + '_tbar',
                 linecolor='b',
                 color='b',
                 draw='step',
                 label='Large-R Jet (anti-top)')

        hist.execute()
        hist.savefig()

    ## Correlation Matrices of Features (top/antitop) ##
    corrmats = [top[self.features].corr(), tbar[self.features].corr()]
    names = ["top", "tbar"]
    namelabels = [r"t correlations", r"$\bar{\text{t}}$ correlations"]

    fontProperties = {'family': 'sans-serif'}
    opts = {'cmap': plt.get_cmap("bwr"), 'vmin': -1, 'vmax': +1}

    for corr_name, corr_label, corrmat in zip(names, namelabels, corrmats):

        fig, ax = plt.subplots()

        heatmap1 = ax.pcolor(corrmat, **opts)
        cbar = plt.colorbar(heatmap1, ax=ax)

        # Drop the '$' delimiters from the colour-bar tick labels.
        cbar.ax.set_yticklabels(
            [t.get_text().strip('$') for t in cbar.ax.get_yticklabels()],
            **fontProperties)

        # Escape underscores for the text renderer.  '\\_' is the same
        # two-character string the former '\_' literal produced, without
        # relying on an invalid escape sequence.
        labels = [col.replace('_', '\\_') for col in corrmat.columns.values]

        # shift location of ticks to center of the bins
        ticks = np.arange(len(labels)) + 0.5
        ax.set_xticks(ticks, minor=False)
        ax.set_yticks(ticks, minor=False)
        # fontdict is passed by keyword: it is keyword-only in recent
        # Matplotlib releases and accepted by keyword in older ones.
        ax.set_xticklabels(labels,
                           fontdict=fontProperties,
                           fontsize=18,
                           minor=False,
                           ha='right',
                           rotation=70)
        ax.set_yticklabels(labels,
                           fontdict=fontProperties,
                           fontsize=18,
                           minor=False)

        left_text = {
            'fontsize': 16,
            'ha': 'left',
            'va': 'bottom',
            'transform': ax.transAxes
        }
        right_text = dict(left_text, ha='right')

        ## ATLAS Label + Signal name
        ax.text(0.02, 1.00, r"\textbf{\textit{ATLAS}} Simulation Internal",
                **left_text)
        ax.text(0.03, 0.93, "{0}, {1}".format(process_label, corr_label),
                **left_text)

        ## Energy Label
        ax.text(0.99, 1.00, r"$\sqrt{\text{s}}$ = 13 TeV", **right_text)

        # Save and close this specific figure rather than relying on
        # pyplot's implicit "current figure".
        fig.savefig(self.output + "/correlations_{0}_{1}.{2}".format(
            corr_name, self.date, self.image_format),
                    format=self.image_format,
                    dpi=300,
                    bbox_inches='tight')
        plt.close(fig)

    return
# Access data -- assumes you are plotting histograms from multiple sources in one figure for hi, histogram in enumerate(histograms): histogram = histogram.strip('\n') histogramName = histogram.split("h_")[1].split("_pre")[0] print " :: Plotting " + histogram + "\n" ## setup histogram hist = HepPlotter("histogram", 1) hist.ratio_plot = False # plot a ratio of things [Data/MC] hist.ratio_type = "ratio" # "ratio" hist.stacked = True # stack plots hist.rebin = 10 hist.logplot = False # plot on log scale hist.x_label = x_labels[histogramName]["label"] hist.y_label = "Events" hist.y_ratio_label = "" hist.lumi = 'XY.Z' # in /fb hist.format = 'png' # file format for saving image hist.saveAs = outpath + "/hist_" + histogram # save figure with name hist.ATLASlabel = 'top left' # 'top left', 'top right'; hack code for something else hist.ATLASlabelStatus = 'Internal' # ('Simulation')+'Internal' || 'Preliminary' # hist.extra_text.Add("text here",coords=[x,y]) # see hepPlotter for exact use of extra_text (PlotText() objects) hist.initialize() ## Add the data from each file for fi, file in enumerate(files): file = file.rstrip("\n") f = ROOT.TFile.Open(file)
def drawSyst(self, name=None, symmetrized=None, one_sided=False):
    """
       Draw single systematic with nominal

       The up (and, unless one-sided, down) variation is drawn as a dotted
       line, the optional symmetrized variations (nominal +/- symmetrized)
       as solid lines, and the nominal histogram as the ratio denominator
       ("Syst/Nom").  One figure is saved per call.

       @param name          name(s) for histogram: keys of self.systData,
                            [up] or [up, down]
       @param symmetrized   Values from symmetrized uncertainties
                            (assumed to support element-wise +/- with the
                            nominal bin contents -- TODO confirm type)
       @param one_sided     Boolean for one sided systematic or not

       Raises IndexError if `name` is empty, or holds fewer than two
       entries while `one_sided` is False.
    """
    if not name:
        raise IndexError("drawSyst needs at least one systematic name")
    if not one_sided and len(name) < 2:
        raise IndexError(
            "drawSyst needs an 'up' and a 'down' name unless one_sided=True")

    # Display name: drop a trailing 'up'/'down' and use dashes.  A name with
    # neither suffix is used whole instead of leaving `systname` unbound.
    systname = name[0]
    for suffix in ("up", "down"):
        if systname.endswith(suffix):
            systname = systname[:-len(suffix)]
            break
    systname = systname.replace("_", "-")

    # Keep only what follows the common histogram-name prefix; names without
    # the prefix are kept as they are.
    pieces = systname.split("xleptonicT-mmerged-boostedcomb-")
    if len(pieces) > 1:
        systname = pieces[1]

    h_nominal = self.systData['nominal']['data']    # data (histogram bins values)
    b_nominal = self.systData['nominal']['center']  # dummy values to get binning right

    hist = HepPlotter("histogram", 1)

    hist.ratio_plot = True        # plot a ratio of things [Data/MC]
    hist.ratio_type = "ratio"     # "ratio"
    hist.stacked = False          # stack plots
    hist.rebin = self.rebin
    hist.logplot = False          # plot on log scale
    hist.x_label = self.x_labels[self.variable]['label']
    hist.y_label = "Events"
    hist.extra_text = systname + '\n ' + self.sampleName
    hist.binning = self.systData['nominal']['bins']
    hist.numLegendColumns = 1
    hist.y_ratio_label = "Syst/Nom"
    hist.lumi = '14.7'            # in /fb
    hist.format = 'png'           # file format for saving image
    hist.saveAs = self.outpath + "h_syst_" + self.sampleName + "_" + systname  # save figure with unique name
    hist.CMSlabel = 'top left'    # 'top left', 'top right'; hack code for something else
    hist.CMSlabelStatus = 'Simulation Internal'  # ('Simulation')+'Internal' || 'Preliminary'

    hist.initialize()

    # Names of the histograms actually added below; the nominal histogram
    # is told to partner with exactly these in the ratio.
    uncertainty_hists = []

    ## Regular uncertainties
    up = self.systData[name[0]]['center']
    upData = self.systData[name[0]]['data']
    hist.Add(up,
             weights=upData,
             name=systname + " UP",
             label="UP",
             linecolor='r',
             color='r',
             linestyle='dotted',
             draw='step',
             ratio_num=True,
             ratio_den=False,
             ratio_partner="nominal")
    uncertainty_hists.append(systname + " UP")

    if not one_sided:
        down = self.systData[name[1]]['center']
        downData = self.systData[name[1]]['data']
        hist.Add(down,
                 weights=downData,
                 name=systname + " DOWN",
                 label="DOWN",
                 linecolor='b',
                 color='b',
                 linestyle='dotted',
                 draw='step',
                 ratio_num=True,
                 ratio_den=False,
                 ratio_partner="nominal")
        uncertainty_hists.append(systname + " DOWN")

    ## Symmetrized uncertainties
    if symmetrized is not None:
        # - same binning as 'up' systematic
        symmetrized_hists = [
            (" UP Symm.", "UP Symm.", 'r', h_nominal + symmetrized),
            (" DOWN Symm.", "DOWN Symm.", 'b', h_nominal - symmetrized),
        ]
        for name_suffix, label, colour, weights in symmetrized_hists:
            hist.Add(up,
                     weights=weights,
                     name=systname + name_suffix,
                     label=label,
                     linecolor=colour,
                     linestyle='solid',
                     draw='step',
                     color=colour,
                     ratio_num=True,
                     ratio_den=False,
                     ratio_partner="nominal")
            uncertainty_hists.append(systname + name_suffix)

    ## nominal
    hist.Add(b_nominal,
             weights=h_nominal,
             name="nominal",
             label="nominal",
             linecolor='k',
             draw='step',
             linestyle='solid',
             ratio_num=False,
             ratio_den=True,
             ratio_partner=uncertainty_hists)

    hist.execute()   # returns the figure, not needed here
    hist.savefig()   # save and close the figure

    return
numberOfHists = 0 # Access data -- assumes you are plotting histograms from multiple sources in one figure for hi,histogram in enumerate(histograms): histogram = histogram.strip('\n') print " :: Plotting "+histogram+"\n" ## setup histogram hist = HepPlotter("histogram",1) hist.ratio_plot = False # plot a ratio of things [Data/MC] hist.ratio_type = "ratio" # "ratio" hist.stacked = True # stack plots hist.rebin = 1 hist.logplot = False # plot on log scale hist.x_label = x_labels[histogram] hist.y_label = "Events" hist.y_ratio_label = "" hist.lumi = 'XY.Z' # in /fb hist.format = 'png' # file format for saving image hist.saveAs = outpath+"_hist_"+histogram # save figure with name hist.ATLASlabel = 'top left' # 'top left', 'top right'; hack code for something else hist.ATLASlabelStatus = 'Internal' # ('Simulation')+'Internal' || 'Preliminary' hist.initialize() ## Add the data from each file for fi,file in enumerate(files): file = file.rstrip("\n") f = ROOT.TFile.Open(file) filename = file.split("/")[-1].split(".")[0]
## switch order of file & hist loops ## To plot multiple kinds of variables on different plots, ## you'll need another loop for file in files: file = file.rstrip("\n") f = ROOT.TFile.Open(file) filename = file.split("/")[-1].split(".")[0] print " > Opening data from ", filename ## setup histogram hist = HepPlotter("efficiency", 1) hist.drawEffDist = True # draw the physics distribution for efficiency (jet_pt for jet trigger) hist.rebin = 1 hist.x_label = r"Jet p$_\text{T}$ [GeV]" hist.y_label = "Efficiency" hist.extra_text = extraText[filename] hist.format = 'png' # file format for saving image hist.saveAs = outpath + "eff_" + filename # save figure with name hist.ATLASlabel = 'top left' # 'top left', 'top right'; hack code for something else hist.ATLASlabelStatus = 'Simulation Internal' # ('Simulation')+'Internal' || 'Preliminary' hist.initialize() # loop over variables to put on one plot for hi, histogram in enumerate(histograms): histogram = histogram.strip('\n') print " :: Plotting " + histogram