def plot_histograms(pred, data, bins, same_bin_edges=True,
                    colors=('orange', 'c'),
                    variable_list=(r'$p_T$', r'$\eta$', r'$\phi$', r'$E$'),
                    variable_names=('pT', 'eta', 'phi', 'E'),
                    unit_list=('[GeV]', '[rad]', '[rad]', '[GeV]'),
                    title=None):
    """Draw overlaid 'Input' and 'Output' histograms, one new figure per column.

    For each of the first four columns, ``data[:, kk]`` is histogrammed as
    'Input' and ``pred[:, kk]`` as 'Output' on the same axes. The figures are
    left open; nothing is saved, shown or returned.

    Args:
        pred: Object indexable as ``pred[:, kk]``; plotted as 'Output'.
        data: Object indexable as ``data[:, kk]``; plotted as 'Input'.
        bins: ``bins`` argument forwarded to ``plt.hist`` for the input
            histogram (and for the output one if ``same_bin_edges`` is false).
        same_bin_edges: If true, the output histogram reuses the bin edges
            returned for the input histogram.
        colors: Sequence ``(output_color, input_color)``.
        variable_list: Per-column labels used on the x axis.
        variable_names: Per-column names used as figure title when ``title``
            is None.
        unit_list: Per-column unit strings appended to the x-axis label.
        title: Optional title applied to every figure instead of the
            per-column name.
    """
    # The sequence defaults are tuples rather than lists so that no mutable
    # object is shared between calls; they are only ever indexed.
    output_alpha = 0.8
    for kk in range(4):
        plt.figure()
        _, bin_edges, _ = plt.hist(data[:, kk], color=colors[1], label='Input',
                                   alpha=1, bins=bins)
        # Reusing the input's edges makes both histograms share identical bins.
        output_bins = bin_edges if same_bin_edges else bins
        plt.hist(pred[:, kk], color=colors[0], label='Output',
                 alpha=output_alpha, bins=output_bins)
        plt.suptitle(variable_names[kk] if title is None else title)
        plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.ylabel('Number of events')
        # Project helper from `ms`; presumably applies scientific notation to
        # the y axis -- confirm against its definition.
        ms.sciy()
        plt.legend()
def plot_residuals(pred, data, range=None,
                   variable_names=('pT', 'eta', 'phi', 'E'), bins=1000,
                   save=None, title=None):
    """Histogram the relative residuals ``(pred - data) / data`` per column.

    One new figure is created for each of the first four columns. Figures are
    left open; nothing is returned.

    Args:
        pred: Object with a ``.numpy()`` method (e.g. a torch tensor) holding
            the reconstructed values.
        data: Object with a ``.numpy()`` method holding the reference values;
            also used as the denominator of the relative residual.
        range: Optional ``(low, high)`` forwarded to ``plt.hist``. The name
            shadows the builtin inside this function, so the builtin
            ``range`` must not be used in the body.
        variable_names: Per-column names used in titles, axis labels and
            output file names.
        bins: ``bins`` argument forwarded to ``plt.hist``.
        save: Optional path prefix; when given, each figure is saved to
            ``save + '_<variable name>'``.
        title: Optional title applied to every figure instead of
            'Residuals of <variable name>'.
    """
    alph = 0.8
    pred_arr = pred.numpy()
    data_arr = data.numpy()
    residuals = (pred_arr - data_arr) / data_arr
    for kk in np.arange(4):
        name = variable_names[kk]
        column = residuals[:, kk]
        # Zeros in `data` produce inf/nan residuals. They can never fall in a
        # finite bin, and an inf makes matplotlib's automatic range detection
        # fail, so drop them before histogramming.
        column = column[np.isfinite(column)]
        plt.figure()
        plt.hist(column, label='Residuals', alpha=alph, bins=bins, range=range)
        if title is None:
            plt.suptitle('Residuals of %s' % name)
        else:
            plt.suptitle(title)
        plt.xlabel(r'$(%s_{recon} - %s_{true}) / %s_{true}$' % (name, name, name))
        plt.ylabel('Number of events')
        # Project helper from `ms`; presumably applies scientific notation to
        # the y axis -- confirm against its definition.
        ms.sciy()
        if save is not None:
            plt.savefig(save + '_%s' % name)
model.eval() idxs = (0, int(1e5)) # Choose events to compare data = torch.tensor(test_x[idxs[0]:idxs[1]].values) latent = model.encode(data).detach().numpy() for kk in np.arange(latent.shape[1]): ax = ax1[ii, kk] plt.sca(ax) plt.hist(latent[:, kk], label='$z_%d$' % (kk + 1), color='m', bins=100) plt.suptitle('Latent variable histograms' % (kk + 1)) if ii == len(model_list) - 1: plt.xlabel('$z_%d$' % (kk + 1)) plt.title(model.describe(), fontsize=16) plt.legend() ms.sciy() plt.subplots_adjust(left=0.05, right=0.97, bottom=0.05, top=0.92) fig2, ax2 = plt.subplots(ncols=len(model_list), nrows=1, figsize=(32, 8), sharex=True, sharey=True) for ii, model in enumerate(model_list): save_path = model_folder + model_file_list[ii] model.load_state_dict(torch.load(save_path)) model.eval() idxs = (0, 10000) # Choose events to compare data = torch.tensor(test_x[idxs[0]:idxs[1]].values) latent = model.encode(data).detach().numpy()
def make_plots(model, train_x, train_y, test_x, test_y, curr_save_folder, model_name):
    """Save input/output histograms, residual histograms and a residual corner plot.

    The model is moved to the CPU and evaluated on the first 100000 rows of
    ``test_x``. Predictions and inputs are rescaled with the per-column
    standard deviation and mean of ``train_x`` (presumably inverting a
    standardisation applied upstream -- confirm against the caller).

    Files written, all named ``curr_save_folder + <name>``:
        * ``<model_name>_hist_<column>``      input vs. output histogram
        * ``<model_name>_residual_<column>``  relative residual histogram
        * ``corner_3d.png``                   corner plot of the residuals

    Side effects: creates matplotlib figures that are left open, and changes
    ``mpl.rcParams`` ('lines.linewidth', 'xtick.labelsize', 'ytick.labelsize')
    globally.

    Args:
        model: Callable torch module; ``model(data)`` returns the
            reconstruction.
        train_x: DataFrame providing column names, mean and std.
        train_y: Unused.
        test_x: DataFrame of inputs to reconstruct.
        test_y: Unused.
        curr_save_folder: Path prefix for the output files; it is concatenated
            directly with the file name, so it must end with a separator.
        model_name: Prefix for the histogram file names.
    """
    unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
    variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
    line_style = ['--', '-']
    colors = ['orange', 'c']
    columns = train_x.columns

    model.to('cpu')

    # Histograms
    idxs = (0, 100000)  # Choose events to compare
    data = torch.tensor(test_x[idxs[0]:idxs[1]].values, dtype=torch.float)
    pred = model(data).detach().numpy()

    # Convert the input tensor to NumPy explicitly instead of relying on NumPy
    # ufuncs being applied to a torch tensor and wrapped back into one.
    train_std = train_x.std().values
    train_mean = train_x.mean().values
    pred = pred * train_std + train_mean
    data = data.numpy() * train_std + train_mean

    alph = 0.8
    n_bins = 50
    for kk in range(4):
        plt.figure(kk + 4)
        _, bin_edges, _ = plt.hist(data[:, kk], color=colors[1], label='Input',
                                   alpha=1, bins=n_bins)
        # Same edges for both histograms so they share identical bins.
        plt.hist(pred[:, kk], color=colors[0], label='Output', alpha=alph,
                 bins=bin_edges)
        plt.suptitle(columns[kk])
        plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.ylabel('Number of events')
        ms.sciy()
        plt.legend()
        fig_name = model_name + '_hist_%s' % columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Relative residuals
    residual_strings = [r'$(p_{T,out} - p_{T,in}) / p_{T,in}$',
                        r'$(\eta_{out} - \eta_{in}) / \eta_{in}$',
                        r'$(\phi_{out} - \phi_{in}) / \phi_{in}$',
                        r'$(E_{out} - E_{in}) / E_{in}$']
    residuals = (pred - data) / data
    residual_range = (-.02, .02)
    for kk in range(4):
        column = residuals[:, kk]
        plt.figure()
        plt.hist(column, label='Residuals', linestyle=line_style[0],
                 alpha=alph, bins=100, range=residual_range)
        plt.suptitle('Residuals of %s' % columns[kk])
        plt.xlabel(residual_strings[kk])
        plt.ylabel('Number of jets')
        ms.sciy()
        # NaN-aware statistics throughout: the mean and SEM already ignore
        # NaNs, so the standard deviation must as well to stay comparable.
        std = np.nanstd(column)
        std_err = utils.std_error(column)
        mean = np.nanmean(column)
        sem = stats.sem(column, nan_policy='omit')
        ax = plt.gca()
        # Backslashes are escaped explicitly; '\n' stays a real newline.
        plt.text(.75, .8,
                 'Mean = %f$\\pm$%f\n$\\sigma$ = %f$\\pm$%f' % (mean, sem, std, std_err),
                 bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
                 horizontalalignment='center', verticalalignment='center',
                 transform=ax.transAxes, fontsize=18)
        fig_name = model_name + '_residual_%s' % columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Corner plot of the residuals
    res_df = pd.DataFrame({'pt': residuals[:, 0], 'eta': residuals[:, 1],
                           'phi': residuals[:, 2], 'E': residuals[:, 3]})

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(10, 220, as_cmap=True)
    norm = mpl.colors.Normalize(vmin=-1, vmax=1, clip=False)
    mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)

    group = ['pt', 'eta', 'phi', 'E']
    label_kwargs = {'fontsize': 20}
    title_kwargs = {"fontsize": 11}
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['xtick.labelsize'] = 12
    mpl.rcParams['ytick.labelsize'] = 12

    group_arr = res_df.values
    corr = res_df.corr()
    # Plot range per variable: the central 99.5% of the residuals.
    qs = np.quantile(group_arr, q=[.0025, .9975], axis=0)
    ndim = qs.shape[1]
    ranges = [tuple(qs[:, kk]) for kk in range(ndim)]
    figure = corner(group_arr, range=ranges, plot_density=True,
                    plot_contours=True, no_fill_contours=False,
                    bins=50, labels=group, label_kwargs=label_kwargs,
                    show_titles=True, title_kwargs=title_kwargs,
                    quantiles=(0.16, 0.84),
                    levels=(1 - np.exp(-0.5), .90), fill_contours=False,
                    title_fmt='.1e')

    # Extract the axes
    axes = np.array(figure.axes).reshape((ndim, ndim))
    # Loop over the 2-D panels below the diagonal
    for yi in range(ndim):
        for xi in range(yi):
            # Set face color according to correlation
            axes[yi, xi].set_facecolor(color=mappable.to_rgba(corr.values[yi, xi]))

    cax = figure.add_axes([.87, .4, .04, 0.55])
    cbar = plt.colorbar(mappable, cax=cax, format='%.1f',
                        ticks=np.arange(-1., 1.1, 0.2))
    cbar.ax.set_ylabel('Correlation', fontsize=20)

    fig_name = 'corner_3d.png'
    plt.savefig(curr_save_folder + fig_name)
def save_plots(learn, module_string, lr, wd, pp):
    """Create and save the diagnostic figures for one trained model.

    Output goes to ``grid_search_folder + get_mod_folder(module_string, lr,
    pp, wd)``, which is created if missing. Written there: activation stats
    (via ``plot_activations``), loss curves, ``losses.txt``, input/output
    histograms, input-vs-output line plots for the first 100 events, latent
    histograms and pairwise latent scatter plots.

    Besides its arguments, the function reads names that are not defined in
    it and must exist at module level: ``grid_search_folder``, ``loss_func``,
    ``train_x``, ``test_x``, ``train_mean``, ``train_std``, ``colors``,
    ``variable_list``, ``unit_list``, ``line_style``, ``markers``.

    Args:
        learn: Learner exposing ``recorder.losses``, ``recorder.val_losses``
            and ``model`` (with an ``encode`` method).
        module_string: Forwarded to ``get_mod_folder``.
        lr: Forwarded to ``get_mod_folder``.
        wd: Forwarded to ``get_mod_folder``.
        pp: Forwarded to ``get_mod_folder``.

    Returns:
        The model sub-folder name returned by ``get_mod_folder``.

    Raises:
        ZeroDivisionError: If ``learn.recorder.val_losses`` is empty.
    """
    # Function-scope import: the file's top-level imports are not visible here.
    import itertools

    # Make and save figures
    curr_mod_folder = get_mod_folder(module_string, lr, pp, wd)
    curr_save_folder = grid_search_folder + curr_mod_folder
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(curr_save_folder, exist_ok=True)

    def _save_io_histograms(prefix):
        # Overlay input/output histograms for the first four columns and save
        # each one as '<prefix>_<column>'.
        idxs = (0, 100000)  # Choose events to compare
        pred, data = get_unnormalized_reconstructions(
            learn.model, df=test_x, idxs=idxs,
            train_mean=train_mean, train_std=train_std)
        alph = 0.8
        n_bins = 50
        for kk in range(4):
            plt.figure()
            _, bin_edges, _ = plt.hist(data[:, kk], color=colors[1],
                                       label='Input', alpha=1, bins=n_bins)
            plt.hist(pred[:, kk], color=colors[0], label='Output', alpha=alph,
                     bins=bin_edges)
            plt.suptitle(train_x.columns[kk])
            plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
            plt.ylabel('Number of events')
            ms.sciy()
            # Both series carry labels, so always show the legend.
            plt.legend()
            plt.savefig(curr_save_folder + prefix + '_%s' % train_x.columns[kk])

    # Weight activation stats
    plot_activations(learn, save=curr_save_folder + 'weight_activation')

    # Plot losses
    train_losses = learn.recorder.losses
    val_losses = learn.recorder.val_losses
    batches = len(train_losses)
    epos = len(val_losses)
    # x positions (in batches) at which each validation loss was recorded
    val_iter = (batches / epos) * np.arange(1, epos + 1, 1)
    loss_name = str(loss_func).split("(")[0]

    plt.figure()
    plt.plot(train_losses, label='Train')
    plt.plot(val_iter, val_losses, label='Validation', color='orange')
    plt.yscale(value='log')
    plt.legend()
    plt.ylabel(loss_name)
    plt.xlabel('Batches processed')
    plt.savefig(curr_save_folder + 'losses')

    plt.figure()
    plt.plot(val_losses, label='Validation', color='orange')
    plt.title('Validation loss')
    plt.legend()
    plt.ylabel(loss_name)
    plt.xlabel('Epoch')
    # Annotate every point with its numeric value
    for i_val, val in enumerate(val_losses):
        plt.text(i_val, val, str(val), horizontalalignment='center')
    plt.savefig(curr_save_folder + 'losses_val' + '.png')

    batches_per_epoch = batches // epos
    with open(curr_save_folder + 'losses.txt', 'w') as f:
        for i_val, val in enumerate(val_losses):
            # Training loss of the last batch of this epoch. The index is
            # (epoch + 1) * batches_per_epoch - 1; multiplying by
            # (batches_per_epoch - 1) instead would drift one batch earlier
            # with every epoch.
            last_batch = (i_val + 1) * batches_per_epoch - 1
            f.write('Epoch %d    Validation %s: %e    Training %s: %e\n'
                    % (i_val, loss_name, val, loss_name, train_losses[last_batch]))

    # Histograms
    _save_io_histograms('hist')

    # Plot input on top of output
    idxs = (0, 100)  # Choose events to compare
    pred, data = get_unnormalized_reconstructions(
        learn.model, df=test_x, idxs=idxs,
        train_mean=train_mean, train_std=train_std)
    for kk in range(4):
        plt.figure()
        plt.plot(data[:, kk], color=colors[1], label='Input',
                 linestyle=line_style[1], marker=markers[1])
        plt.plot(pred[:, kk], color=colors[0], label='Output',
                 linestyle=line_style[0], marker=markers[0])
        # Same column source as every other plot in this function.
        plt.suptitle(train_x.columns[kk])
        plt.xlabel('Event')
        plt.ylabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.legend()
        ms.sciy()
        plt.savefig(curr_save_folder + 'plot_%s' % train_x.columns[kk])

    # Plot latent space
    data = torch.tensor(test_x.values)
    latent = learn.model.encode(data).detach().numpy()
    for ii in range(latent.shape[1]):
        plt.figure()
        plt.hist(latent[:, ii], label='$z_%d$' % (ii + 1), color='m')
        plt.suptitle('Latent variable #%d' % (ii + 1))
        plt.legend()
        ms.sciy()
        plt.savefig(curr_save_folder + 'latent_hist_z%d' % (ii + 1))

    # Latent space scatter plots
    idxs = (0, 10000)  # Choose events to compare
    data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
    latent = learn.model.encode(data).detach().numpy()
    mksz = 1
    # All pairs among the first three latent dimensions (z1z2, z1z3, z2z3).
    # Capping at the actual latent size avoids an IndexError when the model
    # has fewer than three latent variables.
    n_scatter_dims = min(latent.shape[1], 3)
    for ii, jj in itertools.combinations(range(n_scatter_dims), 2):
        plt.figure()
        plt.scatter(latent[:, ii], latent[:, jj], s=mksz)
        plt.xlabel(r'$z_%d$' % (ii + 1))
        plt.ylabel(r'$z_%d$' % (jj + 1))
        plt.savefig(curr_save_folder + 'latent_scatter_z%dz%d' % (ii + 1, jj + 1))

    # Low pT histograms
    # NOTE(review): no pT selection is applied in this function, so these use
    # the same inputs as the 'hist_*' figures above -- confirm whether a cut
    # was intended.
    _save_io_histograms('lowpt_hist')

    return curr_mod_folder