for kk, key in enumerate(test.keys()): if key in diff_list: curr_residuals = diff[:, kk] if key == 'AverageLArQF': curr_residuals = curr_residuals[ np.abs(curr_residuals) < 1000] limits = (-1000, 1000) range = None else: curr_residuals = residuals[:, kk] limits = None range = (-0.1, 0.1) rms = utils.nanrms(curr_residuals) std = stats.tstd(curr_residuals, limits=limits) std_err = utils.std_error(curr_residuals) mean = np.mean(curr_residuals) sem = stats.sem(curr_residuals, nan_policy='omit') best_to_worst_dict[performance][key]['mean'].append(mean) best_to_worst_dict[performance][key]['std'].append(std) best_to_worst_dict[performance][key]['rms'].append(rms) best_to_worst_dict[performance][key]['std_err'].append(std_err) best_to_worst_dict[performance][key]['sem'].append(sem) style_dict = {'best': 'b', 'median': 'g', 'worst': 'r'} for key in test.keys(): plt.figure() for performance in performances: plt.plot(best_to_worst_dict[performance]['latent_space'],
def make_plots(model, train_x, train_y, test_x, test_y, curr_save_folder, model_name):
    """Save input/output histograms, residual histograms and a residual corner plot.

    The model is moved to the CPU and evaluated on the first 100000 rows of
    ``test_x``.  Inputs and outputs are un-normalised with the per-column
    standard deviation and mean of ``train_x`` before plotting.

    Args:
        model: Object with ``.to(device)`` that is callable on a float tensor
            and returns a tensor (presumably a torch ``nn.Module``).
        train_x: Table providing ``.std()``, ``.mean()`` and ``.columns``
            (presumably a pandas DataFrame); the first four columns are
            plotted as pT, eta, phi and E.
        train_y: Unused; kept for interface compatibility.
        test_x: Table supporting row slicing and ``.values``.
        test_y: Unused; kept for interface compatibility.
        curr_save_folder: String prefix that file names are appended to.  No
            path separator is inserted, so it has to end with one.
        model_name: Prefix for the histogram and residual file names.  The
            corner plot is always written as ``corner_3d.png``.

    Side effects:
        Moves ``model`` to the CPU, creates matplotlib figures (the
        histograms use the fixed figure numbers 4-7), changes three
        ``mpl.rcParams`` entries globally and writes image files.
    """
    unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
    variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
    line_style = ['--', '-']
    colors = ['orange', 'c']

    model.to('cpu')

    columns = train_x.columns
    # Constants used to undo the normalisation of the network inputs/outputs.
    feature_std = train_x.std().values
    feature_mean = train_x.mean().values

    # Histograms
    idxs = (0, 100000)  # Choose events to compare
    inputs = torch.tensor(test_x[idxs[0]:idxs[1]].values, dtype=torch.float)
    with torch.no_grad():  # plotting only, no autograd graph needed
        pred = model(inputs).detach().numpy()

    # Work on plain NumPy arrays from here on instead of pushing a torch
    # tensor through NumPy ufuncs and converting it back repeatedly.
    pred = pred * feature_std + feature_mean
    data = inputs.numpy() * feature_std + feature_mean

    alph = 0.8
    n_bins = 50
    for kk in range(4):
        # Explicit figure numbers: a figure with this number is reused if it
        # already exists.
        plt.figure(kk + 4)
        _, bin_edges, _ = plt.hist(data[:, kk], color=colors[1], label='Input',
                                   alpha=1, bins=n_bins)
        # Reuse the input binning so both histograms are directly comparable.
        plt.hist(pred[:, kk], color=colors[0], label='Output', alpha=alph,
                 bins=bin_edges)
        plt.suptitle(columns[kk])
        plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.ylabel('Number of events')
        ms.sciy()
        plt.legend()
        fig_name = model_name + '_hist_%s' % columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Relative residuals
    residual_strings = [r'$(p_{T,out} - p_{T,in}) / p_{T,in}$',
                        r'$(\eta_{out} - \eta_{in}) / \eta_{in}$',
                        r'$(\phi_{out} - \phi_{in}) / \phi_{in}$',
                        r'$(E_{out} - E_{in}) / E_{in}$']
    residuals = (pred - data) / data
    residual_range = (-.02, .02)  # not named `range`: keeps the builtin usable
    for kk in range(4):
        column_residuals = residuals[:, kk]
        plt.figure()
        plt.hist(column_residuals, label='Residuals', linestyle=line_style[0],
                 alpha=alph, bins=100, range=residual_range)
        plt.suptitle('Residuals of %s' % columns[kk])
        plt.xlabel(residual_strings[kk])
        plt.ylabel('Number of jets')
        ms.sciy()

        # NaN-ignoring spread, consistent with the NaN-ignoring mean and SEM
        # printed next to it.
        std = np.nanstd(column_residuals)
        std_err = utils.std_error(column_residuals)
        mean = np.nanmean(column_residuals)
        sem = stats.sem(column_residuals, nan_policy='omit')
        ax = plt.gca()
        # Backslashes of the TeX commands are escaped explicitly; "\n" stays a
        # real newline.  The resulting text is unchanged.
        plt.text(.75, .8,
                 'Mean = %f$\\pm$%f\n$\\sigma$ = %f$\\pm$%f' % (mean, sem, std, std_err),
                 bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
                 horizontalalignment='center', verticalalignment='center',
                 transform=ax.transAxes, fontsize=18)
        fig_name = model_name + '_residual_%s' % columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Corner plot of the residuals, panels shaded by pairwise correlation
    res_df = pd.DataFrame({'pt': residuals[:, 0],
                           'eta': residuals[:, 1],
                           'phi': residuals[:, 2],
                           'E': residuals[:, 3]})

    # Diverging colormap mapping correlations in [-1, 1] to colours.
    cmap = sns.diverging_palette(10, 220, as_cmap=True)
    norm = mpl.colors.Normalize(vmin=-1, vmax=1, clip=False)
    mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)

    group = ['pt', 'eta', 'phi', 'E']
    label_kwargs = {'fontsize': 20}
    title_kwargs = {"fontsize": 11}
    # NOTE: these rcParams changes are global and persist after the call.
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['xtick.labelsize'] = 12
    mpl.rcParams['ytick.labelsize'] = 12

    group_arr = res_df.values
    corr = res_df.corr()
    # Axis limits per variable: the central 99.5% of the residuals.
    qs = np.quantile(group_arr, q=[.0025, .9975], axis=0)
    ndim = qs.shape[1]
    ranges = [tuple(qs[:, kk]) for kk in range(ndim)]
    figure = corner(group_arr, range=ranges, plot_density=True,
                    plot_contours=True, no_fill_contours=False, bins=50,
                    labels=group, label_kwargs=label_kwargs,
                    show_titles=True, title_kwargs=title_kwargs,
                    quantiles=(0.16, 0.84),
                    levels=(1 - np.exp(-0.5), .90), fill_contours=False,
                    title_fmt='.1e')

    # Extract the axes as an (ndim, ndim) grid.
    axes = np.array(figure.axes).reshape((ndim, ndim))

    # Shade every below-diagonal panel according to its correlation.
    for yi in range(ndim):
        for xi in range(yi):
            axes[yi, xi].set_facecolor(color=mappable.to_rgba(corr.values[yi, xi]))

    cax = figure.add_axes([.87, .4, .04, 0.55])
    cbar = plt.colorbar(mappable, cax=cax, format='%.1f',
                        ticks=np.arange(-1., 1.1, 0.2))
    cbar.ax.set_ylabel('Correlation', fontsize=20)

    fig_name = 'corner_3d.png'
    plt.savefig(curr_save_folder + fig_name)
# NOTE(review): the next two statements look like the tail of a histogram loop
# whose header is outside this excerpt (they use its index `kk`); re-indent
# them under that loop when merging.
fig_name = 'hist_%s' % train.columns[kk]
plt.savefig(curr_save_folder + fig_name)

# Residuals: one histogram of (reconstructed - true) / true per variable.
residual_strings = [r'$(p_{T,recon} - p_{T,true}) / p_{T,true}$',
                    r'$(\eta_{recon} - \eta_{true}) / \eta_{true}$',
                    r'$(\phi_{recon} - \phi_{true}) / \phi_{true}$',
                    r'$(E_{recon} - E_{true}) / E_{true}$']
residuals = (pred.detach().numpy() - data.detach().numpy()) / data.detach().numpy()
# NOTE(review): `range` shadows the builtin, which is why the loop below uses
# np.arange.  The name is kept because code outside this excerpt may read it.
range = (-.1, .1)
for kk in np.arange(len(test.keys())):
    plt.figure()
    n_hist_pred, bin_edges, _ = plt.hist(
        residuals[:, kk], label='Residuals', linestyle=line_style[0],
        alpha=alph, bins=200, range=range)
    plt.suptitle('Residuals of %s' % train.columns[kk])
    plt.xlabel(residual_strings[kk])
    plt.ylabel('Number of jets')
    ms.sciy()

    # NaN-ignoring spread, consistent with the NaN-ignoring mean and SEM that
    # are printed next to it.
    std = np.nanstd(residuals[:, kk])
    std_err = utils.std_error(residuals[:, kk])
    mean = np.nanmean(residuals[:, kk])
    sem = stats.sem(residuals[:, kk], nan_policy='omit')
    ax = plt.gca()
    # Backslashes of the TeX commands are escaped explicitly; "\n" stays a real
    # newline.  The resulting text is unchanged.
    plt.text(.75, .8,
             'Mean = %f$\\pm$%f\n$\\sigma$ = %f$\\pm$%f' % (mean, sem, std, std_err),
             bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
             horizontalalignment='center', verticalalignment='center',
             transform=ax.transAxes, fontsize=18)
    fig_name = 'residual_%s' % train.columns[kk]
    plt.savefig(curr_save_folder + fig_name)
mjj_z = np.array(mjj_z_list) mjj_z = mjj_z[mjj_z > -1e10] range = (-20, 3000) n_hist_data, bin_edges, _ = plt.hist(mjj_orig, bins=100, color='c', label='Original', range=range, density=False) plt.hist(mjj_z, bins=bin_edges, color='orange', alpha=0.8, label='AE output', range=range, density=False) plt.legend() plt.xlabel('$m_{jj}$') plt.ylabel('Number of jets') plt.suptitle('%dD latent space' % latent_dim) #plt.yscale('log') ms.sciy() std_orig = np.std(mjj_orig) std_z = np.std(mjj_z) std_err_orig = utils.std_error(mjj_orig) std_err_z = utils.std_error(mjj_z) mean_orig = np.mean(mjj_orig) mean_z = np.mean(mjj_z) sem_orig = stats.sem(mjj_orig, nan_policy='omit') sem_z = stats.sem(mjj_z, nan_policy='omit') ax = plt.gca() plt.text(.75, .6, 'Original\nMean=%f$\pm$%f\n$\sigma$=%f$\pm$%f' % (mean_orig, sem_orig, std_orig, std_err_orig), bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}, horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20) plt.text(.75, .4, 'AE output\nMean=%f$\pm$%f\n$\sigma$=%f$\pm$%f' % (mean_z, sem_z, std_z, std_err_z), bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10}, horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20) if save: plt.savefig('mjj_%d_z%d_decompressed.png' % (sample, latent_dim)) if not save: