Esempio n. 1
0
        # For every variable (column index kk, name key): choose which array the
        # residuals come from, compute summary statistics, and append them to the
        # per-performance bookkeeping dict.
        for kk, key in enumerate(test.keys()):
            if key in diff_list:
                # Variables listed in diff_list read their column from `diff`
                # instead of `residuals`.
                curr_residuals = diff[:, kk]
                if key == 'AverageLArQF':
                    # For this variable only, keep entries with |value| < 1000.
                    curr_residuals = curr_residuals[
                        np.abs(curr_residuals) < 1000]
                # Limits handed to stats.tstd below for the diff_list variables.
                limits = (-1000, 1000)
                # NOTE(review): shadows the builtin `range` in this scope; not read
                # in the visible lines - presumably a plot range used further down.
                range = None
            else:
                curr_residuals = residuals[:, kk]
                limits = None
                range = (-0.1, 0.1)
            # utils.nanrms / utils.std_error are project helpers not shown here.
            rms = utils.nanrms(curr_residuals)
            # presumably scipy.stats.tstd (trimmed std) - TODO confirm the import
            std = stats.tstd(curr_residuals, limits=limits)
            std_err = utils.std_error(curr_residuals)
            # NOTE(review): np.mean propagates NaN while sem below omits NaN.
            mean = np.mean(curr_residuals)
            sem = stats.sem(curr_residuals, nan_policy='omit')

            # `performance` comes from an enclosing scope that is not visible here.
            best_to_worst_dict[performance][key]['mean'].append(mean)
            best_to_worst_dict[performance][key]['std'].append(std)
            best_to_worst_dict[performance][key]['rms'].append(rms)
            best_to_worst_dict[performance][key]['std_err'].append(std_err)
            best_to_worst_dict[performance][key]['sem'].append(sem)

style_dict = {'best': 'b', 'median': 'g', 'worst': 'r'}

for key in test.keys():
    plt.figure()
    for performance in performances:
        plt.plot(best_to_worst_dict[performance]['latent_space'],
def make_plots(model, train_x, train_y, test_x, test_y, curr_save_folder, model_name):
    """Save input/output histograms, residual histograms and a residual corner plot.

    The model is moved to the CPU and evaluated on the first 100000 rows of
    ``test_x``. Inputs and outputs are then un-normalised with the per-column
    standard deviation and mean of ``train_x`` before anything is plotted.

    Args:
        model: Torch module; it is called on a float tensor built from
            ``test_x`` and its output is detached and converted to NumPy.
        train_x: DataFrame-like object; ``.std()``, ``.mean()`` and
            ``.columns`` are read from it.
        train_y: Unused; kept so existing callers keep working.
        test_x: DataFrame-like object; sliced positionally and read through
            ``.values``.
        test_y: Unused; kept so existing callers keep working.
        curr_save_folder: String that is prepended by plain concatenation to
            every output file name, so it should end with a path separator
            when it names a directory.
        model_name: Prefix of the histogram and residual file names. The
            corner plot is always written as ``corner_3d.png``.

    Side effects:
        Draws on matplotlib figures 4-7, opens further figures, writes image
        files, and overrides ``lines.linewidth``, ``xtick.labelsize`` and
        ``ytick.labelsize`` in ``mpl.rcParams``.
    """
    unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
    variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
    line_style = ['--', '-']
    colors = ['orange', 'c']
    n_vars = len(variable_list)

    model.to('cpu')

    # Histograms
    idxs = (0, 100000)  # Choose events to compare
    data = torch.tensor(test_x[idxs[0]:idxs[1]].values, dtype=torch.float)
    pred = model(data).detach().numpy()

    # Undo the normalisation. `data` is converted to a NumPy array explicitly
    # so the arithmetic below does not depend on how NumPy ufuncs wrap torch
    # tensors.
    col_std = train_x.std().values
    col_mean = train_x.mean().values
    pred = pred * col_std + col_mean
    data = data.numpy() * col_std + col_mean

    alph = 0.8
    n_bins = 50
    for kk in range(n_vars):
        plt.figure(kk + 4)
        # The output histogram reuses the input's bin edges so both are comparable.
        _, bin_edges, _ = plt.hist(data[:, kk], color=colors[1], label='Input', alpha=1, bins=n_bins)
        plt.hist(pred[:, kk], color=colors[0], label='Output', alpha=alph, bins=bin_edges)
        plt.suptitle(train_x.columns[kk])
        plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.ylabel('Number of events')
        ms.sciy()
        plt.legend()
        fig_name = model_name + '_hist_%s' % train_x.columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Residuals: (output - input) / input for every variable.
    residual_strings = [r'$(p_{T,out} - p_{T,in}) / p_{T,in}$',
                        r'$(\eta_{out} - \eta_{in}) / \eta_{in}$',
                        r'$(\phi_{out} - \phi_{in}) / \phi_{in}$',
                        r'$(E_{out} - E_{in}) / E_{in}$']
    residuals = (pred - data) / data
    # Named residual_range (not `range`) so the builtin stays usable in here.
    residual_range = (-.02, .02)
    # Backslashes are escaped explicitly; '\p' and '\s' are invalid escape
    # sequences in a normal string literal.
    stats_fmt = 'Mean = %f$\\pm$%f\n$\\sigma$ = %f$\\pm$%f'
    for kk in range(n_vars):
        plt.figure()
        plt.hist(residuals[:, kk], label='Residuals', linestyle=line_style[0],
                 alpha=alph, bins=100, range=residual_range)
        plt.suptitle('Residuals of %s' % train_x.columns[kk])
        plt.xlabel(residual_strings[kk])
        plt.ylabel('Number of jets')
        ms.sciy()
        # nanstd so the spread ignores NaNs the same way the mean (nanmean)
        # and the SEM (nan_policy='omit') next to it do.
        std = np.nanstd(residuals[:, kk])
        std_err = utils.std_error(residuals[:, kk])
        mean = np.nanmean(residuals[:, kk])
        sem = stats.sem(residuals[:, kk], nan_policy='omit')
        ax = plt.gca()
        plt.text(.75, .8, stats_fmt % (mean, sem, std, std_err),
                 bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
                 horizontalalignment='center', verticalalignment='center',
                 transform=ax.transAxes, fontsize=18)
        fig_name = model_name + '_residual_%s' % train_x.columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Corner plot of the residuals.
    res_df = pd.DataFrame({'pt': residuals[:, 0], 'eta': residuals[:, 1],
                           'phi': residuals[:, 2], 'E': residuals[:, 3]})
    save = True

    # Diverging colormap; the normalisation maps values in [-1, 1] onto it.
    cmap = sns.diverging_palette(10, 220, as_cmap=True)
    norm = mpl.colors.Normalize(vmin=-1, vmax=1, clip=False)
    mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)

    group = ['pt', 'eta', 'phi', 'E']

    label_kwargs = {'fontsize': 20}
    title_kwargs = {"fontsize": 11}
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['xtick.labelsize'] = 12
    mpl.rcParams['ytick.labelsize'] = 12
    group_arr = res_df.values
    corr = res_df.corr()
    # Each axis spans the 0.25%-99.75% quantile interval of its variable.
    qs = np.quantile(group_arr, q=[.0025, .9975], axis=0)
    ndim = qs.shape[1]
    ranges = [tuple(qs[:, kk]) for kk in range(ndim)]
    figure = corner(group_arr, range=ranges, plot_density=True, plot_contours=True,
                    no_fill_contours=False, bins=50, labels=group,
                    label_kwargs=label_kwargs, show_titles=True,
                    title_kwargs=title_kwargs, quantiles=(0.16, 0.84),
                    levels=(1 - np.exp(-0.5), .90), fill_contours=False, title_fmt='.1e')

    # Colour the background of every below-diagonal panel by the correlation
    # of the corresponding pair of residuals.
    axes = np.array(figure.axes).reshape((ndim, ndim))
    for yi in range(ndim):
        for xi in range(yi):
            ax = axes[yi, xi]
            ax.set_facecolor(color=mappable.to_rgba(corr.values[yi, xi]))
    cax = figure.add_axes([.87, .4, .04, 0.55])
    cbar = plt.colorbar(mappable, cax=cax, format='%.1f', ticks=np.arange(-1., 1.1, 0.2))
    cbar.ax.set_ylabel('Correlation', fontsize=20)

    if save:
        fig_name = 'corner_3d.png'
        plt.savefig(curr_save_folder + fig_name)
Esempio n. 3
0
    fig_name = 'hist_%s' % train.columns[kk]
    plt.savefig(curr_save_folder + fig_name)

# Residuals
# Relative residual (reconstructed - true) / true for every variable; one
# histogram per variable is drawn and saved.
residual_strings = [r'$(p_{T,recon} - p_{T,true}) / p_{T,true}$',
                    r'$(\eta_{recon} - \eta_{true}) / \eta_{true}$',
                    r'$(\phi_{recon} - \phi_{true}) / \phi_{true}$',
                    r'$(E_{recon} - E_{true}) / E_{true}$']
residuals = (pred.detach().numpy() - data.detach().numpy()) / data.detach().numpy()
# NOTE(review): this shadows the builtin `range` for the rest of the script.
# The name is kept because other parts of the script may read it; the loop
# below therefore keeps using np.arange.
range = (-.1, .1)
for kk in np.arange(len(test.keys())):
    plt.figure()
    n_hist_pred, bin_edges, _ = plt.hist(
        residuals[:, kk], label='Residuals', linestyle=line_style[0], alpha=alph, bins=200, range=range)
    plt.suptitle('Residuals of %s' % train.columns[kk])
    plt.xlabel(residual_strings[kk])
    plt.ylabel('Number of jets')
    ms.sciy()
    # nanstd so the spread ignores NaNs the same way the mean (nanmean) and
    # the SEM (nan_policy='omit') shown next to it do.
    std = np.nanstd(residuals[:, kk])
    std_err = utils.std_error(residuals[:, kk])
    mean = np.nanmean(residuals[:, kk])
    sem = stats.sem(residuals[:, kk], nan_policy='omit')
    ax = plt.gca()
    # Backslashes are escaped explicitly: '\p' and '\s' are invalid escape
    # sequences in a normal string literal. The runtime text is unchanged.
    plt.text(.75, .8, 'Mean = %f$\\pm$%f\n$\\sigma$ = %f$\\pm$%f' % (mean, sem, std, std_err),
             bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
             horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=18)
    fig_name = 'residual_%s' % train.columns[kk]
    plt.savefig(curr_save_folder + fig_name)
    # Overlay the m_jj distribution of the AE output on the original one and
    # annotate both with mean +- SEM and sigma +- its error.
    mjj_z = np.array(mjj_z_list)

    # Keep only entries above -1e10 (presumably drops sentinel values - TODO confirm).
    mjj_z = mjj_z[mjj_z > -1e10]

    # NOTE(review): shadows the builtin `range` in this scope; name kept because
    # the enclosing scope is not visible here.
    range = (-20, 3000)
    n_hist_data, bin_edges, _ = plt.hist(mjj_orig, bins=100, color='c', label='Original', range=range, density=False)
    # Reuse the bin edges of the original histogram so both share one binning.
    plt.hist(mjj_z, bins=bin_edges, color='orange', alpha=0.8, label='AE output', range=range, density=False)
    plt.legend()
    plt.xlabel('$m_{jj}$')
    plt.ylabel('Number of jets')
    plt.suptitle('%dD latent space' % latent_dim)
    ms.sciy()
    std_orig = np.std(mjj_orig)
    std_z = np.std(mjj_z)
    std_err_orig = utils.std_error(mjj_orig)
    std_err_z = utils.std_error(mjj_z)
    mean_orig = np.mean(mjj_orig)
    mean_z = np.mean(mjj_z)
    sem_orig = stats.sem(mjj_orig, nan_policy='omit')
    sem_z = stats.sem(mjj_z, nan_policy='omit')
    ax = plt.gca()
    # Backslashes are escaped explicitly: '\p' and '\s' are invalid escape
    # sequences in a normal string literal. The runtime text is unchanged.
    plt.text(.75, .6, 'Original\nMean=%f$\\pm$%f\n$\\sigma$=%f$\\pm$%f' % (mean_orig, sem_orig, std_orig, std_err_orig), bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
             horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20)
    plt.text(.75, .4, 'AE output\nMean=%f$\\pm$%f\n$\\sigma$=%f$\\pm$%f' % (mean_z, sem_z, std_z, std_err_z), bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
             horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=20)
    if save:
        plt.savefig('mjj_%d_z%d_decompressed.png' % (sample, latent_dim))


if not save: