Exemple #1
0
def plot_histograms(pred,
                    data,
                    bins,
                    same_bin_edges=True,
                    colors=['orange', 'c'],
                    variable_list=[r'$p_T$', r'$\eta$', r'$\phi$', r'$E$'],
                    variable_names=['pT', 'eta', 'phi', 'E'],
                    unit_list=['[GeV]', '[rad]', '[rad]', '[GeV]'],
                    title=None):
    """Overlay input and output histograms, one new figure per variable.

    Columns 0-3 of ``data`` and ``pred`` are histogrammed. In every figure
    ``data`` is drawn first (opaque, ``colors[1]``, labelled 'Input') and
    ``pred`` is drawn on top (alpha 0.8, ``colors[0]``, labelled 'Output').

    Args:
        pred: 2-D indexable (``pred[:, k]``) holding the output values.
        data: 2-D indexable (``data[:, k]``) holding the input values.
        bins: ``bins`` argument forwarded to ``plt.hist``.
        same_bin_edges: If true, the output histogram reuses the bin edges
            that ``plt.hist`` returned for the input histogram; otherwise
            ``bins`` is passed again.
        colors: Two colours, ``[output_colour, input_colour]``.
        variable_list: Axis-label text for each variable.
        variable_names: Per-variable figure titles, used when ``title`` is
            None.
        unit_list: Unit strings appended to the x-axis labels.
        title: Optional title applied to every figure instead of the
            per-variable name.

    The figures are neither saved nor closed here.
    """
    output_alpha = 0.8
    for col in range(4):
        plt.figure()

        # Input first, so its bin edges are available for the output.
        _, input_edges, _ = plt.hist(data[:, col],
                                     bins=bins,
                                     alpha=1,
                                     label='Input',
                                     color=colors[1])
        output_bins = input_edges if same_bin_edges else bins
        plt.hist(pred[:, col],
                 bins=output_bins,
                 alpha=output_alpha,
                 label='Output',
                 color=colors[0])

        plt.suptitle(variable_names[col] if title is None else title)
        plt.xlabel(' '.join((variable_list[col], unit_list[col])))
        plt.ylabel('Number of events')
        # Project styling helper; its effect is not visible in this file.
        ms.sciy()
        plt.legend()
Exemple #2
0
def plot_residuals(pred,
                   data,
                   range=None,
                   variable_names=['pT', 'eta', 'phi', 'E'],
                   bins=1000,
                   save=None,
                   title=None):
    """Histogram the relative residuals ``(pred - data) / data`` per variable.

    One new figure is opened for each of the first four columns.

    Args:
        pred: Object exposing ``.numpy()`` (e.g. a tensor) with the
            reconstructed values.
        data: Object exposing ``.numpy()`` with the reference values.
        range: ``range`` argument forwarded to ``plt.hist``.
        variable_names: Names used in the titles, axis labels and file
            names.
        bins: ``bins`` argument forwarded to ``plt.hist``.
        save: If not None, path prefix; each figure is saved to
            ``save + '_' + variable_name``.
        title: Optional title applied to every figure instead of
            'Residuals of <name>'.
    """
    truth = data.numpy()
    relative_error = (pred.numpy() - truth) / truth
    hist_kwargs = {
        'label': 'Residuals',
        'alpha': 0.8,
        'bins': bins,
        'range': range,
    }

    # The builtin ``range`` is shadowed by the parameter, hence np.arange.
    for col in np.arange(4):
        name = variable_names[col]
        plt.figure()
        plt.hist(relative_error[:, col], **hist_kwargs)
        plt.suptitle(('Residuals of %s' % name) if title is None else title)
        plt.xlabel(r'$(%s_{recon} - %s_{true}) / %s_{true}$' %
                   (name, name, name))
        plt.ylabel('Number of events')
        # Project styling helper; its effect is not visible in this file.
        ms.sciy()
        if save is not None:
            plt.savefig(save + '_%s' % name)
    # NOTE(review): orphaned loop body. The header that binds `ii` and
    # `model` (presumably `for ii, model in enumerate(model_list):`) and the
    # creation of the axes grid `ax1` are not visible in this file - confirm
    # against the original script before relying on this code.
    model.eval()

    idxs = (0, int(1e5))  # Choose events to compare
    data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
    latent = model.encode(data).detach().numpy()

    # One histogram per latent dimension, drawn on row `ii` of the grid.
    for kk in np.arange(latent.shape[1]):
        ax = ax1[ii, kk]
        plt.sca(ax)
        plt.hist(latent[:, kk], label='$z_%d$' % (kk + 1), color='m', bins=100)
        # The title has no conversion specifier, so no `%` operand is applied
        # (with a plain int operand that would raise TypeError).
        plt.suptitle('Latent variable histograms')
        # Only the bottom row of the grid gets x-axis labels.
        if ii == len(model_list) - 1:
            plt.xlabel('$z_%d$' % (kk + 1))
        plt.title(model.describe(), fontsize=16)
        plt.legend()
        ms.sciy()
        plt.subplots_adjust(left=0.05, right=0.97, bottom=0.05, top=0.92)

# A single row of axes with one column per entry of model_list; the x and y
# axes are shared between all panels.
fig2, ax2 = plt.subplots(ncols=len(model_list),
                         nrows=1,
                         figsize=(32, 8),
                         sharex=True,
                         sharey=True)
# For each model: load its saved weights, switch it to eval mode and encode
# the selected slice of test_x.
# NOTE(review): the loop body appears to be truncated - `latent` is computed
# but nothing is drawn on `ax2` in the code visible here.
for ii, model in enumerate(model_list):
    # Weight file path = model_folder + the file name at the same index.
    save_path = model_folder + model_file_list[ii]
    model.load_state_dict(torch.load(save_path))
    model.eval()

    idxs = (0, 10000)  # Choose events to compare
    data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
    # Detach from the autograd graph and convert to a NumPy array.
    latent = model.encode(data).detach().numpy()
def make_plots(model, train_x, train_y, test_x, test_y, curr_save_folder, model_name):
  """Save histogram, residual and corner plots comparing model output to input.

  The model is moved to the CPU and evaluated on rows 0-99999 of ``test_x``.
  Both the prediction and the input are rescaled with the per-column std and
  mean of ``train_x`` (the inverse of a standardisation, assuming the inputs
  were normalised with those statistics - TODO confirm). Three sets of
  figures are written into ``curr_save_folder``:

  * ``<model_name>_hist_<column>``: input vs. output histograms,
  * ``<model_name>_residual_<column>``: relative residuals with a text box
    showing mean, standard error of the mean, std and its error,
  * ``corner_3d.png``: corner plot of the four residuals whose off-diagonal
    panels are coloured by the pairwise correlation.

  Args:
      model: Callable torch module; ``model(data)`` returns the prediction.
      train_x: DataFrame supplying ``std()``, ``mean()`` and column names.
      train_y: Unused.
      test_x: DataFrame with the evaluation inputs.
      test_y: Unused.
      curr_save_folder: Path prefix; file names are appended by plain string
          concatenation, so it should end with a path separator.
      model_name: Prefix for the histogram and residual file names.

  Side effects: moves ``model`` to the CPU and overwrites
  ``lines.linewidth``, ``xtick.labelsize`` and ``ytick.labelsize`` in the
  global matplotlib rcParams. Figures are left open.
  """
  unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
  variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
  line_style = ['--', '-']
  colors = ['orange', 'c']

  model.to('cpu')

  # Histograms
  idxs = (0, 100000)  # Choose events to compare
  data = torch.tensor(test_x[idxs[0]:idxs[1]].values, dtype=torch.float)
  pred = model(data).detach().numpy()
  # Rescale prediction and input with the training-set statistics.
  pred = np.multiply(pred, train_x.std().values)
  pred = np.add(pred, train_x.mean().values)
  data = np.multiply(data, train_x.std().values)
  data = np.add(data, train_x.mean().values)

  alph = 0.8
  n_bins = 50
  for kk in np.arange(4):
      # NOTE(review): fixed figure numbers 4-7 are reused on every call, so
      # repeated calls draw onto the same figures - confirm this is intended.
      plt.figure(kk + 4)
      # The output histogram reuses the bin edges of the input histogram.
      _, bin_edges, _ = plt.hist(data[:, kk], color=colors[1], label='Input', alpha=1, bins=n_bins)
      plt.hist(pred[:, kk], color=colors[0], label='Output', alpha=alph, bins=bin_edges)
      plt.suptitle(train_x.columns[kk])
      plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
      plt.ylabel('Number of events')
      ms.sciy()
      plt.legend()
      fig_name = model_name + '_hist_%s' % train_x.columns[kk]
      plt.savefig(curr_save_folder + fig_name)

  residual_strings = [r'$(p_{T,out} - p_{T,in}) / p_{T,in}$',
                      r'$(\eta_{out} - \eta_{in}) / \eta_{in}$',
                      r'$(\phi_{out} - \phi_{in}) / \phi_{in}$',
                      r'$(E_{out} - E_{in}) / E_{in}$']
  # Relative residuals (out - in) / in for every event and variable.
  residuals = (pred - data.detach().numpy()) / data.detach().numpy()
  residual_range = (-.02, .02)  # named so the builtin `range` is not shadowed
  for kk in np.arange(4):
      plt.figure()
      plt.hist(residuals[:, kk], label='Residuals', linestyle=line_style[0],
               alpha=alph, bins=100, range=residual_range)
      plt.suptitle('Residuals of %s' % train_x.columns[kk])
      plt.xlabel(residual_strings[kk])
      plt.ylabel('Number of jets')
      ms.sciy()
      # NOTE(review): std uses np.std while mean and sem skip NaNs - confirm
      # whether NaN residuals should be ignored for the std as well.
      std = np.std(residuals[:, kk])
      std_err = utils.std_error(residuals[:, kk])
      mean = np.nanmean(residuals[:, kk])
      sem = stats.sem(residuals[:, kk], nan_policy='omit')
      ax = plt.gca()
      # Backslashes of the TeX commands are escaped explicitly; '\n' remains
      # a real newline. The resulting text is unchanged.
      plt.text(.75, .8, 'Mean = %f$\\pm$%f\n$\\sigma$ = %f$\\pm$%f' % (mean, sem, std, std_err),
               bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10},
               horizontalalignment='center', verticalalignment='center',
               transform=ax.transAxes, fontsize=18)
      fig_name = model_name + '_residual_%s' % train_x.columns[kk]
      plt.savefig(curr_save_folder + fig_name)

  res_df = pd.DataFrame({'pt': residuals[:, 0], 'eta': residuals[:, 1],
                         'phi': residuals[:, 2], 'E': residuals[:, 3]})

  # Diverging colormap, normalised to [-1, 1], used to colour by correlation.
  cmap = sns.diverging_palette(10, 220, as_cmap=True)
  norm = mpl.colors.Normalize(vmin=-1, vmax=1, clip=False)
  mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)

  group = ['pt', 'eta', 'phi', 'E']

  label_kwargs = {'fontsize': 20}
  title_kwargs = {"fontsize": 11}
  mpl.rcParams['lines.linewidth'] = 1
  mpl.rcParams['xtick.labelsize'] = 12
  mpl.rcParams['ytick.labelsize'] = 12
  group_arr = res_df.values
  corr = res_df.corr()
  # Per-variable axis ranges: the 0.25 % and 99.75 % quantiles.
  qs = np.quantile(group_arr, q=[.0025, .9975], axis=0)
  ndim = qs.shape[1]
  ranges = [tuple(qs[:, kk]) for kk in np.arange(ndim)]
  figure = corner(group_arr, range=ranges, plot_density=True, plot_contours=True,
                  no_fill_contours=False, bins=50, labels=group,
                  label_kwargs=label_kwargs, show_titles=True,
                  title_kwargs=title_kwargs, quantiles=(0.16, 0.84),
                  levels=(1 - np.exp(-0.5), .90), fill_contours=False, title_fmt='.1e')

  # Arrange the figure's axes as an ndim x ndim grid.
  axes = np.array(figure.axes).reshape((ndim, ndim))
  # Colour every panel below the diagonal by the correlation of its pair.
  for yi in np.arange(ndim):
      for xi in np.arange(yi):
          ax = axes[yi, xi]
          ax.set_facecolor(color=mappable.to_rgba(corr.values[yi, xi]))
  cax = figure.add_axes([.87, .4, .04, 0.55])
  cbar = plt.colorbar(mappable, cax=cax, format='%.1f', ticks=np.arange(-1., 1.1, 0.2))
  cbar.ax.set_ylabel('Correlation', fontsize=20)

  fig_name = 'corner_3d.png'
  plt.savefig(curr_save_folder + fig_name)
def _save_reco_histograms(pred, data, curr_save_folder, prefix):
    """Save one input-vs-output histogram per variable as ``<prefix>_<column>``."""
    for kk in np.arange(4):
        plt.figure()
        # The output histogram reuses the bin edges of the input histogram.
        _, bin_edges, _ = plt.hist(data[:, kk],
                                   color=colors[1],
                                   label='Input',
                                   alpha=1,
                                   bins=50)
        plt.hist(pred[:, kk],
                 color=colors[0],
                 label='Output',
                 alpha=0.8,
                 bins=bin_edges)
        plt.suptitle(train_x.columns[kk])
        plt.xlabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.ylabel('Number of events')
        ms.sciy()
        plt.legend()
        fig_name = '%s_%s' % (prefix, train_x.columns[kk])
        plt.savefig(curr_save_folder + fig_name)


def _save_latent_scatter(latent, xi, yi, curr_save_folder):
    """Save a scatter plot of latent dimension ``yi`` against ``xi`` (0-based)."""
    plt.figure()
    plt.scatter(latent[:, xi], latent[:, yi], s=1)
    plt.xlabel(r'$z_%d$' % (xi + 1))
    plt.ylabel(r'$z_%d$' % (yi + 1))
    fig_name = 'latent_scatter_z%dz%d' % (xi + 1, yi + 1)
    plt.savefig(curr_save_folder + fig_name)


def save_plots(learn, module_string, lr, wd, pp):
    """Create and save the diagnostic figures for one trained model.

    The output folder is ``grid_search_folder + get_mod_folder(...)`` and is
    created if missing. Written there: the weight-activation plot, the
    loss curves (``losses``, ``losses_val.png``) and a ``losses.txt``
    summary, input/output histograms, an input-vs-output line plot of the
    first 100 events, latent-variable histograms and latent scatter plots.

    Besides its arguments the function reads these module-level names:
    ``grid_search_folder``, ``get_mod_folder``, ``plot_activations``,
    ``loss_func``, ``get_unnormalized_reconstructions``, ``test_x``,
    ``train_x``, ``train_mean``, ``train_std``, ``colors``,
    ``variable_list``, ``unit_list``, ``line_style``, ``markers`` and ``ms``.

    Args:
        learn: Learner exposing ``model``, ``recorder.losses`` (one entry
            per processed batch - assumed, TODO confirm) and
            ``recorder.val_losses`` (one entry per epoch).
        module_string, lr, wd, pp: Forwarded to ``get_mod_folder`` to build
            the folder name.

    Returns:
        The folder name returned by ``get_mod_folder``.
    """
    # Make and save figures
    curr_mod_folder = get_mod_folder(module_string, lr, pp, wd)
    curr_save_folder = grid_search_folder + curr_mod_folder
    # exist_ok avoids the check-then-create race; missing parents are created.
    os.makedirs(curr_save_folder, exist_ok=True)

    # Weight activation stats
    plot_activations(learn, save=curr_save_folder + 'weight_activation')

    # Plot losses
    train_losses = learn.recorder.losses
    val_losses = learn.recorder.val_losses
    batches = len(train_losses)
    epos = len(val_losses)
    # x positions of the validation points, in units of processed batches.
    val_iter = (batches / epos) * np.arange(1, epos + 1, 1)
    loss_name = str(loss_func).split("(")[0]
    plt.figure()
    plt.plot(train_losses, label='Train')
    plt.plot(val_iter, val_losses, label='Validation', color='orange')
    plt.yscale(value='log')
    plt.legend()
    plt.ylabel(loss_name)
    plt.xlabel('Batches processed')
    plt.savefig(curr_save_folder + 'losses')

    plt.figure()
    plt.plot(val_losses, label='Validation', color='orange')
    plt.title('Validation loss')
    plt.legend()
    plt.ylabel(loss_name)
    plt.xlabel('Epoch')
    # Annotate every point with its numeric value.
    for i_val, val in enumerate(val_losses):
        plt.text(i_val, val, str(val), horizontalalignment='center')
    plt.savefig(curr_save_folder + 'losses_val' + '.png')

    batches_per_epoch = batches // epos
    with open(curr_save_folder + 'losses.txt', 'w') as f:
        for i_val, val in enumerate(val_losses):
            # Training loss of the last batch of this epoch. The "- 1" is
            # applied after the multiplication so the index does not drift
            # one batch earlier with every epoch.
            last_batch = (i_val + 1) * batches_per_epoch - 1
            f.write('Epoch %d    Validation %s: %e    Training %s: %e\n' %
                    (i_val, loss_name, val, loss_name,
                     train_losses[last_batch]))

    # Histograms
    idxs = (0, 100000)  # Choose events to compare
    pred, data = get_unnormalized_reconstructions(learn.model,
                                                  df=test_x,
                                                  idxs=idxs,
                                                  train_mean=train_mean,
                                                  train_std=train_std)
    _save_reco_histograms(pred, data, curr_save_folder, 'hist')

    # Plot input on top of output
    idxs = (0, 100)  # Choose events to compare
    pred, data = get_unnormalized_reconstructions(learn.model,
                                                  df=test_x,
                                                  idxs=idxs,
                                                  train_mean=train_mean,
                                                  train_std=train_std)
    for kk in np.arange(4):
        plt.figure()
        plt.plot(data[:, kk],
                 color=colors[1],
                 label='Input',
                 linestyle=line_style[1],
                 marker=markers[1])
        plt.plot(pred[:, kk],
                 color=colors[0],
                 label='Output',
                 linestyle=line_style[0],
                 marker=markers[0])
        # Same column source as every other title in this function.
        plt.suptitle(train_x.columns[kk])
        plt.xlabel('Event')
        plt.ylabel(variable_list[kk] + ' ' + unit_list[kk])
        plt.legend()
        ms.sciy()
        fig_name = 'plot_%s' % train_x.columns[kk]
        plt.savefig(curr_save_folder + fig_name)

    # Plot latent space
    data = torch.tensor(test_x.values)
    latent = learn.model.encode(data).detach().numpy()
    for ii in np.arange(latent.shape[1]):
        plt.figure()
        plt.hist(latent[:, ii], label='$z_%d$' % (ii + 1), color='m')
        plt.suptitle('Latent variable #%d' % (ii + 1))
        plt.legend()
        ms.sciy()
        fig_name = 'latent_hist_z%d' % (ii + 1)
        plt.savefig(curr_save_folder + fig_name)

    # Latent space scatter plots
    idxs = (0, 10000)  # Choose events to compare
    data = torch.tensor(test_x[idxs[0]:idxs[1]].values)
    latent = learn.model.encode(data).detach().numpy()
    # Pairs among the first three latent dimensions; pairs that need a
    # dimension the model does not have are skipped.
    for xi, yi in ((0, 1), (0, 2), (1, 2)):
        if yi < latent.shape[1]:
            _save_latent_scatter(latent, xi, yi, curr_save_folder)

    # Low pT histograms
    # NOTE(review): no pT selection is applied here - the same events as in
    # the first histogram set are used; only the file prefix differs.
    idxs = (0, 100000)  # Choose events to compare
    pred, data = get_unnormalized_reconstructions(learn.model,
                                                  df=test_x,
                                                  idxs=idxs,
                                                  train_mean=train_mean,
                                                  train_std=train_std)
    _save_reco_histograms(pred, data, curr_save_folder, 'lowpt_hist')

    return curr_mod_folder