def gen_obs(self, x_points):
    """Evaluate ``dataset.generating_function`` at every point of ``x_points``.

    Args:
        x_points: Iterable of inputs; each element is passed, one at a time,
            to ``generating_function``.

    Returns:
        A list holding the function value for each input, in input order.
    """
    # Imported at call time, as in the rest of this code base.
    from dataset import generating_function

    return [generating_function(point) for point in x_points]
def visualise_optimisation(networks, x, y, epoch):
    """Plot the ensemble mean and its uncertainty band, then save the figure.

    Every network is switched to eval mode and evaluated point by point on the
    grid ``np.arange(-3, 6, 0.01)``. The mean over the ensemble is drawn as the
    fitted function, and ``compute_ensemble_reward`` of the per-point
    predictions is drawn as a shaded band of +/- 4 times that value. The
    figure is written to ``data/result_<epoch>.png`` (epoch left-padded with
    zeros to 8 characters) and then closed.

    Args:
        networks: Iterable of models. Each must expose ``.eval()`` and be
            callable on a 1-element float tensor placed on ``CUDA_DEVICE``.
            The models are left in eval mode on return.
        x: Training inputs; must support ``.cpu().numpy()``.
        y: Training targets; must support ``.cpu().numpy()``.
        epoch: Value formatted into the output file name.
    """
    test_x = np.arange(-3, 6, 0.01)

    # Eval mode is a per-model switch, so flip it once instead of once per
    # grid point. Assumes eval() returns the model itself, as
    # torch.nn.Module.eval does -- TODO confirm for custom wrappers.
    for network in networks:
        network.eval()

    mean_predictions = []
    epistemic_uncertainty = []
    # Nothing here is back-propagated, so skip autograd bookkeeping.
    with torch.no_grad():
        for an_x in test_x:
            point = torch.FloatTensor([an_x]).to(CUDA_DEVICE)
            ensemble_predictions = [network(point) for network in networks]
            mean_predictions.append(
                float(torch.mean(torch.stack(ensemble_predictions))))
            epistemic_uncertainty.append(
                float(compute_ensemble_reward(ensemble_predictions)))

    # Plain numpy arrays from here on: matplotlib and the band arithmetic
    # should not have to mix torch tensors with numpy operands.
    y_to_plot = np.array(mean_predictions)
    band = 4 * np.array(epistemic_uncertainty)

    plt.rc("font", family="serif")
    plt.rc("xtick", labelsize="medium")
    plt.rc("ytick")
    plt.title("AMA Reward Intuition")
    plt.scatter(
        x.cpu().numpy(),
        y.cpu().numpy(),
        c="Red",
        marker=".",
        s=1.0,
        label="Training Data",
    )
    plt.plot(test_x, y_to_plot, color="purple", label="Fitted Function")
    plt.plot(
        test_x,
        [generating_function(i) for i in test_x],
        color="red",
        label="True Underlying Function",
        alpha=0.2,
    )
    plt.fill_between(
        test_x,
        y_to_plot - band,
        y_to_plot + band,
        alpha=0.2,
        color="purple",
        label="Epistemic Uncertainty",
    )
    plt.ylim(-6, 6)
    plt.legend(loc="lower right")
    plt.savefig(f"data/result_{epoch:0>8}.png")
    plt.close()
def test_step(bandit_env):
    """Check ``bandit_env.step`` over 1000 randomly chosen actions.

    For each step the test asserts that:
      * the returned ``x`` and ``y`` have the same length,
      * that length equals ``bandit_env.obs_size``,
      * every ``x`` lies strictly between the two bounds stored in
        ``bandit_env.action_region_dict`` for the chosen action,
      * ``y`` equals ``generating_function`` applied to each ``x``.

    Args:
        bandit_env: Environment exposing ``action_space``, ``step``,
            ``obs_size`` and ``action_region_dict``.
    """
    from dataset import generating_function

    for _ in range(1000):
        chosen_action = random.choice(bandit_env.action_space)
        (x, y), _reward = bandit_env.step(chosen_action)

        assert len(x) == len(y)
        assert len(x) == bandit_env.obs_size

        # Bounds are exclusive on both sides.
        for point in x:
            assert point > bandit_env.action_region_dict[chosen_action][0]
            assert point < bandit_env.action_region_dict[chosen_action][1]

        expected_y = np.array([generating_function(point) for point in x])
        assert np.array_equal(expected_y, np.array(y))
def visualise_optimisation(network, x, y, epoch):
    """Plot the fitted mean with aleatoric and AMA-reward bands, then save.

    ``network`` is evaluated point by point on the grid
    ``np.arange(-3, 6, 0.01)``. Three quantities are collected per point:
    the predicted mean, ``exp(log_sigma_squared)``, and the squared error of
    the mean against ``generating_function``. The figure shows the training
    data, the fitted and true functions, 1x and 2x aleatoric bands, and 1x
    and 2x "AMA Reward" bands with half-width
    ``sqrt(max(squared_error - exp(log_sigma_squared), 0))``. It is written
    to ``data/result_<epoch>.png`` (epoch left-padded with zeros to 8
    characters) and then closed.

    Args:
        network: Callable taking a 1-element float tensor on ``CUDA_DEVICE``
            and returning a ``(mu, log_sigma_squared)`` pair of
            single-element values.
        x: Training inputs; must support ``.cpu().numpy()``.
        y: Training targets; must support ``.cpu().numpy()``.
        epoch: Value formatted into the output file name.
    """
    test_x = np.arange(-3, 6, 0.01)

    means = []
    variances = []
    squared_errors = []
    # Nothing here is back-propagated, so skip autograd bookkeeping.
    with torch.no_grad():
        for an_x in test_x:
            mu, log_sigma_squared = network(
                torch.FloatTensor([an_x]).to(CUDA_DEVICE))
            means.append(float(mu))
            variances.append(float(torch.exp(log_sigma_squared)))
            squared_errors.append(float((mu - generating_function(an_x)) ** 2))

    # Convert once; every band below reuses these arrays.
    y_to_plot = np.array(means)
    variance = np.array(variances)
    # exp(log_sigma_squared) is sigma squared (going by the output's name), so
    # the 1x/2x band half-width must be its square root. The raw variance is
    # only used in the AMA term, where it is compared with a squared error.
    aleatoric_std = np.sqrt(variance)
    ama_reward = np.sqrt(
        np.clip(np.array(squared_errors) - variance, a_min=0, a_max=None))

    plt.rc("font", family="serif")
    plt.rc("xtick", labelsize="medium")
    plt.rc("ytick")
    plt.title("AMA Reward Intuition")
    plt.scatter(
        x.cpu().numpy(),
        y.cpu().numpy(),
        c="Red",
        marker=".",
        s=1.0,
        label="Training Data",
        alpha=0.1,
    )
    plt.plot(test_x, y_to_plot, color="purple", label="Fitted Function")
    plt.plot(
        test_x,
        [generating_function(i) for i in test_x],
        color="red",
        label="True Underlying Function",
        alpha=0.2,
    )

    # Only the 1x band of each family carries a legend entry.
    for scale in (1, 2):
        plt.fill_between(
            test_x,
            y_to_plot - scale * aleatoric_std,
            y_to_plot + scale * aleatoric_std,
            alpha=0.2,
            color="purple",
            label="Aleatoric Uncertainty" if scale == 1 else None,
        )
    for scale in (1, 2):
        plt.fill_between(
            test_x,
            y_to_plot - scale * ama_reward,
            y_to_plot + scale * ama_reward,
            alpha=0.2,
            color="orange",
            label="AMA Reward" if scale == 1 else None,
        )

    plt.ylim(-6, 6)
    plt.legend()
    plt.savefig(f"data/result_{epoch:0>8}.png")
    plt.close()