예제 #1
0
    def gen_obs(self, x_points):
        """Evaluate the dataset's generating function at every given point.

        Args:
            x_points: Iterable of inputs; each one is passed on its own to
                ``dataset.generating_function``.

        Returns:
            A list with ``generating_function(x)`` for each ``x`` in
            ``x_points``, in iteration order.
        """
        # Resolved at call time rather than at module import.
        from dataset import generating_function

        return [generating_function(point) for point in x_points]
def visualise_optimisation(networks, x, y, epoch):
    """Plot the ensemble fit and its uncertainty band, then save it to disk.

    Every network in ``networks`` is evaluated on a fixed grid
    (``np.arange(-3, 6, 0.01)``). The mean of the ensemble predictions is
    drawn as the fitted function, and a band of +/- 4 times the value
    returned by ``compute_ensemble_reward`` is drawn around it. The training
    points and ``generating_function`` evaluated on the grid are overlaid.
    The figure is written to ``data/result_<epoch>.png`` with ``epoch``
    left-padded with zeros to width 8, and then closed.

    Args:
        networks: Mutable sequence of callables exposing ``.eval()``
            (presumably ``torch.nn.Module`` instances -- confirm with the
            caller). Each entry is replaced in place by the result of its
            ``.eval()`` call.
        x: Training inputs; must support ``.cpu().numpy()``.
        y: Training targets; must support ``.cpu().numpy()``.
        epoch: Value interpolated into the output file name.

    Returns:
        None.
    """
    test_x = np.arange(-3, 6, 0.01)

    # Switch to evaluation mode once up front; this does not depend on the
    # grid point, so there is no need to repeat it inside the loop below.
    for idx, network in enumerate(networks):
        networks[idx] = network.eval()

    # Only forward values are needed for plotting, so skip autograd tracking.
    with torch.no_grad():
        predictions_per_point = []
        for an_x in test_x:
            x_tensor = torch.FloatTensor([an_x]).to(CUDA_DEVICE)
            predictions_per_point.append(
                [network(x_tensor) for network in networks])

        epistemic_uncertainty = [
            compute_ensemble_reward(ensemble_predictions)
            for ensemble_predictions in predictions_per_point
        ]
        ensemble_mean = torch.FloatTensor([
            torch.mean(torch.stack(ensemble_predictions))
            for ensemble_predictions in predictions_per_point
        ])

    # Convert to NumPy once so the band arithmetic below is plain ndarray
    # math instead of mixing torch tensors with NumPy arrays.
    y_to_plot = ensemble_mean.cpu().numpy()
    uncertainty_band = 4 * (
        torch.FloatTensor(epistemic_uncertainty).detach().cpu().numpy())

    plt.rc("font", family="serif")
    plt.rc("xtick", labelsize="medium")
    plt.rc("ytick")
    plt.title("AMA Reward Intuition")
    plt.scatter(
        x.cpu().numpy(),
        y.cpu().numpy(),
        c="Red",
        marker=".",
        s=1.0,
        label="Training Data",
    )
    plt.plot(test_x, y_to_plot, color="purple", label="Fitted Function")
    plt.plot(
        test_x,
        [generating_function(i) for i in test_x],
        color="red",
        label="True Underlying Function",
        alpha=0.2,
    )
    plt.fill_between(
        test_x,
        y_to_plot - uncertainty_band,
        y_to_plot + uncertainty_band,
        alpha=0.2,
        color="purple",
        label="Epistemic Uncertainty",
    )
    plt.ylim(-6, 6)
    plt.legend(loc="lower right")
    plt.savefig(f"data/result_{epoch:0>8}.png")
    plt.close()
def test_step(bandit_env):
    """Take 1000 random steps in ``bandit_env`` and check each observation.

    For every step a random action is drawn from ``bandit_env.action_space``
    and the returned ``(x, y)`` observation is checked:

    * ``x`` and ``y`` have the same length, equal to ``bandit_env.obs_size``;
    * every ``x`` lies strictly between the two bounds stored under that
      action in ``bandit_env.action_region_dict``;
    * ``y`` equals ``generating_function`` applied to each ``x``.

    Args:
        bandit_env: Environment exposing ``action_space``, ``step``,
            ``obs_size`` and ``action_region_dict``.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    from dataset import generating_function

    for _ in range(1000):
        action = random.choice(bandit_env.action_space)
        (x, y), _reward = bandit_env.step(action)

        assert len(x) == len(y)
        assert len(x) == bandit_env.obs_size

        for point in x:
            region = bandit_env.action_region_dict[action]
            assert point > region[0]
            assert point < region[1]

        expected_y = np.array([generating_function(point) for point in x])
        assert np.array_equal(expected_y, np.array(y))
예제 #4
0
def visualise_optimisation(network, x, y, epoch):
    """Plot the network fit with its uncertainty bands, then save it to disk.

    ``network`` is evaluated point by point on a fixed grid
    (``np.arange(-3, 6, 0.01)``) and is expected to return a pair
    ``(mu, log_sigma_squared)``. The figure shows:

    * the training data and ``generating_function`` on the grid;
    * ``mu`` as the fitted function;
    * purple bands of +/- 1x and 2x ``exp(log_sigma_squared)`` around ``mu``;
    * orange bands of +/- 1x and 2x
      ``sqrt(max(squared_error - exp(log_sigma_squared), 0))`` around ``mu``,
      where ``squared_error = (mu - generating_function(x)) ** 2``.

    The figure is written to ``data/result_<epoch>.png`` with ``epoch``
    left-padded with zeros to width 8, and then closed.

    Args:
        network: Callable taking a one-element float tensor on
            ``CUDA_DEVICE`` and returning ``(mu, log_sigma_squared)``.
        x: Training inputs; must support ``.cpu().numpy()``.
        y: Training targets; must support ``.cpu().numpy()``.
        epoch: Value interpolated into the output file name.

    Returns:
        None.
    """
    test_x = np.arange(-3, 6, 0.01)
    mus = []
    sigmas = []
    mses = []

    # Only forward values are needed for plotting, so skip autograd tracking.
    with torch.no_grad():
        for an_x in test_x:
            mu, log_sigma_squared = network(
                torch.FloatTensor([an_x]).to(CUDA_DEVICE))
            mus.append(mu)
            sigmas.append(torch.exp(log_sigma_squared))
            mses.append((mu - generating_function(an_x))**2)

    # Convert each series to NumPy exactly once and reuse it for every band.
    y_to_plot = torch.FloatTensor(mus).cpu().numpy()
    # NOTE(review): if `log_sigma_squared` is log(sigma^2) as its name
    # suggests, these values are variances, so the "Aleatoric Uncertainty"
    # band is +/- sigma^2 rather than +/- sigma -- confirm this is intended.
    sigma_values = torch.FloatTensor(sigmas).detach().cpu().numpy()
    squared_errors = torch.FloatTensor(mses).detach().cpu().numpy()
    # Error not accounted for by the predicted term, floored at zero so the
    # square root is always defined.
    ama_reward = np.sqrt(
        np.clip(squared_errors - sigma_values, a_min=0, a_max=None))

    plt.rc("font", family="serif")
    plt.rc("xtick", labelsize="medium")
    plt.rc("ytick")
    plt.title("AMA Reward Intuition")
    plt.scatter(
        x.cpu().numpy(),
        y.cpu().numpy(),
        c="Red",
        marker=".",
        s=1.0,
        label="Training Data",
        alpha=0.1,
    )
    plt.plot(test_x, y_to_plot, color="purple", label="Fitted Function")
    plt.plot(
        test_x,
        [generating_function(i) for i in test_x],
        color="red",
        label="True Underlying Function",
        alpha=0.2,
    )
    plt.fill_between(
        test_x,
        y_to_plot - sigma_values,
        y_to_plot + sigma_values,
        alpha=0.2,
        color="purple",
        label="Aleatoric Uncertainty",
    )
    plt.fill_between(
        test_x,
        y_to_plot - 2 * sigma_values,
        y_to_plot + 2 * sigma_values,
        alpha=0.2,
        color="purple",
    )
    plt.fill_between(
        test_x,
        y_to_plot - ama_reward,
        y_to_plot + ama_reward,
        alpha=0.2,
        color="orange",
        label="AMA Reward",
    )
    plt.fill_between(
        test_x,
        y_to_plot - 2 * ama_reward,
        y_to_plot + 2 * ama_reward,
        alpha=0.2,
        color="orange",
    )
    plt.ylim(-6, 6)
    plt.legend()
    plt.savefig(f"data/result_{epoch:0>8}.png")
    plt.close()