Example #1
 def test_set_transformed_inputs(self):
     for dtype in (torch.float, torch.double):
         train_x = torch.rand(5, 1, dtype=dtype, device=self.device)
         train_y = torch.rand(5, 1, dtype=dtype, device=self.device)
         tf = Normalize(
             d=1,
             bounds=torch.tensor([[0.0], [2.0]], dtype=dtype, device=self.device),
             transform_on_preprocess=False,
         )
         model = SingleTaskGP(train_x, train_y, input_transform=tf)
         self.assertTrue(torch.equal(model.train_inputs[0], train_x))
         mll = ExactMarginalLogLikelihood(model.likelihood, model)
         # check that the input transform is only applied when
         # transform_on_preprocess is True
         self.assertTrue(torch.equal(model.train_inputs[0], train_x))
         tf.transform_on_preprocess = True
         _set_transformed_inputs(mll)
         self.assertTrue(torch.equal(model.train_inputs[0], tf(train_x)))
         model.eval()
         # test no set_train_data method
         mock_model = MockGP(MockPosterior())
         mock_model.train_inputs = (train_x,)
         mock_model.likelihood = model.likelihood
         mock_model.input_transform = tf
         mll = ExactMarginalLogLikelihood(mock_model.likelihood, mock_model)
         with self.assertRaises(BotorchError):
             _set_transformed_inputs(mll)
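A minimal standalone sketch of what the `Normalize` transform does in this test (assuming a recent BoTorch, where the `transform_on_preprocess` flag used above has since been renamed): with bounds `[[0.0], [2.0]]`, inputs are mapped onto the unit cube via `(x - lower) / (upper - lower)`.

import torch
from botorch.models.transforms.input import Normalize

# With fixed bounds [0, 2], the transform is simply x / 2
tf = Normalize(d=1, bounds=torch.tensor([[0.0], [2.0]]))
x = torch.tensor([[0.0], [1.0], [2.0]])
print(tf(x))  # expected: tensor([[0.0000], [0.5000], [1.0000]])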
Example #2
class GaussianProcess(object):
    def __init__(self, dx, param_normalizer, *args, **kwargs):
        print(dx)
        self.param_normalizer = param_normalizer
        self.data_normalizer = normalization_tools.Standardizer()
        self.gp = None

    def fit(self, x_train, y_train):
        # normalize parameter (=input) data
        x_train_norm = self.param_normalizer.project_to(x_train)
        # normalize the data
        y_train_norm = self.data_normalizer.standardize(y_train)

        self.gp = SingleTaskGP(x_train_norm, y_train_norm)
        self.gp.likelihood.noise_covar.register_constraint(
            "raw_noise", GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(self.gp.likelihood, self.gp)
        fit_gpytorch_model(mll)
        return self.gp

    def predict(self, x):
        x_norm = self.param_normalizer.project_to(x)
        self.gp.eval()
        self.gp.likelihood.eval()
        with torch.set_grad_enabled(False):
            pred = self.gp(x_norm)
        return self.data_normalizer.unstandardize(pred.mean.view(
            -1, 1)), self.data_normalizer.unstandardize_wo_mean(pred.variance)
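A hypothetical usage sketch for this wrapper. `UnitCubeProjector` stands in for whatever `param_normalizer` object the project provides; only its `project_to` method is relied on above, so the name is an assumption.

import torch

x_train = torch.rand(20, 2)
y_train = torch.rand(20, 1)

param_normalizer = UnitCubeProjector(torch.zeros(2), torch.ones(2))  # assumed helper
gp_wrapper = GaussianProcess(dx=2, param_normalizer=param_normalizer)
gp_wrapper.fit(x_train, y_train)
mean, var = gp_wrapper.predict(torch.rand(5, 2))  # unstandardized mean and variance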
Example #3
def bo_loop(gp_model: SingleTaskGP, acq_func_id: str,
            acq_func_kwargs: Dict[str, Any],
            acq_func_opt_kwargs: Dict[str, Any],
            bounds: Tensor, tkwargs: Dict[str, Any], q: int,
            num_restarts: int, raw_initial_samples: int, seed: int,
            num_MC_sample_acq: int) -> Tensor:
    # seed everything
    np.random.seed(seed)
    torch.manual_seed(seed)

    # we want to maximize
    fmax = torch.quantile(gp_model.train_targets, .9).item()
    print(f"Using good point cutoff {fmax:.2f}")

    device = gp_model.train_inputs[0].device

    # put bounds on the proper device / dtype
    bounds = bounds.to(**tkwargs)
    gp_model.eval()

    acq_func_kwargs['best_f'] = fmax
    acq_func = query_acq_func(acq_func_id=acq_func_id,
                              acq_func_kwargs=acq_func_kwargs,
                              gp_model=gp_model,
                              q=q,
                              num_MC_samples_acq=num_MC_sample_acq
                              )  # if q is 1 use analytic acquisitions
    acq_func.to(**tkwargs)

    options = {
        'batch_limit': 100
    } if acq_func_opt_kwargs == {} else acq_func_opt_kwargs
    print("Start acquisition function optimization...")
    if q == 1:
        # use optimize_acqf (with L-BFGS)
        candidate, acq_value = optimize_acqf(acq_function=acq_func,
                                             bounds=bounds,
                                             q=q,
                                             num_restarts=num_restarts,
                                             raw_samples=raw_initial_samples,
                                             return_best_only=True,
                                             options=options)
    else:
        candidate, acq_value = optimize_acqf_torch(
            acq_function=acq_func,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_initial_samples,
            return_best_only=True,
            options=options,
        )
    print(f"Acquired {candidate} with acquisition value {acq_value}")
    return candidate.to(device=device)
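A hypothetical invocation of `bo_loop`, assuming `gp_model` is an already-fitted `SingleTaskGP` and that the project helpers `query_acq_func` and `optimize_acqf_torch` are importable; the acquisition id string is an assumption.

import torch

tkwargs = {'dtype': torch.double, 'device': torch.device('cpu')}
dim = gp_model.train_inputs[0].shape[-1]
candidate = bo_loop(gp_model=gp_model,
                    acq_func_id='qExpectedImprovement',  # assumed id
                    acq_func_kwargs={},
                    acq_func_opt_kwargs={},
                    bounds=torch.stack([torch.zeros(dim), torch.ones(dim)]),
                    tkwargs=tkwargs,
                    q=1,
                    num_restarts=10,
                    raw_initial_samples=256,
                    seed=0,
                    num_MC_sample_acq=128)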
Example #4
    def update(self, inputs, targets, *args, **kwargs):
        inputs = inputs.view(-1, self.input_dim)
        targets = targets.view(-1, self.target_dim)
        self._raw_inputs = [torch.cat([*self._raw_inputs, inputs], dim=-2)]
        self._raw_targets = torch.cat([self._raw_targets, targets], dim=-2)
        for i in range(inputs.shape[-2]):
            new_x = self.stem(inputs[i].unsqueeze(0))
            new_y = targets[i].unsqueeze(0)
            _, ranked_models = torch.sort(self._construct_weights(new_x),
                                          dim=0,
                                          descending=True)
            num_candidates = math.ceil(len(self.models) / 2)

            assignment = None
            for model_idx in ranked_models[:num_candidates]:
                num_data = self.models[model_idx].train_targets.size(-1)
                if num_data >= self.max_data_per_model:
                    continue
                else:
                    assignment = model_idx.squeeze(-1)
                    ######################
                    # dummy to init caches
                    self.models[assignment](new_x)
                    ######################
                    new_model = self.models[
                        assignment].condition_on_observations(new_x, new_y)
                    self.models[assignment] = new_model
                    self.update_model_caches()
                    break

            if assignment is None:
                print("Adding new model")
                assignment = torch.tensor((len(self.models), ),
                                          device=new_x.device)
                new_model = SingleTaskGP(new_x,
                                         new_y,
                                         covar_module=self.covar_module)
                new_model.likelihood.initialize(noise=self.noise)
                new_model.eval()
                self.models.append(new_model)
                self.update_model_caches()

            self._assignments = torch.cat([self._assignments, assignment])

        self.train()
        features = self._refresh_features()
        train_dist = self(features)
        loss = -self.mll(train_dist, [m.train_targets for m in self.models])
        self.optimizer.zero_grad()  # assumed: clear stale grads before the update
        loss.backward()
        self.optimizer.step()
        gp_loss = stem_loss = loss.item()

        return gp_loss, stem_loss
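A minimal sketch of the conditioning step this method relies on: BoTorch models expose `condition_on_observations`, which returns a new model whose training data includes the extra pair without refitting hyperparameters. As in the code above, a dummy forward pass is needed first to initialize the prediction caches.

import torch
from botorch.models import SingleTaskGP

model = SingleTaskGP(torch.rand(10, 2), torch.rand(10, 1))
model.eval()
model(torch.rand(1, 2))  # dummy call to init caches, as in update() above
new_model = model.condition_on_observations(torch.rand(1, 2), torch.rand(1, 1))
print(new_model.train_targets.shape)  # torch.Size([11]): one extra observation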
Example #5
def gp_fit_test(x_train: Tensor,
                y_train: Tensor,
                error_train: Tensor,
                x_test: Tensor,
                y_test: Tensor,
                error_test: Tensor,
                gp_obj_model: SingleTaskGP,
                gp_error_model: SingleTaskGP,
                tkwargs: Dict[str, Any],
                gp_test_folder: str,
                obj_out_wp: bool = False,
                err_out_wp: bool = False) -> None:
    """
    1) Estimates the mean test error between the predicted and the true objective function values.
    2) Estimates the mean test error between the reconstruction error predicted by `gp_error_model` and the true reconstruction error of the underlying VAE model.
    :param x_train: normalised points at which the gps were trained
    :param y_train: objective value function corresponding to x_train that were used as targets of `gp_obj_model`
    :param error_train: reconstruction error value at points x_train that were used as targets of `gp_error_model`
    :param x_test: normalised test points
    :param y_test: objective value function corresponding to x_test
    :param error_test: reconstruction error at test points
    :param gp_obj_model: the gp model trained to predict the black box objective function values
    :param gp_error_model: the gp model trained to predict reconstruction error
    :param tkwargs: dict of type and device
    :param gp_test_folder: folder to save test results
    :param obj_out_wp: if the `gp_obj_model` was trained with output warping then need to apply the same transform
    :param err_out_wp: if the `gp_error_model` was trained with output warping then need to apply the same transform
    :return: None (the per-point squared errors and diagnostic plots are saved to `gp_test_folder`)
    """
    do_robust = gp_error_model is not None
    if not os.path.exists(gp_test_folder):
        os.mkdir(gp_test_folder)

    gp_obj_model.eval()
    gp_obj_model.to(tkwargs['device'])
    y_train = y_train.view(-1)
    if do_robust:
        gp_error_model.eval()
        gp_error_model.to(tkwargs['device'])
        error_train = error_train.view(-1)

    with torch.no_grad():
        if obj_out_wp:
            Y_numpy = y_train.cpu().numpy()
            if Y_numpy.min() <= 0:
                y_train = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(),
                                    method='yeo-johnson'))
            else:
                y_train = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(), method='box-cox'))
                if y_train.std() < 0.5:
                    Y_numpy = y_train.numpy()
                    y_train = torch.FloatTensor(
                        power_transform(Y_numpy / Y_numpy.std(),
                                        method='yeo-johnson')).to(x_train)

            Y_numpy = y_test.cpu().numpy()
            if Y_numpy.min() <= 0:
                y_test = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(),
                                    method='yeo-johnson'))
            else:
                y_test = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(), method='box-cox'))
                if y_test.std() < 0.5:
                    Y_numpy = y_test.numpy()
                    y_test = torch.FloatTensor(
                        power_transform(Y_numpy / Y_numpy.std(),
                                        method='yeo-johnson')).to(x_test)

        y_train = y_train.view(-1).to(**tkwargs)
        y_test = y_test.view(-1).to(**tkwargs)

        gp_obj_val_model_mse_train = (
            gp_obj_model.posterior(x_train).mean.view(-1) -
            y_train).pow(2).div(len(y_train))
        gp_obj_val_model_mse_test = (
            gp_obj_model.posterior(x_test).mean.view(-1) - y_test).pow(2).div(
                len(y_test))
        torch.save(
            gp_obj_val_model_mse_train,
            os.path.join(gp_test_folder, 'gp_obj_val_model_mse_train.npz'))
        torch.save(gp_obj_val_model_mse_test,
                   os.path.join(gp_test_folder, 'gp_obj_val_model_mse_test.npz'))
        print(
            f'GP training fit on objective value: MSE={gp_obj_val_model_mse_train.sum().item():.5f}'
        )
        print(
            f'GP testing fit on objective value: MSE={gp_obj_val_model_mse_test.sum().item():.5f}'
        )

        if do_robust:
            if err_out_wp:
                error_train = error_train.view(-1, 1)
                R_numpy = error_train.cpu().numpy()
                if R_numpy.min() <= 0:
                    error_train = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='yeo-johnson'))
                else:
                    error_train = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='box-cox'))
                    if error_train.std() < 0.5:
                        R_numpy = error_train.numpy()
                        error_train = torch.FloatTensor(
                            power_transform(R_numpy / R_numpy.std(),
                                            method='yeo-johnson')).to(x_train)

                R_numpy = error_test.cpu().numpy()
                if R_numpy.min() <= 0:
                    error_test = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='yeo-johnson'))
                else:
                    error_test = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='box-cox'))
                    if error_test.std() < 0.5:
                        R_numpy = error_test.numpy()
                        error_test = torch.FloatTensor(
                            power_transform(R_numpy / R_numpy.std(),
                                            method='yeo-johnson')).to(x_test)

            error_train = error_train.view(-1).to(**tkwargs)
            error_test = error_test.view(-1).to(**tkwargs)

            pred_recon_train = gp_error_model.posterior(x_train).mean.view(-1)
            pred_recon_test = gp_error_model.posterior(x_test).mean.view(-1)

            gp_error_model_mse_train = (error_train -
                                        pred_recon_train).pow(2).div(
                                            len(error_train))
            gp_error_model_mse_test = (error_test -
                                       pred_recon_test).pow(2).div(
                                           len(error_test))
            torch.save(
                gp_error_model_mse_train,
                os.path.join(gp_test_folder, 'gp_error_model_mse_train.npz'))
            torch.save(
                gp_error_model_mse_test,
                os.path.join(gp_test_folder, 'gp_error_model_mse_test.npz'))
            print(
                f'GP training fit on reconstruction errors: MSE={gp_error_model_mse_train.sum().item():.5f}'
            )
            print(
                f'GP testing fit on reconstruction errors: MSE={gp_error_model_mse_test.sum().item():.5f}'
            )
            torch.save(error_test,
                       os.path.join(gp_test_folder, 'true_rec_err_z.pt'))
            torch.save(error_train,
                       os.path.join(gp_test_folder, 'error_train.pt'))

        torch.save(x_train, os.path.join(gp_test_folder, 'train_x.pt'))
        torch.save(x_test, os.path.join(gp_test_folder, 'test_x.pt'))
        torch.save(y_train, os.path.join(gp_test_folder, 'y_train.pt'))
        torch.save(y_test, os.path.join(gp_test_folder, 'y_test.pt'))

        # y plots
        plt.hist(y_train.cpu().numpy(),
                 bins=100,
                 label='y train',
                 alpha=0.5,
                 density=True)
        plt.hist(gp_obj_model.posterior(x_train).mean.view(
            -1).detach().cpu().numpy(),
                 bins=100,
                 label='y pred',
                 alpha=0.5,
                 density=True)
        plt.legend()
        plt.title('Training set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_train.pdf'))
        plt.close()

        plt.hist(gp_obj_val_model_mse_train.detach().cpu().numpy(),
                 bins=100,
                 alpha=0.5,
                 density=True)
        plt.title('MSE of gp_obj_val model on training set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_train_mse.pdf'))
        plt.close()

        plt.hist(y_test.cpu().numpy(),
                 bins=100,
                 label='y true',
                 alpha=0.5,
                 density=True)
        plt.hist(gp_obj_model.posterior(x_test).mean.detach().cpu().numpy(),
                 bins=100,
                 alpha=0.5,
                 label='y pred',
                 density=True)
        plt.legend()
        plt.title('Validation set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_test.pdf'))
        plt.close()

        plt.hist(gp_obj_val_model_mse_test.detach().cpu().numpy(),
                 bins=100,
                 alpha=0.5,
                 density=True)
        plt.title('MSE of gp_obj_val model on validation set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_test_mse.pdf'))
        plt.close()

        if do_robust:
            # error plots
            plt.hist(error_train.cpu().numpy(),
                     bins=100,
                     label='error train',
                     alpha=0.5,
                     density=True)
            plt.hist(
                gp_error_model.posterior(x_train).mean.detach().cpu().numpy(),
                bins=100,
                label='error pred',
                alpha=0.5,
                density=True)
            plt.legend()
            plt.title('Training set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_train.pdf'))
            plt.close()

            plt.hist(gp_error_model_mse_train.detach().cpu().numpy(),
                     bins=100,
                     alpha=0.5,
                     density=True)
            plt.title('MSE of gp_error model on training set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_train_mse.pdf'))
            plt.close()

            plt.hist(error_test.cpu().numpy(),
                     bins=100,
                     label='error true',
                     alpha=0.5,
                     density=True)
            plt.hist(
                gp_error_model.posterior(x_test).mean.detach().cpu().numpy(),
                bins=100,
                alpha=0.5,
                label='error pred',
                density=True)
            plt.legend()
            plt.title('Validation set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_test.pdf'))
            plt.close()

            plt.hist(gp_error_model_mse_test.detach().cpu().numpy(),
                     bins=100,
                     alpha=0.5,
                     density=True)
            plt.title('MSE of gp_error model on validation set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_test_mse.pdf'))
            plt.close()

            # y-error plots
            y_train_sorted, indices_train = torch.sort(y_train)
            error_train_sorted = error_train[indices_train]
            gp_y_train_pred_sorted, indices_train_pred = torch.sort(
                gp_obj_model.posterior(x_train).mean.view(-1))
            gp_r_train_pred_sorted = (gp_error_model.posterior(
                x_train).mean.view(-1))[indices_train_pred]
            plt.scatter(y_train_sorted.cpu().numpy(),
                        error_train_sorted.cpu().numpy(),
                        label='true',
                        marker='+')
            plt.scatter(gp_y_train_pred_sorted.detach().cpu().numpy(),
                        gp_r_train_pred_sorted.detach().cpu().numpy(),
                        label='pred',
                        marker='*')
            plt.xlabel('y train targets')
            plt.ylabel('recon. error train targets')
            plt.title('y_train vs. error_train')
            plt.legend()
            plt.savefig(
                os.path.join(gp_test_folder, 'scatter_obj_error_train.pdf'))
            plt.close()

            y_test_std_sorted, indices_test = torch.sort(y_test)
            error_test_sorted = error_test[indices_test]
            gp_y_test_pred_sorted, indices_test_pred = torch.sort(
                gp_obj_model.posterior(x_test).mean.view(-1))
            gp_r_test_pred_sorted = (gp_error_model.posterior(
                x_test).mean.view(-1))[indices_test_pred]
            plt.scatter(y_test_std_sorted.cpu().numpy(),
                        error_test_sorted.cpu().numpy(),
                        label='true',
                        marker='+')
            plt.scatter(gp_y_test_pred_sorted.detach().cpu().numpy(),
                        gp_r_test_pred_sorted.detach().cpu().numpy(),
                        label='pred',
                        marker='*')
            plt.xlabel('y test targets')
            plt.ylabel('recon. error test targets')
            plt.title('y_test vs. error_test')
            plt.legend()
            plt.savefig(
                os.path.join(gp_test_folder, 'scatter_obj_error_test.pdf'))
            plt.close()

            # error var plots
            error_train_sorted, indices_train_pred = torch.sort(error_train)
            # error_train_sorted = error_train
            # indices_train_pred = np.arange(len(error_train))
            gp_r_train_pred_sorted = gp_error_model.posterior(
                x_train).mean[indices_train_pred].view(-1)
            gp_r_train_pred_std_sorted = gp_error_model.posterior(
                x_train).variance.view(-1).sqrt()[indices_train_pred]
            plt.scatter(np.arange(len(indices_train_pred)),
                        error_train_sorted.cpu().numpy(),
                        label='err true',
                        marker='+',
                        color='C1',
                        s=15)
            plt.errorbar(
                np.arange(len(indices_train_pred)),
                gp_r_train_pred_sorted.detach().cpu().numpy().flatten(),
                yerr=gp_r_train_pred_std_sorted.detach().cpu().numpy().flatten(
                ),
                fmt='*',
                alpha=0.05,
                label='err pred',
                color='C0',
                ecolor='C0')
            plt.scatter(np.arange(len(indices_train_pred)),
                        gp_r_train_pred_sorted.detach().cpu().numpy(),
                        marker='*',
                        alpha=0.2,
                        s=10,
                        color='C0')
            # plt.scatter(np.arange(len(indices_train_pred)),
            #             (gp_r_train_pred_sorted + gp_r_train_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean+std', marker='.')
            # plt.scatter(np.arange(len(indices_train_pred)),
            #             (gp_r_train_pred_sorted - gp_r_train_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean-std', marker='.')
            plt.legend()
            plt.title('error predictions and uncertainty on train set')
            plt.savefig(
                os.path.join(gp_test_folder, 'gp_error_train_uncertainty.pdf'))
            plt.close()

            error_test_sorted, indices_test_pred = torch.sort(error_test)
            # error_test_sorted = error_test
            # indices_test_pred = np.arange(len(error_test_sorted))
            gp_r_test_pred_sorted = gp_error_model.posterior(x_test).mean.view(
                -1)[indices_test_pred]
            gp_r_test_pred_std_sorted = gp_error_model.posterior(
                x_test).variance.view(-1).sqrt()[indices_test_pred]
            plt.scatter(np.arange(len(indices_test_pred)),
                        error_test_sorted.cpu().numpy(),
                        label='err true',
                        marker='+',
                        color='C1',
                        s=15)
            plt.errorbar(
                np.arange(len(indices_test_pred)),
                gp_r_test_pred_sorted.detach().cpu().numpy().flatten(),
                yerr=gp_r_test_pred_std_sorted.detach().cpu().numpy().flatten(
                ),
                marker='*',
                alpha=0.05,
                label='err pred',
                color='C0',
                ecolor='C0')
            plt.scatter(np.arange(len(indices_test_pred)),
                        gp_r_test_pred_sorted.detach().cpu().numpy().flatten(),
                        marker='*',
                        color='C0',
                        alpha=0.2,
                        s=10)
            # plt.scatter(np.arange(len(indices_test_pred)),
            #             (gp_r_test_pred_sorted + gp_r_test_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean+std', marker='.')
            # plt.scatter(np.arange(len(indices_test_pred)),
            #             (gp_r_test_pred_sorted - gp_r_test_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean-std', marker='.')
            plt.legend()
            plt.title('error predictions and uncertainty on test set')
            plt.savefig(
                os.path.join(gp_test_folder, 'gp_error_test_uncertainty.pdf'))
            plt.close()

        # y var plots
        y_train_std_sorted, indices_train = torch.sort(y_train)
        gp_y_train_pred_sorted = gp_obj_model.posterior(
            x_train).mean[indices_train].view(-1)
        gp_y_train_pred_std_sorted = gp_obj_model.posterior(
            x_train).variance.sqrt()[indices_train].view(-1)
        plt.scatter(np.arange(len(indices_train)),
                    y_train_std_sorted.cpu().numpy(),
                    label='y true',
                    marker='+',
                    color='C1',
                    s=15)
        plt.scatter(np.arange(len(indices_train)),
                    gp_y_train_pred_sorted.detach().cpu().numpy(),
                    marker='*',
                    alpha=0.2,
                    s=10,
                    color='C0')
        plt.errorbar(
            np.arange(len(indices_train)),
            gp_y_train_pred_sorted.detach().cpu().numpy().flatten(),
            yerr=gp_y_train_pred_std_sorted.detach().cpu().numpy().flatten(),
            fmt='*',
            alpha=0.05,
            label='y pred',
            color='C0',
            ecolor='C0')
        # plt.scatter(np.arange(len(indices_train_pred)),
        #             (gp_y_train_pred_sorted+gp_y_train_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean+std', marker='.')
        # plt.scatter(np.arange(len(indices_train_pred)),
        #             (gp_y_train_pred_sorted-gp_y_train_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean-std', marker='.')
        plt.legend()
        plt.title('y predictions and uncertainty on train set')
        plt.savefig(
            os.path.join(gp_test_folder, 'gp_obj_val_train_uncertainty.pdf'))
        plt.close()

        y_test_std_sorted, indices_test = torch.sort(y_test)
        gp_y_test_pred_sorted = gp_obj_model.posterior(x_test).mean.view(
            -1)[indices_test]
        gp_y_test_pred_std_sorted = gp_obj_model.posterior(
            x_test).variance.view(-1).sqrt()[indices_test]
        plt.scatter(np.arange(len(indices_test)),
                    y_test_std_sorted.cpu().numpy(),
                    label='y true',
                    marker='+',
                    color='C1',
                    s=15)
        plt.errorbar(
            np.arange(len(indices_test)),
            gp_y_test_pred_sorted.detach().cpu().numpy().flatten(),
            yerr=gp_y_test_pred_std_sorted.detach().cpu().numpy().flatten(),
            fmt='*',
            alpha=0.05,
            label='y pred',
            color='C0',
            ecolor='C0')
        plt.scatter(np.arange(len(indices_test)),
                    gp_y_test_pred_sorted.detach().cpu().numpy(),
                    marker='*',
                    alpha=0.2,
                    s=10,
                    color='C0')
        # plt.scatter(np.arange(len(indices_test_pred)),
        #             (gp_y_test_pred_sorted + gp_y_test_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean+std', marker='.')
        # plt.scatter(np.arange(len(indices_test_pred)),
        #             (gp_y_test_pred_sorted - gp_y_test_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean-std', marker='.')
        plt.legend()
        plt.title('y predictions and uncertainty on test set')
        plt.savefig(
            os.path.join(gp_test_folder, 'gp_obj_val_test_uncertainty.pdf'))
        plt.close()
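The output-warping branches above all follow the same rule, shown here as a compact sketch (using scikit-learn's `power_transform`, as in the code): Box-Cox requires strictly positive data, so Yeo-Johnson is used whenever the minimum is non-positive or the Box-Cox output collapses in scale.

import torch
from sklearn.preprocessing import power_transform

def warp_targets(y: torch.Tensor) -> torch.Tensor:
    y_np = y.cpu().numpy().reshape(-1, 1)  # power_transform expects 2D input
    if y_np.min() <= 0:
        y_w = power_transform(y_np / y_np.std(), method='yeo-johnson')
    else:
        y_w = power_transform(y_np / y_np.std(), method='box-cox')
        if y_w.std() < 0.5:  # degenerate Box-Cox fit, re-warp
            y_w = power_transform(y_w / y_w.std(), method='yeo-johnson')
    return torch.from_numpy(y_w).view(-1).to(y)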
Example #6
def gp_torch_train(train_x: Tensor,
                   train_y: Tensor,
                   n_inducing_points: int,
                   tkwargs: Dict[str, Any],
                   init,
                   scale: bool,
                   covar_name: str,
                   gp_file: Optional[str],
                   save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Optional[Dict[str, Any]] = None) -> SingleTaskGP:
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # build hyp
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        kmeans = MiniBatchKMeans(n_clusters=n_inducing_points,
                                 batch_size=min(10000, train_x.shape[0]),
                                 n_init=25)
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

        output_scale = None
        if scale:
            output_scale = train_y.var().item()
        lscales = torch.empty(1, train_x.shape[1])
        for i in range(train_x.shape[1]):
            lscales[0, i] = torch.pdist(train_x[:, i].view(
                -1, 1)).median().clamp(min=0.01)
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)

        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=inducing_points,
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = CustomWarp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
    else:
        # load model
        output_scale = 1  # will be overwritten when loading model
        lscales = torch.ones(
            train_x.shape[1])  # will be overwritten when loading model
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=torch.empty(
                                               n_inducing_points,
                                               train_x.shape[1]),
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = Warp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )
        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    # fit_gpytorch_model(mll, optimizer=fit_gpytorch_torch, approx_mll=False, options=options)
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=input_wp,
                       clip_value=10.0)
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")

    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")

    model.eval()
    return model
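A hypothetical call to `gp_torch_train`; the covariance id is an assumption about what `query_covar` accepts, and `options` is forwarded to `fit_gpytorch_torch`.

import torch

tkwargs = {'dtype': torch.float, 'device': torch.device('cpu')}
model = gp_torch_train(train_x=torch.rand(500, 8),
                       train_y=torch.rand(500, 1),
                       n_inducing_points=50,
                       tkwargs=tkwargs,
                       init=True,               # fit from scratch (no gp_file)
                       scale=True,
                       covar_name='matern-52',  # assumed id known to query_covar
                       gp_file=None,
                       save_file='gp_state.pt',
                       input_wp=False,
                       options={'maxiter': 100})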
Example #7
def render_singletask_gp(
    ax: Union[plt.Axes, Axes3D, Sequence[plt.Axes]],
    data_x: to.Tensor,
    data_y: to.Tensor,
    idcs_sel: list,
    data_x_min: to.Tensor = None,
    data_x_max: to.Tensor = None,
    x_label: str = '',
    y_label: str = '',
    z_label: str = '',
    min_gp_obsnoise: float = None,
    resolution: int = 201,
    num_stds: int = 2,
    alpha: float = 0.3,
    color: str = None,
    curve_label: str = 'mean',
    heatmap_cmap: colors.Colormap = None,
    show_legend_posterior: bool = True,
    show_legend_std: bool = False,
    show_legend_data: bool = True,
    legend_data_cmap: colors.Colormap = None,
    colorbar_label: str = None,
    title: str = None,
    render3D: bool = True,
) -> plt.Figure:
    """
    Fit the GP posterior to the input data and plot the mean and std as well as the data points.
    There are three options, inferred from the data dimensions: a 1D plot, a 2D heat map, and a 3D surface plot.

    .. note::
        If you want to have a tight layout, it is best to pass axes of a figure with `tight_layout=True` or
        `constrained_layout=True`.

    :param ax: axis of the figure to plot on, only in case of a 2-dim heat map plot provide 2 axis
    :param data_x: data to plot on the x-axis
    :param data_y: data to process and plot on the y-axis
    :param idcs_sel: selected indices of the input data
    :param data_x_min: explicit minimum value for the evaluation grid, by default this value is extracted from `data_x`
    :param data_x_max: explicit maximum value for the evaluation grid, by default this value is extracted from `data_x`
    :param x_label: label for x-axis
    :param y_label: label for y-axis
    :param z_label: label for z-axis (3D plot only)
    :param min_gp_obsnoise: minimal (normalized) observation-noise value for the GP; if `None`, no lower bound is enforced on the noise
    :param resolution: number of samples for the input (corresponds to x-axis resolution of the plot)
    :param num_stds: number of standard deviations to plot around the mean
    :param alpha: transparency (alpha-value) for the std area
    :param color: color (e.g. 'k' for black), `None` invokes the default behavior
    :param curve_label: label for the mean curve (1D plot only)
    :param heatmap_cmap: color map forwarded to `render_heatmap()` (2D plot only), `None` to use Pyrado's default
    :param show_legend_posterior: flag if the legend entry for the posterior should be printed (affects mean and std)
    :param show_legend_std: flag if a legend entry for the std area should be printed
    :param show_legend_data: flag if a legend entry for the individual data points should be printed
    :param legend_data_cmap: color map for the sampled points, default is 'binary'
    :param colorbar_label: label for the color bar (2D plot only)
    :param title: title displayed above the figure, set to `None` to suppress the title
    :param render3D: use 3D rendering if possible
    :return: handle to the resulting figure
    """
    if data_x.ndim != 2:
        raise pyrado.ShapeErr(
            msg="The GP's input data needs to be of shape num_samples x dim_input!")
    data_x = data_x[:, idcs_sel]  # forget the rest
    dim_x = data_x.shape[1]  # samples are along axis 0

    if data_y.ndim != 2:
        raise pyrado.ShapeErr(given=data_y,
                              expected_match=to.Size([data_x.shape[0], 1]))

    if legend_data_cmap is None:
        legend_data_cmap = plt.get_cmap('binary')

    # Project to normalized input and standardized output
    if data_x_min is None or data_x_max is None:
        data_x_min, data_x_max = to.min(data_x, dim=0)[0], to.max(data_x,
                                                                  dim=0)[0]
    data_y_mean, data_y_std = to.mean(data_y, dim=0), to.std(data_y, dim=0)
    data_x = (data_x - data_x_min) / (data_x_max - data_x_min)
    data_y = (data_y - data_y_mean) / data_y_std

    # Create and fit the GP model
    gp = SingleTaskGP(data_x, data_y)
    if min_gp_obsnoise is not None:
        gp.likelihood.noise_covar.register_constraint(
            'raw_noise', GreaterThan(min_gp_obsnoise))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    mll.train()
    fit_gpytorch_model(mll)
    print_cbt('Fitted the SingleTaskGP.', 'g')

    argmax_pmean_norm, argmax_pmean_val_stdzed = optimize_acqf(
        acq_function=PosteriorMean(gp),
        bounds=to.stack([to.zeros(dim_x), to.ones(dim_x)]),
        q=1,
        num_restarts=500,
        raw_samples=1000)
    # Project back
    argmax_posterior = argmax_pmean_norm * (data_x_max -
                                            data_x_min) + data_x_min
    argmax_pmean_val = argmax_pmean_val_stdzed * data_y_std + data_y_mean
    print_cbt(
        f'Converged to argmax of the posterior mean: {argmax_posterior.numpy()}',
        'g')

    mll.eval()
    gp.eval()

    if dim_x == 1:
        # Evaluation grid
        x_grid = np.linspace(min(data_x),
                             max(data_x),
                             resolution,
                             endpoint=True).flatten()
        x_grid = to.from_numpy(x_grid)

        # Mean and standard deviation of the surrogate model
        posterior = gp.posterior(x_grid)
        mean = posterior.mean.detach().flatten()
        std = to.sqrt(posterior.variance.detach()).flatten()

        # Project back from normalized input and standardized output
        x_grid = x_grid * (data_x_max - data_x_min) + data_x_min
        data_x = data_x * (data_x_max - data_x_min) + data_x_min
        data_y = data_y * data_y_std + data_y_mean
        mean = mean * data_y_std + data_y_mean
        std *= data_y_std  # double-checked with posterior.mvn.confidence_region()

        # Plot the curve
        plt.fill_between(x_grid.numpy(),
                         mean.numpy() - num_stds * std.numpy(),
                         mean.numpy() + num_stds * std.numpy(),
                         alpha=alpha,
                         color=color)
        ax.plot(x_grid.numpy(), mean.numpy(), color=color)

        # Plot the queried data points
        scat_plot = ax.scatter(data_x.numpy().flatten(),
                               data_y.numpy().flatten(),
                               marker='o',
                               c=np.arange(data_x.shape[0], dtype=int),
                               cmap=legend_data_cmap)

        if show_legend_data:
            scat_legend = ax.legend(
                *scat_plot.legend_elements(fmt='{x:.0f}'),  # integer formatter
                bbox_to_anchor=(0., 1.1, 1., -0.1),
                title='query points',
                ncol=data_x.shape[0],
                loc='upper center',
                mode='expand',
                borderaxespad=0.,
                handletextpad=-0.5)
            ax.add_artist(scat_legend)
            # Increase vertical space between subplots when printing the data labels
            # plt.tight_layout(pad=2.)  # ignore argument
            # plt.subplots_adjust(hspace=0.6)

        # Plot the argmax of the posterior mean
        # ax.scatter(argmax_posterior.item(), argmax_pmean_val, c='darkorange', marker='o', s=60, label='argmax')
        ax.axvline(argmax_posterior.item(),
                   c='darkorange',
                   lw=1.5,
                   label='argmax')

        if show_legend_posterior:
            ax.add_artist(ax.legend(loc='lower right'))

    elif dim_x == 2:
        # Create mesh grid matrices from x and y vectors
        # x0_grid = to.linspace(min(data_x[:, 0]), max(data_x[:, 0]), resolution)
        # x1_grid = to.linspace(min(data_x[:, 1]), max(data_x[:, 1]), resolution)
        x0_grid = to.linspace(0, 1, resolution)
        x1_grid = to.linspace(0, 1, resolution)
        x0_mesh, x1_mesh = to.meshgrid([x0_grid, x1_grid])
        # transpose is not necessary but makes the mesh identical to np.meshgrid
        x0_mesh, x1_mesh = x0_mesh.t(), x1_mesh.t()

        # Mean and standard deviation of the surrogate model
        x_test = to.stack([
            x0_mesh.reshape(resolution**2, 1),
            x1_mesh.reshape(resolution**2, 1)
        ], -1).squeeze(1)
        posterior = gp.posterior(
            x_test)  # identical to  gp.likelihood(gp(x_test))
        mean = posterior.mean.detach().reshape(resolution, resolution)
        std = to.sqrt(posterior.variance.detach()).reshape(
            resolution, resolution)

        # Project back from normalized input and standardized output
        data_x = data_x * (data_x_max - data_x_min) + data_x_min
        data_y = data_y * data_y_std + data_y_mean
        mean_raw = mean * data_y_std + data_y_mean
        std_raw = std * data_y_std

        if render3D:
            # Project back from normalized input and standardized output (custom for 3D)
            x0_mesh = x0_mesh * (data_x_max[0] - data_x_min[0]) + data_x_min[0]
            x1_mesh = x1_mesh * (data_x_max[1] - data_x_min[1]) + data_x_min[1]
            lower = mean_raw - num_stds * std_raw
            upper = mean_raw + num_stds * std_raw

            # Plot a 2D surface in 3D
            ax.plot_surface(x0_mesh.numpy(), x1_mesh.numpy(), mean_raw.numpy())
            ax.plot_surface(x0_mesh.numpy(),
                            x1_mesh.numpy(),
                            lower.numpy(),
                            color='r',
                            alpha=alpha)
            ax.plot_surface(x0_mesh.numpy(),
                            x1_mesh.numpy(),
                            upper.numpy(),
                            color='r',
                            alpha=alpha)
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            ax.set_zlabel(z_label)

            # Plot the queried data points
            scat_plot = ax.scatter(data_x[:, 0].numpy(),
                                   data_x[:, 1].numpy(),
                                   data_y.numpy(),
                                   marker='o',
                                   c=np.arange(data_x.shape[0], dtype=int),
                                   cmap=legend_data_cmap)

            if show_legend_data:
                scat_legend = ax.legend(
                    *scat_plot.legend_elements(
                        fmt='{x:.0f}'),  # integer formatter
                    bbox_to_anchor=(0.05, 1.1, 0.95, -0.1),
                    loc='upper center',
                    ncol=data_x.shape[0],
                    mode='expand',
                    borderaxespad=0.,
                    handletextpad=-0.5)
                ax.add_artist(scat_legend)

            # Plot the argmax of the posterior mean
            x, y = argmax_posterior[0, 0], argmax_posterior[0, 1]
            ax.scatter(x,
                       y,
                       argmax_pmean_val,
                       c='darkorange',
                       marker='*',
                       s=60)
            # ax.plot((x, x), (y, y), (data_y.min(), data_y.max()), c='k', ls='--', lw=1.5)

        else:
            if len(ax) != 4:
                raise pyrado.ShapeErr(
                    msg='Provide 4 axes! 2 heat maps and 2 color bars.')

            # Project back normalized input and standardized output (custom for 2D)
            x0_grid_raw = x0_grid * (data_x_max[0] -
                                     data_x_min[0]) + data_x_min[0]
            x1_grid_raw = x1_grid * (data_x_max[1] -
                                     data_x_min[1]) + data_x_min[1]

            # Plot a 2D image
            df_mean = pd.DataFrame(mean_raw.numpy(),
                                   columns=x0_grid_raw.numpy(),
                                   index=x1_grid_raw.numpy())
            render_heatmap(df_mean,
                           ax_hm=ax[0],
                           ax_cb=ax[1],
                           x_label=x_label,
                           y_label=y_label,
                           annotate=False,
                           fig_canvas_title='Returns',
                           tick_label_prec=2,
                           add_sep_colorbar=True,
                           cmap=heatmap_cmap,
                           colorbar_label=colorbar_label,
                           num_major_ticks_hm=3,
                           num_major_ticks_cb=2,
                           colorbar_orientation='horizontal')

            df_std = pd.DataFrame(std_raw.numpy(),
                                  columns=x0_grid_raw.numpy(),
                                  index=x1_grid_raw.numpy())
            render_heatmap(
                df_std,
                ax_hm=ax[2],
                ax_cb=ax[3],
                x_label=x_label,
                y_label=y_label,
                annotate=False,
                fig_canvas_title='Standard Deviations',
                tick_label_prec=2,
                add_sep_colorbar=True,
                cmap=heatmap_cmap,
                colorbar_label=colorbar_label,
                num_major_ticks_hm=3,
                num_major_ticks_cb=2,
                colorbar_orientation='horizontal',
                norm=colors.Normalize())  # explicitly instantiate a new norm

            # Plot the queried data points
            for i in [0, 2]:
                scat_plot = ax[i].scatter(data_x[:, 0].numpy(),
                                          data_x[:, 1].numpy(),
                                          marker='o',
                                          s=15,
                                          c=np.arange(data_x.shape[0],
                                                      dtype=int),
                                          cmap=legend_data_cmap)

                if show_legend_data:
                    scat_legend = ax[i].legend(
                        *scat_plot.legend_elements(
                            fmt='{x:.0f}'),  # integer formatter
                        bbox_to_anchor=(0., 1.1, 1., 0.05),
                        loc='upper center',
                        ncol=data_x.shape[0],
                        mode='expand',
                        borderaxespad=0.,
                        handletextpad=-0.5)
                    ax[i].add_artist(scat_legend)

            # Plot the argmax of the posterior mean
            ax[0].scatter(argmax_posterior[0, 0],
                          argmax_posterior[0, 1],
                          c='darkorange',
                          marker='*',
                          s=60)  # steelblue
            ax[2].scatter(argmax_posterior[0, 0],
                          argmax_posterior[0, 1],
                          c='darkorange',
                          marker='*',
                          s=60)  # steelblue
            # ax[0].axvline(argmax_posterior[0, 0], c='w', ls='--', lw=1.5)
            # ax[0].axhline(argmax_posterior[0, 1], c='w', ls='--', lw=1.5)
            # ax[2].axvline(argmax_posterior[0, 0], c='w', ls='--', lw=1.5)
            # ax[2].axhline(argmax_posterior[0, 1], c='w', ls='--', lw=1.5)

    else:
        raise pyrado.ValueErr(msg='Can only plot 1-dim or 2-dim data!')

    return plt.gcf()
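A hypothetical 1D usage of `render_singletask_gp` (assuming pyrado and its plotting dependencies are installed):

import torch as to
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, tight_layout=True)
data_x = to.rand(10, 1)
data_y = to.sin(3 * data_x) + 0.1 * to.randn(10, 1)
render_singletask_gp(ax, data_x, data_y, idcs_sel=[0],
                     x_label='x', y_label='y', curve_label='posterior mean')
plt.show()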
Example #8
  # NOTE: the snippet is truncated above; this reconstructs the presumed
  # optimize_acqf call that produces candidate_x (acq_func from earlier code).
  candidate_x, _ = optimize_acqf(
      acq_function=acq_func,
      bounds=torch.tensor([[-1.0], [1.0]]),
      q=1,
      num_restarts=5,
      raw_samples=20,
  )
  candidate_y = obj_noisy(candidate_x)
  train_x = torch.cat([train_x, candidate_x])
  train_y = torch.cat([train_y, candidate_y])
  model = model.condition_on_observations(X=candidate_x, Y=candidate_y)

  # Train GP...
  mll = ExactMarginalLogLikelihood(model.likelihood, model)
  fit_gpytorch_model(mll)

  # Plotting...
  model.eval()

  fig, ax = plt.subplots(1, 1, figsize=(6, 4))
  plt.title(f"Bayesian Opt. without derivatives, Iteration {it}")
  test_x = torch.linspace(-1, 1, steps=100)

  with torch.no_grad():
    posterior = model.posterior(test_x)
    # these are 2 std devs from mean
    lower, upper = posterior.mvn.confidence_region()

    ax.plot(test_x.cpu().numpy(),
            obj(test_x).cpu().numpy(),
            'r--',
            label="true, noiseless objective")
    ax.plot(train_x.cpu().numpy(), train_y.cpu().numpy(), 'k*',
            alpha=0.1, label="observations")