    def test_kissgp_classification_error(self):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        optimizer = optim.SGD(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-5)

    def test_classification_fast_pred_var(self):
        with gpytorch.settings.fast_pred_var():
            train_x, train_y = train_data()
            likelihood = BernoulliLikelihood()
            model = GPClassificationModel(train_x)
            mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

            # Find optimal model hyperparameters
            model.train()
            likelihood.train()
            optimizer = optim.Adam(model.parameters(), lr=0.1)
            optimizer.n_iter = 0
            for _ in range(50):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            optimizer.step()

            # Set back to eval mode
            model.eval()
            likelihood.eval()
            test_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1).squeeze()

            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
            self.assertLess(mean_abs_error.item(), 1e-5)
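# The snippets in this listing reference a GPClassificationModel and, in some
# cases, module-level train_x / train_y that are not shown. A minimal sketch,
# assuming modern GPyTorch; the originals may differ (e.g. the KISS-GP variants
# presumably wrap the base kernel in a GridInterpolationKernel, and the label
# convention varies between snippets: {0, 1} vs {-1, +1}):
import math

import torch
import gpytorch
from torch import optim
from gpytorch.likelihoods import BernoulliLikelihood
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy

train_x = torch.linspace(0, 1, 10)
train_y = torch.sign(torch.cos(train_x * (4 * math.pi))).add(1).div(2)  # {0, 1} labels


class GPClassificationModel(ApproximateGP):
    def __init__(self, inducing_points=train_x):
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
        variational_strategy = VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=False
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))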
Example #3
def test_kissgp_classification_fast_pred_var():
    with gpytorch.settings.fast_pred_var():
        train_x, train_y = train_data()
        likelihood = BernoulliLikelihood()
        model = GPClassificationModel(train_x)
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1).squeeze()

        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        assert mean_abs_error.item() < 1e-5
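# Several snippets call a train_data() helper that is not shown. A minimal
# sketch consistent with the training loops above (hypothetical; adjust the
# label convention to the snippet at hand):
def train_data(cuda=False):
    train_x = torch.linspace(0, 1, 10)
    train_y = torch.sign(torch.cos(train_x * (4 * math.pi))).add(1).div(2)
    if cuda:
        return train_x.cuda(), train_y.cuda()
    return train_x, train_y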
Example #4
    def test_classification_error(self):
        train_x, train_y = train_data()
        likelihood = BernoulliLikelihood()
        model = GPClassificationModel(train_x)
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(75):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.round()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        assert mean_abs_error.item() < 1e-5
Example #5
    def test_kissgp_classification_error(self):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        with gpytorch.settings.max_preconditioner_size(5):
            optimizer = optim.Adam(model.parameters(), lr=0.15)
            optimizer.n_iter = 0
            for _ in range(100):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Set back to eval mode
            model.eval()
            likelihood.eval()

            test_preds = model(train_x).mean.ge(0.5).float()
            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
            self.assertLess(mean_abs_error.squeeze().item(), 1e-5)
Example #6
    def test_classification_error(self, cuda=False, mll_cls=gpytorch.mlls.VariationalELBO):
        train_x, train_y = train_data(cuda=cuda)
        likelihood = BernoulliLikelihood()
        model = SVGPClassificationModel(torch.linspace(0, 1, 25))
        mll = mll_cls(likelihood, model, num_data=len(train_y))
        if cuda:
            likelihood = likelihood.cuda()
            model = model.cuda()
            mll = mll.cuda()

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(
            [{"params": model.parameters()}, {"params": likelihood.parameters()}],
            lr=0.1,
        )

        _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
        _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
        with warnings.catch_warnings(record=True) as ws, _cg_mock as cg_mock:
            for _ in range(400):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Set back to eval mode
            model.eval()
            likelihood.eval()
            test_preds = likelihood(model(train_x)).mean.squeeze().round().float()
            mean_abs_error = torch.mean(torch.ne(train_y, test_preds).float())
            self.assertLess(mean_abs_error.item(), 2e-1)

            # Make sure CG was called (or not), and no warnings were thrown
            self.assertFalse(cg_mock.called)
            self.assertFalse(any(issubclass(w.category, ExtraComputationWarning) for w in ws))
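
    # The test above constructs SVGPClassificationModel(torch.linspace(0, 1, 25)),
    # which is not defined in this listing. A minimal sketch, assuming a standard
    # SVGP with 25 learned inducing locations (hypothetical):
    #
    #     class SVGPClassificationModel(ApproximateGP):
    #         def __init__(self, inducing_points):
    #             variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    #             variational_strategy = VariationalStrategy(
    #                 self, inducing_points, variational_distribution,
    #                 learn_inducing_locations=True,
    #             )
    #             super().__init__(variational_strategy)
    #             self.mean_module = gpytorch.means.ConstantMean()
    #             self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    #
    #         def forward(self, x):
    #             return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))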

    def test_kissgp_classification_error(self):
        with gpytorch.settings.use_toeplitz(False):
            model = GPClassificationModel()
            likelihood = BernoulliLikelihood()
            mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

            # Find optimal model hyperparameters
            model.train()
            likelihood.train()

            optimizer = optim.Adam(model.parameters(), lr=0.15)
            optimizer.n_iter = 0
            for _ in range(25):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Set back to eval mode
            model.eval()
            likelihood.eval()

            test_preds = model(train_x).mean.ge(0.5).float().mul(2).sub(1).squeeze()
            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)

        self.assertLess(mean_abs_error.item(), 0.15)
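# The tests above toggle several gpytorch.settings context managers
# (fast_pred_var, max_preconditioner_size, use_toeplitz). They compose, e.g.
# (sketch, assuming a fitted model and likelihood are in scope):
#
#     with gpytorch.settings.use_toeplitz(False), \
#             gpytorch.settings.max_preconditioner_size(5), \
#             gpytorch.settings.fast_pred_var():
#         observed = likelihood(model(test_x))  # predictive Bernoulli distribution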
class GPClassifier(ApproximateGP):

    _num_outputs = 1  # to inform GPyTorchModel API

    def __init__(self,
                 dim: int,
                 train_X: Tensor,
                 train_Y: Tensor,
                 options: dict,
                 which_type: Optional[str] = "obj") -> None:

        variational_distribution = CholeskyVariationalDistribution(
            train_X.size(0))
        variational_strategy = UnwhitenedVariationalStrategy(
            self,
            train_X,
            variational_distribution,
            learn_inducing_locations=False)
        super(GPClassifier, self).__init__(variational_strategy)

        self.dim = dim

        if len(train_X) == 0:  # No data case
            train_X = None
            train_Y = None
            self.train_inputs = None
            self.train_targets = None
            self.train_x = None
            self.train_yl = None
        else:
            # Error checking:
            assert train_Y.dim() == 1, "train_Y is required to be 1D"
            assert train_X.shape[-1] == self.dim, "Input dimensions do not agree ... (!)"
            self.train_inputs = [train_X.clone()]
            self.train_targets = train_Y.clone()
            self.train_x = train_X.clone()
            self.train_yl = torch.cat([torch.zeros(len(train_Y), 1), train_Y.view(-1, 1)], dim=1)

        print("\n")
        logger.info("### Initializing GP classifier for constraint g(x) ###")

        # Likelihood:
        noise_std = options.hyperpars.noise_std.value
        self.likelihood = BernoulliLikelihood()

        # For compatibility:
        self.threshold = torch.tensor([float("Inf")])

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lengthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior))

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)), outputscale=[self.dim])
        self.dim_hyperpars = sum(len(val) for val in self.idx_hyperpars.values())

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Initialize prior mean:
        # self.mean_module = ConstantMean()
        self.mean_module = ZeroMean()

        # Initialize covariance function:
        base_kernel = MaternKernel(nu=2.5,
                                   ard_num_dims=self.dim,
                                   lengthscale=0.1 * torch.ones(self.dim))
        self.covar_module = ScaleKernel(base_kernel=base_kernel)

        self.disp_info_scipy_opti = True

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[self.idx_hyperpars["outputscale"]]
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[self.idx_hyperpars["lengthscales"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

        self.Nrestarts = options.hyperpars.optimization.Nrestarts

        self._update_hyperparameters()

        self.eval()
        self.likelihood.eval()

    def set_hyperparameters(self, lengthscale, outputscale, noise):
        self.covar_module.base_kernel.lengthscale = lengthscale
        self.covar_module.outputscale = outputscale
        # self.likelihood.noise[:] = noise
        # self.mean_module.constant[:] = 0.0 # Assume zero mean

    def display_hyperparameters(self):
        logger.info("  Re-optimized hyperparameters")
        logger.info("  ----------------------------")
        logger.info("    Outputscale (stddev) | {0:2.4f}".format(
            self.covar_module.outputscale.item()))
        logger.info("    Lengthscale(s)       | " +
                    str(self.covar_module.base_kernel.lengthscale.detach().cpu(
                    ).numpy().flatten()))

    def logging(self):
        log_out = dict()
        log_out["lengthscale"] = self.covar_module.base_kernel.lengthscale.detach().cpu().numpy()
        log_out["outputscale"] = self.covar_module.outputscale.item()
        # log_out["noise"] = self.likelihood.noise.detach().cpu().numpy()
        log_out["train_inputs"] = (None if self.train_inputs is None
                                   else self.train_inputs[0].detach().cpu().numpy())
        log_out["train_targets"] = (None if self.train_targets is None
                                    else self.train_targets.detach().cpu().numpy())

        return log_out

    def _update_hyperparameters(self):

        # Find optimal model hyperparameters
        self.train()
        self.likelihood.train()

        # Use the adam optimizer
        optimizer = Adam(self.parameters(), lr=0.1)

        # "Loss" for GPs - the marginal log likelihood
        # num_data refers to the number of training datapoints
        mll = VariationalELBO(self.likelihood, self,
                              self.train_targets.numel())

        training_iterations = 50
        for i in range(training_iterations):
            # Zero backpropped gradients from previous iteration
            optimizer.zero_grad()
            # Get predictive output
            output = self(self.train_inputs[0])
            # Calc loss and backprop gradients
            loss = -mll(output, self.train_targets)
            loss.backward()
            # print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
            optimizer.step()

    def _optimize_acqui_use_restarts_individually(self):

        # Get initial random restart points:
        logger.info("  Generating random restarts ...")
        options = {
            "maxiter": 200,
            "ftol": 1e-9,
            "method": "L-BFGS-B",
            "iprint": 2,
            "maxls": 20,
            "disp": self.disp_info_scipy_opti
        }
        bounds = torch.tensor(self.hyperpars_bounds,
                              device=device,
                              dtype=dtype)
        initial_conditions = gen_batch_initial_conditions(
            acq_function=self.mll_objective,
            bounds=bounds,
            q=1,
            num_restarts=self.Nrestarts,
            raw_samples=500,
            options=options)

        logger.info(
            "  Optimizing loss function with {0:d} restarts ...".format(
                self.Nrestarts))
        new_hyperpars, _ = self.opti_hyperpars.run_optimization(
            x_restarts=initial_conditions.view(self.Nrestarts, self.dim_hyperpars))

        logger.info("  Done!")

        return new_hyperpars

    def _get_hyperparameters_bounds(self, hyperpriors):

        # Compute the domain for hyperparameter search by truncating the support of the corresponding hyperprior at the .75 quantile
        # The lower bound is necessary for numerical stability, i.e., when computing logpdf() in classireg.models.mll_gpcr.log_marginal()
        # All values of the dictionary are defined as double lists
        hyperpriors_support = dict(
            lengthscales=[[0.001] * self.dim,
                          [hyperpriors["lengthscales"].ppf(.75)] * self.dim],
            outputscale=[[0.001], [hyperpriors["outputscale"].ppf(.75)]])

        # Automatically get the bounds from the dictionary:
        hyperpars_lb = []
        hyperpars_ub = []
        for hyperpar in hyperpriors_support.values():
            hyperpars_lb += hyperpar[0]
            hyperpars_ub += hyperpar[1]
        hyperpars_bounds = [hyperpars_lb, hyperpars_ub]

        return hyperpars_bounds
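
    # Usage sketch (hypothetical values), assuming scipy hyperpriors as built in
    # __init__ via eval(options.hyperpars.*.prior):
    #
    #     from scipy.stats import gamma
    #     hyperpriors = dict(lengthscales=gamma(a=2.0, scale=0.5),
    #                        outputscale=gamma(a=2.0, scale=1.0))
    #     lb, ub = self._get_hyperparameters_bounds(hyperpriors)
    #     # lb = [0.001] * (dim + 1); ub = the .75 quantile of each prior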

    def _sample_hyperparameters_within_bounds(self, Nsamples):

        # Get a sample from the prior for initialization:
        # Top-level seeds affect this seed, unlike the case new_seed = None
        new_seed = torch.randint(low=0, high=100000, size=(1,)).item()
        hyperpars_restarts = draw_sobol_samples(
            bounds=torch.tensor(self.hyperpars_bounds), n=Nsamples, q=1, seed=new_seed)
        hyperpars_restarts = hyperpars_restarts.squeeze(1)  # Remove batch dimension [n q dim] -> [n dim]

        return hyperpars_restarts

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

    def plot(self,
             axes=None,
             block=False,
             Ndiv=100,
             legend=True,
             title="GPgrad",
             plotting=True,
             plotCDF=False,
             clear_axes=False,
             Nsamples=None,
             ylabel=None,
             ylim=None,
             pause=None,
             showtickslabels_x=True,
             xlabel=None,
             labelsize=None,
             showtickslabels=None,
             showticks=None,
             linewidth=None,
             color=None,
             prob=False):
        '''
        This function hardcodes the plotting limits between zero and one for now
        '''
        if not plotting or self.dim > 1:
            return

        pp = PlotProbability()
        xpred_vec = torch.linspace(0.0, 1.0, Ndiv)[:, None]
        xpred_vec = xpred_vec.unsqueeze(0)  # One batch of Ndiv points: [1 x Ndiv x self.dim]

        mvn_cons = self(xpred_vec)
        pred_lik = self.likelihood(mvn_cons)
        mean_vec = pred_lik.mean

        # Get upper and lower confidence bounds (2 standard deviations from the mean):
        var_vec = pred_lik.variance
        std_vec = var_vec.sqrt()
        lower_ci, upper_ci = mean_vec - std_vec, mean_vec + std_vec

        if self.dim == 1:
            axes = pp.plot_GP_1D(
                xpred_vec=xpred_vec.squeeze().cpu().numpy(),
                fpred_mode_vec=mean_vec.squeeze().detach().cpu().numpy(),
                fpred_quan_minus=lower_ci.squeeze().detach().cpu().numpy(),
                fpred_quan_plus=upper_ci.squeeze().detach().cpu().numpy(),
                X_sta=None if self.train_inputs is None else
                self.train_inputs[0].detach().cpu().numpy(),
                Y_sta=None if self.train_targets is None else
                self.train_targets.detach().cpu().numpy(),
                title=title,
                axes=axes,
                block=block,
                legend=legend,
                clear_axes=True,
                xlabel=xlabel,
                ylabel=ylabel,
                xlim=np.array([0., 1.]),
                ylim=ylim,
                labelsize="x-large",
                legend_loc="best",
                colormap="paper",
                showtickslabels_x=showtickslabels_x)

            if Nsamples is not None:
                # Draw sample paths from the latent GP posterior (mvn_cons)
                f_sample = mvn_cons.sample(sample_shape=torch.Size([Nsamples]))
                for k in range(Nsamples):
                    axes.plot(xpred_vec.squeeze().detach().cpu().numpy(),
                              f_sample[k].squeeze().detach().cpu().numpy(),
                              linestyle="--",
                              linewidth=1.0,
                              color="sienna")

        elif self.dim == 2:
            pass

        plt.show(block=block)
        if pause is not None:
            plt.pause(pause)

        return axes
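
# The closing snippet below assumes data, a model, likelihood, optimizer, and
# mll already exist; a minimal sketch of that context, reusing the hypothetical
# GPClassificationModel defined earlier in this listing:
import matplotlib.pyplot as plt

x_train = torch.linspace(0, 1, 10)
y_train = torch.sign(torch.cos(x_train * (4 * math.pi))).add(1).div(2)  # {0, 1} labels
likelihood = BernoulliLikelihood()
model = GPClassificationModel(x_train)
model.train()
likelihood.train()
optimizer = optim.Adam(model.parameters(), lr=0.1)
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=y_train.numel())
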
n_iterations = 100
for i in range(n_iterations):
    # Zero backpropped gradients from previous iteration
    optimizer.zero_grad()
    # Get predictive output
    output = model(x_train)
    # Calc loss and backprop gradients
    loss = -mll(output, y_train)
    loss.backward()
    print(f"Iter {i} - Loss: {loss.item()}")
    optimizer.step()

# Go into eval mode
model.eval()
likelihood.eval()

with torch.no_grad():
    # Test points are regularly spaced by 0.01 on [0, 1], inclusive
    test_x = torch.linspace(0, 1, 101)
    # Get classification predictions
    observed_pred = likelihood(model(test_x))

    # Initialize fig and axes for plot
    f, ax = plt.subplots(1, 1, figsize=(4, 3))
    ax.plot(x_train.numpy(), y_train.numpy(), 'k*')
    # Get the predicted labels (probabilities of belonging to the positive class)
    # and threshold these probabilities into 0/1 labels
    pred_labels = observed_pred.mean.ge(0.5).float()
    ax.plot(test_x.numpy(), pred_labels.numpy(), 'b')
    ax.set_ylim([-1, 2])