def test_classification_error_cuda(self):
        if torch.cuda.is_available():
            train_x, train_y = train_data(cuda=True)
            likelihood = BernoulliLikelihood().cuda()
            model = GPClassificationModel(train_x).cuda()
            mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

            # Find optimal model hyperparameters
            model.train()
            optimizer = optim.Adam(model.parameters(), lr=0.1)
            optimizer.n_iter = 0
            for _ in range(50):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            optimizer.step()

            # Set back to eval mode
            model.eval()
            test_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1).squeeze()
            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
            self.assertLess(mean_abs_error.item(), 1e-5)
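# The tests above rely on a train_data fixture and a GPClassificationModel defined
# elsewhere in the test module (and they use the older VariationalMarginalLogLikelihood
# name). A minimal sketch of plausible definitions in the current GPyTorch API --
# the real fixtures may differ:
import math
import torch
import gpytorch
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution, UnwhitenedVariationalStrategy


def train_data(cuda=False):
    # A small 1D binary classification problem with a periodic label pattern.
    train_x = torch.linspace(0, 1, 26)
    train_y = torch.sign(torch.cos(train_x * (2 * math.pi))).add(1).div(2)
    return (train_x.cuda(), train_y.cuda()) if cuda else (train_x, train_y)


class GPClassificationModel(ApproximateGP):
    def __init__(self, train_x):
        # Place the variational distribution directly on the training inputs.
        variational_distribution = CholeskyVariationalDistribution(train_x.size(0))
        variational_strategy = UnwhitenedVariationalStrategy(
            self, train_x, variational_distribution, learn_inducing_locations=False
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )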
Example #2
    def test_classification_error(self,
                                  cuda=False,
                                  mll_cls=gpytorch.mlls.VariationalELBO):
        train_x, train_y = train_data(cuda=cuda)
        likelihood = BernoulliLikelihood()
        model = SVGPClassificationModel(torch.linspace(0, 1, 25))
        mll = mll_cls(likelihood, model, num_data=len(train_y))
        if cuda:
            likelihood = likelihood.cuda()
            model = model.cuda()
            mll = mll.cuda()

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(
            [{"params": model.parameters()}, {"params": likelihood.parameters()}],
            lr=0.1,
        )

        _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
        _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
        with warnings.catch_warnings(record=True) as ws, _cg_mock as cg_mock:
            for _ in range(400):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Set back to eval mode
            model.eval()
            likelihood.eval()
            test_preds = likelihood(
                model(train_x)).mean.squeeze().round().float()
            mean_abs_error = torch.mean(torch.ne(train_y, test_preds).float())
            self.assertLess(mean_abs_error.item(), 2e-1)

            # Make sure CG was called (or not), and no warnings were thrown
            self.assertFalse(cg_mock.called)
            self.assertFalse(
                any(
                    issubclass(w.category, ExtraComputationWarning)
                    for w in ws))
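# The SVGPClassificationModel constructed above is not shown in this snippet; a
# minimal sketch of a plausible definition (an SVGP with learnable inducing
# locations -- the model in the actual test suite may differ):
import gpytorch
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy


class SVGPClassificationModel(ApproximateGP):
    def __init__(self, inducing_points):
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
        variational_strategy = VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )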
Example #3
def test_kissgp_classification_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y = train_data(cuda=True)
        likelihood = BernoulliLikelihood().cuda()
        model = GPClassificationModel(train_x.data).cuda()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(
            likelihood, model, n_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for i in range(50):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        test_preds = likelihood(
            model(train_x)).mean().ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        assert (mean_abs_error.data.squeeze()[0] < 1e-5)
Example #4
 def __init__(self):
     super(GPClassificationModel, self).__init__(BernoulliLikelihood())
     self.mean_module = ConstantMean(constant_bounds=[-1e-5, 1e-5])
     self.covar_module = RBFKernel(log_lengthscale_bounds=(-5, 6))
     self.register_parameter('log_outputscale',
                             nn.Parameter(torch.Tensor([0])),
                             bounds=(-5, 6))
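# Note: the __init__ above uses the legacy (pre-1.0) GPyTorch API, where
# hyperparameter ranges were passed as *_bounds keyword arguments and the model
# owned its likelihood. In current GPyTorch the same ranges are expressed with
# constraints; a rough, illustrative mapping (not the original code):
import math
import gpytorch
from gpytorch.constraints import Interval

mean_module = gpytorch.means.ConstantMean()  # constant_bounds=[-1e-5, 1e-5] has no direct analogue here
covar_module = gpytorch.kernels.ScaleKernel(
    gpytorch.kernels.RBFKernel(
        # log_lengthscale_bounds=(-5, 6) roughly maps to a lengthscale in [exp(-5), exp(6)]
        lengthscale_constraint=Interval(math.exp(-5.0), math.exp(6.0))
    ),
    # the hand-registered log_outputscale parameter is ScaleKernel's outputscale today
    outputscale_constraint=Interval(math.exp(-5.0), math.exp(6.0)),
)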
Example #5
 def __init__(self,
              stem,
              init_x,
              num_inducing,
              lr,
              streaming=False,
              beta=1.0,
              learn_inducing_locations=True,
              num_update_steps=1,
              **kwargs):
     super().__init__()
     likelihood = BernoulliLikelihood()
     inducing_points = torch.empty(num_inducing, stem.output_dim)
     inducing_points.uniform_(-1, 1)
     mean_module = ZeroMean()
     covar_module = ScaleKernel(RBFKernel(ard_num_dims=stem.output_dim))
     self.gp = VariationalGPModel(
         inducing_points,
         mean_module,
         covar_module,
         streaming,
         likelihood,
         beta=beta,
         learn_inducing_locations=learn_inducing_locations)
     self.mll = None
     self.stem = stem
     self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
     self.num_update_steps = num_update_steps
     self._raw_inputs = [init_x]
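# The constructor above expects a `stem` feature extractor exposing an
# `output_dim` attribute, plus a VariationalGPModel defined elsewhere. A minimal,
# hypothetical stem that satisfies the interface used here:
import torch
import torch.nn as nn


class MLPStem(nn.Module):
    """Toy feature extractor: maps raw inputs to a latent space whose size is
    read from `output_dim` when the inducing points are created above."""

    def __init__(self, input_dim, output_dim=8):
        super().__init__()
        self.output_dim = output_dim
        self.net = nn.Sequential(nn.Linear(input_dim, 32), nn.ReLU(), nn.Linear(32, output_dim))

    def forward(self, x):
        return self.net(x)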
Example #6
    def test_kissgp_classification_error(self):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalELBO(likelihood,
                                            model,
                                            num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        optimizer = optim.SGD(model.parameters(), lr=0.01)
        optimizer.n_iter = 0
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for _, param in model.named_parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.ge(0.5).float()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.squeeze().item(), 1e-5)
Example #7
    def test_classification_fast_pred_var(self):
        with gpytorch.settings.fast_pred_var():
            train_x, train_y = train_data()
            likelihood = BernoulliLikelihood()
            model = GPClassificationModel(train_x)
            mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

            # Find optimal model hyperparameters
            model.train()
            likelihood.train()
            optimizer = optim.Adam(model.parameters(), lr=0.1)
            optimizer.n_iter = 0
            for _ in range(75):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            optimizer.step()

            # Set back to eval mode
            model.eval()
            likelihood.eval()
            test_preds = likelihood(model(train_x)).mean.round()

            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
            self.assertLess(mean_abs_error.item(), 1e-5)
Example #8
    def test_kissgp_classification_error(self):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, n_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        with gpytorch.settings.max_preconditioner_size(5):
            optimizer = optim.Adam(model.parameters(), lr=0.15)
            optimizer.n_iter = 0
            for _ in range(20):
                optimizer.zero_grad()
                output = model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Set back to eval mode
            model.eval()
            likelihood.eval()

            test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
            self.assertLess(mean_abs_error.squeeze().item(), 1e-5)
Example #9
 def testClassification(self):
     # Init
     target = 0.75
     model_gen_options = {"num_restarts": 1, "raw_samples": 3, "epochs": 5}
     lb = torch.tensor([0, 0])
     ub = torch.tensor([4, 4])
     m = MonotonicRejectionGP(
         lb=lb,
         ub=ub,
         likelihood=BernoulliLikelihood(),
         fixed_prior_mean=target,
         monotonic_idxs=[1],
         num_induc=2,
         num_samples=3,
         num_rejection_samples=4,
     )
     strat = Strategy(
         lb=lb,
         ub=ub,
         model=m,
         generator=MonotonicRejectionGenerator(
             MonotonicMCLSE,
             acqf_kwargs={
                 "target": target,
                 "objective": ProbitObjective()
             },
             model_gen_options=model_gen_options,
         ),
         n_trials=1,
     )
     # Fit
     train_x = torch.tensor([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
     train_y = torch.tensor([1.0, 1.0, 0.0])
     m.fit(train_x=train_x, train_y=train_y)
     self.assertEqual(m.inducing_points.shape, torch.Size([2, 2]))
     self.assertAlmostEqual(m.mean_module.constant.item(), norm.ppf(0.75))
     # Predict
     f, var = m.predict(train_x)
     self.assertEqual(f.shape, torch.Size([3]))
     self.assertEqual(var.shape, torch.Size([3]))
     # Gen
     strat.add_data(train_x, train_y)
     Xopt = strat.gen()
     self.assertEqual(Xopt.shape, torch.Size([1, 2]))
     # Acquisition function
     acq = strat.generator._instantiate_acquisition_fn(m)
     self.assertEqual(acq.deriv_constraint_points.shape, torch.Size([2, 3]))
     self.assertTrue(
         torch.equal(acq.deriv_constraint_points[:, -1], 2 * torch.ones(2)))
     self.assertEqual(acq.target, 0.75)
     self.assertTrue(isinstance(acq.objective, ProbitObjective))
     # Update
     m.update(train_x=train_x[:2, :2], train_y=train_y[:2], warmstart=True)
     self.assertEqual(m.train_inputs[0].shape, torch.Size([2, 3]))
Example #10
    def test_kissgp_classification_error(self):
        with gpytorch.settings.use_toeplitz(False), gpytorch.settings.max_preconditioner_size(5):
            model = GPClassificationModel()
            likelihood = BernoulliLikelihood()
            mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

            # Find optimal model hyperparameters
            model.train()
            likelihood.train()

            optimizer = optim.Adam(model.parameters(), lr=0.14455771335700404)
            optimizer.n_iter = 0
            for _ in range(10):
                optimizer.zero_grad()
                # Get predictive output
                output = model(train_x)
                # Calc loss and backprop gradients
                loss = -mll(output, train_y).sum()
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Set back to eval mode
            model.eval()
            likelihood.eval()

            test_preds = model(train_x).mean.ge(0.5).float()
            mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)

        self.assertLess(mean_abs_error.squeeze().item(), 0.15)
Example #11
def test_kissgp_classification_error():
    with gpytorch.settings.use_toeplitz(False):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(
            likelihood, model, n_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        optimizer = optim.Adam(model.parameters(), lr=0.15)
        optimizer.n_iter = 0
        for i in range(25):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        likelihood.eval()

        test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(
            1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)

    assert (mean_abs_error.data.squeeze()[0] < 0.15)
    def test_kissgp_classification_error(self):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(
            likelihood,
            model,
            n_data=len(train_y),
        )

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        optimizer = optim.SGD(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = (likelihood(
            model(train_x)).mean().ge(0.5).float().mul(2).sub(1).squeeze())
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.data.squeeze()[0], 1e-5)
def main():
    # Initialize classification model
    model = GPClassificationModel().cuda()

    # Likelihood is Bernoulli (binary classification)
    likelihood = BernoulliLikelihood().cuda()

    if mode == 'Train':
        train_x, train_y = prepare_training_data()

        train(train_x, train_y, model, likelihood)

    elif mode == 'Eval':
        print("start to test the model")
        predictions = eval_superpixels(model, likelihood)
        plot_result(predictions)
    else:
        raise Exception("No such mode")
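# main() depends on helpers (prepare_training_data, train, eval_superpixels,
# plot_result) defined elsewhere. A minimal sketch of what the train helper
# could look like, mirroring the ELBO loops used throughout these examples
# (the signature and iteration count are assumptions):
import torch
import gpytorch


def train(train_x, train_y, model, likelihood, n_iter=100):
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.numel())
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)  # negative ELBO
        loss.backward()
        optimizer.step()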
    def _set_model(
        self,
        train_x: Tensor,
        train_y: Tensor,
        model_state_dict: Optional[Dict[str, Tensor]] = None,
        likelihood_state_dict: Optional[Dict[str, Tensor]] = None,
    ) -> None:
        # Augment the data with the derivative index
        train_x_aug = self._augment_with_deriv_index(train_x, 0)
        inducing_points_aug = self._augment_with_deriv_index(self.inducing_points, 0)
        # Create and fit the model
        scales = self.bounds_[1, :] - self.bounds_[0, :]
        fixed_prior_mean = self.fixed_prior_mean
        if fixed_prior_mean is not None and self.likelihood == "probit-bernoulli":
            fixed_prior_mean = norm.ppf(fixed_prior_mean)
        self.model = MixedDerivativeVariationalGP(
            train_x=train_x_aug,
            train_y=train_y.squeeze(),
            inducing_points=inducing_points_aug,
            scales=scales,
            fixed_prior_mean=fixed_prior_mean,
            covar_module=self.covar_module,
            mean_module=self.mean_module,
        )

        self.model_likelihood = (
            BernoulliLikelihood()
            if self.likelihood == "probit-bernoulli"
            else GaussianLikelihood()
        )
        # Set model parameters
        if model_state_dict is not None:
            self.model.load_state_dict(model_state_dict)
        if likelihood_state_dict is not None:
            self.model_likelihood.load_state_dict(likelihood_state_dict)

        # Fit!
        mll = VariationalELBO(
            likelihood=self.model_likelihood, model=self.model, num_data=train_y.numel()
        )
        mll = fit_gpytorch_model(mll)
Example #15
 def testMixedDerivativeVariationalGP(self):
     train_x = torch.cat(
         (torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1), torch.zeros(
             4, 1)),
         dim=1)
     train_y = torch.tensor([1.0, 2.0, 3.0, 4.0])
     m = MixedDerivativeVariationalGP(
         train_x=train_x,
         train_y=train_y,
         inducing_points=train_x,
         fixed_prior_mean=0.5,
     )
     self.assertEqual(m.mean_module.constant.item(), 0.5)
     self.assertEqual(m.covar_module.base_kernel.raw_lengthscale.shape,
                      torch.Size([1, 1]))
     mll = VariationalELBO(likelihood=BernoulliLikelihood(),
                           model=m,
                           num_data=train_y.numel())
     mll = fit_gpytorch_model(mll)
     test_x = torch.tensor([[1.0, 0], [3.0, 1.0]])
     m(test_x)
    def __init__(
        self,
        monotonic_idxs: Sequence[int],
        lb: Union[np.ndarray, torch.Tensor],
        ub: Union[np.ndarray, torch.Tensor],
        dim: Optional[int] = None,
        mean_module: Optional[Mean] = None,
        covar_module: Optional[Kernel] = None,
        likelihood: Optional[Likelihood] = None,
        fixed_prior_mean: Optional[float] = None,
        num_induc: int = 25,
        num_samples: int = 250,
        num_rejection_samples: int = 5000,
    ) -> None:
        """Initialize MonotonicRejectionGP.

        Args:
            likelihood (str): Link function and likelihood. Can be 'probit-bernoulli' or
                'identity-gaussian'.
            monotonic_idxs (List[int]): List of which columns of x should be given monotonicity
            constraints.
            fixed_prior_mean (Optional[float], optional): Fixed prior mean. If classification, should be the prior
            classification probability (not the latent function value). Defaults to None.
            covar_module (Optional[Kernel], optional): Covariance kernel to use (default: scaled RBF).
            mean_module (Optional[Mean], optional): Mean module to use (default: constant mean).
            num_induc (int, optional): Number of inducing points for variational GP.]. Defaults to 25.
            num_samples (int, optional): Number of samples for estimating posterior on preDict or
            acquisition function evaluation. Defaults to 250.
            num_rejection_samples (int, optional): Number of samples used for rejection sampling. Defaults to 4096.
            acqf (MonotonicMCAcquisition, optional): Acquisition function to use for querying points. Defaults to MonotonicMCLSE.
            objective (Optional[MCAcquisitionObjective], optional): Transformation of GP to apply before computing acquisition function. Defaults to identity transform for gaussian likelihood, probit transform for probit-bernoulli.
            extra_acqf_args (Optional[Dict[str, object]], optional): Additional arguments to pass into the acquisition function. Defaults to None.
        """
        self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim)
        if likelihood is None:
            likelihood = BernoulliLikelihood()

        self.inducing_size = num_induc
        inducing_points = self._select_inducing_points(method="sobol")

        inducing_points_aug = self._augment_with_deriv_index(
            inducing_points, 0)
        variational_distribution = CholeskyVariationalDistribution(
            inducing_points_aug.size(0))
        variational_strategy = VariationalStrategy(
            model=self,
            inducing_points=inducing_points_aug,
            variational_distribution=variational_distribution,
            learn_inducing_locations=False,
        )

        if mean_module is None:
            mean_module = ConstantMeanPartialObsGrad()

        if fixed_prior_mean is not None:
            if isinstance(likelihood, BernoulliLikelihood):
                fixed_prior_mean = norm.ppf(fixed_prior_mean)
            mean_module.constant.requires_grad_(False)
            mean_module.constant.copy_(torch.tensor([fixed_prior_mean]))

        if covar_module is None:

            ls_prior = gpytorch.priors.GammaPrior(concentration=4.6,
                                                  rate=1.0,
                                                  transform=lambda x: 1 / x)
            ls_prior_mode = ls_prior.rate / (ls_prior.concentration + 1)
            ls_constraint = gpytorch.constraints.Positive(
                transform=None, initial_value=ls_prior_mode)

            covar_module = gpytorch.kernels.ScaleKernel(
                RBFKernelPartialObsGrad(
                    lengthscale_prior=ls_prior,
                    lengthscale_constraint=ls_constraint,
                    ard_num_dims=dim,
                ),
                outputscale_prior=gpytorch.priors.SmoothedBoxPrior(a=1, b=4),
            )

        super().__init__(variational_strategy)

        self.bounds_ = torch.stack([self.lb, self.ub])
        self.mean_module = mean_module
        self.covar_module = covar_module
        self.likelihood = likelihood

        self.num_induc = num_induc
        self.monotonic_idxs = monotonic_idxs
        self.num_samples = num_samples
        self.num_rejection_samples = num_rejection_samples
        self.fixed_prior_mean = fixed_prior_mean
        self.inducing_points = inducing_points
        super().__init__(variational_strategy)
        self.mean = ConstantMean()
        self.covar = ScaleKernel(RBFKernel())

    def forward(self, x):
        x_mean = self.mean(x)
        x_covar = self.covar(x)
        return MultivariateNormal(x_mean, x_covar)


x_train = torch.linspace(0, 1, 10)
y_train = torch.sign(torch.cos(x_train * (4 * math.pi))).add(1).div(2)

# Initialize model and likelihood
model = GaussianProcessClassification(x_train)
likelihood = BernoulliLikelihood()

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# "Loss" for GPs - the marginal log likelihood
# num_data refers to the number of training datapoints
mll = gpytorch.mlls.VariationalELBO(likelihood, model, y_train.numel())

n_iterations = 100
for i in range(n_iterations):
    # Zero backpropped gradients from previous iteration
    optimizer.zero_grad()
    # Get predictive output from the model
    output = model(x_train)
    # Compute the negative ELBO loss and backpropagate
    loss = -mll(output, y_train)
    loss.backward()
    optimizer.step()
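# After training, the usual follow-up (as in the test snippets above) is to
# switch to eval mode and push the latent GP through the Bernoulli likelihood
# to get class probabilities; a short sketch:
model.eval()
likelihood.eval()
with torch.no_grad():
    # likelihood(model(x)) is a Bernoulli distribution; its mean is P(y = 1 | x)
    probs = likelihood(model(x_train)).mean
    preds = probs.ge(0.5).float()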
class GPClassifier(ApproximateGP):

    _num_outputs = 1  # to inform GPyTorchModel API

    def __init__(self,
                 dim: int,
                 train_X: Tensor,
                 train_Y: Tensor,
                 options: dict,
                 which_type: Optional[str] = "obj") -> None:

        variational_distribution = CholeskyVariationalDistribution(
            train_X.size(0))
        variational_strategy = UnwhitenedVariationalStrategy(
            self,
            train_X,
            variational_distribution,
            learn_inducing_locations=False)
        super(GPClassifier, self).__init__(variational_strategy)

        self.dim = dim

        # pdb.set_trace()
        if len(train_X) == 0:  # No data case
            train_X = None
            train_Y = None
            self.train_inputs = None
            self.train_targets = None
            self.train_x = None
            self.train_yl = None
        else:
            # Error checking:
            assert train_Y.dim() == 1, "train_Y is required to be 1D"
            assert train_X.shape[
                -1] == self.dim, "Input dimensions do not agree ... (!)"
            self.train_inputs = [train_X.clone()]
            self.train_targets = train_Y.clone()
            self.train_x = train_X.clone()
            self.train_yl = torch.cat(
                [torch.zeros((len(train_Y)), 1),
                 train_Y.view(-1, 1)], dim=1)

        print("\n")
        logger.info("### Initializing GP classifier for constraint g(x) ###")

        # Likelihood:
        noise_std = options.hyperpars.noise_std.value
        self.likelihood = BernoulliLikelihood()

        # For compatibility:
        self.threshold = torch.tensor([float("Inf")])

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lenthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior))

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                                  outputscale=[self.dim])
        self.dim_hyperpars = sum(
            [len(val) for val in self.idx_hyperpars.values()])

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Initialize prior mean:
        # self.mean_module = ConstantMean()
        self.mean_module = ZeroMean()

        # Initialize covariance function:
        base_kernel = MaternKernel(nu=2.5,
                                   ard_num_dims=self.dim,
                                   lengthscale=0.1 * torch.ones(self.dim))
        self.covar_module = ScaleKernel(base_kernel=base_kernel)

        self.disp_info_scipy_opti = True

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(
            Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[
            self.idx_hyperpars["outputscale"]]
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[
            self.idx_hyperpars["lengthscales"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

        self.Nrestarts = options.hyperpars.optimization.Nrestarts

        self._update_hyperparameters()

        self.eval()
        self.likelihood.eval()

        # pdb.set_trace()

    def set_hyperparameters(self, lengthscale, outputscale, noise):
        self.covar_module.base_kernel.lengthscale = lengthscale
        self.covar_module.outputscale = outputscale
        # self.likelihood.noise[:] = noise
        # self.mean_module.constant[:] = 0.0 # Assume zero mean

    def display_hyperparameters(self):
        logger.info("  Re-optimized hyperparameters")
        logger.info("  ----------------------------")
        logger.info("    Outputscale (stddev) | {0:2.4f}".format(
            self.covar_module.outputscale.item()))
        logger.info("    Lengthscale(s)       | " +
                    str(self.covar_module.base_kernel.lengthscale.detach().cpu(
                    ).numpy().flatten()))

    def logging(self):
        log_out = dict()
        log_out[
            "lengthscale"] = self.covar_module.base_kernel.lengthscale.detach(
            ).cpu().numpy()
        log_out["outputscale"] = self.covar_module.outputscale.item()
        # log_out["noise"] = self.likelihood.noise.detach().cpu().numpy()
        log_out[
            "train_inputs"] = None if self.train_inputs is None else self.train_inputs[
                0].detach().cpu().numpy()
        log_out[
            "train_targets"] = None if self.train_targets is None else self.train_targets.detach(
            ).cpu().numpy()

        return log_out

    def _update_hyperparameters(self):

        # Find optimal model hyperparameters
        self.train()
        self.likelihood.train()

        # Use the adam optimizer
        optimizer = Adam(self.parameters(), lr=0.1)

        # "Loss" for GPs - the marginal log likelihood
        # num_data refers to the number of training datapoints
        mll = VariationalELBO(self.likelihood, self,
                              self.train_targets.numel())

        training_iterations = 50
        for i in range(training_iterations):
            # Zero backpropped gradients from previous iteration
            optimizer.zero_grad()
            # Get predictive output
            output = self(self.train_inputs[0])
            # Calc loss and backprop gradients
            loss = -mll(output, self.train_targets)
            loss.backward()
            # print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
            optimizer.step()

    def _optimize_acqui_use_restarts_individually(self):

        # Get initial random restart points:
        logger.info("  Generating random restarts ...")
        options = {
            "maxiter": 200,
            "ftol": 1e-9,
            "method": "L-BFGS-B",
            "iprint": 2,
            "maxls": 20,
            "disp": self.disp_info_scipy_opti
        }
        bounds = torch.tensor(self.hyperpars_bounds,
                              device=device,
                              dtype=dtype)
        initial_conditions = gen_batch_initial_conditions(
            acq_function=self.mll_objective,
            bounds=bounds,
            q=1,
            num_restarts=self.Nrestarts,
            raw_samples=500,
            options=options)

        logger.info(
            "  Optimizing loss function with {0:d} restarts ...".format(
                self.Nrestarts))
        new_hyperpars_many = torch.zeros(size=(self.Nrestarts, 1,
                                               self.dim_hyperpars))
        new_hyperpars_loss_many = torch.zeros(size=(self.Nrestarts, ))

        new_hyperpars, _ = self.opti_hyperpars.run_optimization(
            x_restarts=initial_conditions.view(self.Nrestarts,
                                               self.dim_hyperpars))

        logger.info("  Done!")

        return new_hyperpars

    def _get_hyperparameters_bounds(self, hyperpriors):

        # Compute the domain for hyperparameter search by truncating the support of the corresponding hyperprior at the .75 quantile
        # The lower bound is necessary for numerical stability, i.e., when computing logpdf() in classireg.models.mll_gpcr.log_marginal()
        # All values of the dictionary are defined as double lists
        hyperpriors_support = dict(
            lengthscales=[[0.001] * self.dim,
                          [hyperpriors["lengthscales"].ppf(.75)] * self.dim],
            outputscale=[[0.001], [hyperpriors["outputscale"].ppf(.75)]])

        # Automatically get the bounds from the dictionary:
        hyperpars_lb = []
        hyperpars_ub = []
        for hyperpar in hyperpriors_support.values():
            hyperpars_lb += hyperpar[0]
            hyperpars_ub += hyperpar[1]
        hyperpars_bounds = [hyperpars_lb, hyperpars_ub]

        return hyperpars_bounds

    def _sample_hyperparameters_within_bounds(self, Nsamples):

        # Get a sample from the prior for initialization:
        new_seed = torch.randint(low=0, high=100000, size=(1, )).item(
        )  # Top-level seeds have an impact on this one herein; contrary to the case new_seed = None
        hyperpars_restarts = draw_sobol_samples(bounds=torch.tensor(
            self.hyperpars_bounds),
                                                n=Nsamples,
                                                q=1,
                                                seed=new_seed)
        hyperpars_restarts = hyperpars_restarts.squeeze(
            1)  # Remove batch dimension [n q dim] -> [n dim]

        return hyperpars_restarts

    def forward(self, x):

        # A `num_restarts x q x d` tensor of initial conditions.

        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        mvn = MultivariateNormal(mean_x, covar_x)
        return mvn

    def plot(self,
             axes=None,
             block=False,
             Ndiv=100,
             legend=True,
             title="GPgrad",
             plotting=True,
             plotCDF=False,
             clear_axes=False,
             Nsamples=None,
             ylabel=None,
             ylim=None,
             pause=None,
             showtickslabels_x=True,
             xlabel=None,
             labelsize=None,
             showtickslabels=None,
             showticks=None,
             linewidth=None,
             color=None,
             prob=False):
        '''
        This function hardcodes the plotting limits between zero and one for now.
        '''
        if plotting == False or self.dim > 1:
            return

        pp = PlotProbability()
        xpred_vec = torch.linspace(0.0, 1.0, Ndiv)[:, None]
        xpred_vec = xpred_vec.unsqueeze(
            0)  # Ndiv batches of [q=1 x self.dim] dimensions each

        mvn_cons = self(xpred_vec)
        pred_lik = self.likelihood(mvn_cons)
        mean_vec = pred_lik.mean

        # Get upper and lower confidence bounds (2 standard deviations from the mean):
        var_vec = pred_lik.variance
        std_vec = var_vec.sqrt()
        lower_ci, upper_ci = mean_vec - std_vec, mean_vec + std_vec

        if self.dim == 1:
            axes = pp.plot_GP_1D(
                xpred_vec=xpred_vec.squeeze().cpu().numpy(),
                fpred_mode_vec=mean_vec.squeeze().detach().cpu().numpy(),
                fpred_quan_minus=lower_ci.squeeze().detach().cpu().numpy(),
                fpred_quan_plus=upper_ci.squeeze().detach().cpu().numpy(),
                X_sta=None if self.train_inputs is None else
                self.train_inputs[0].detach().cpu().numpy(),
                Y_sta=None if self.train_targets is None else
                self.train_targets.detach().cpu().numpy(),
                title=title,
                axes=axes,
                block=block,
                legend=legend,
                clear_axes=True,
                xlabel=xlabel,
                ylabel=ylabel,
                xlim=np.array([0., 1.]),
                ylim=ylim,
                labelsize="x-large",
                legend_loc="best",
                colormap="paper",
                showtickslabels_x=showtickslabels_x)

            if Nsamples is not None:
                f_sample = posterior.sample(
                    sample_shape=torch.Size([Nsamples]))
                for k in range(Nsamples):
                    axes.plot(xpred_vec.squeeze().detach().cpu().numpy(),
                              f_sample[k, 0, :, 0],
                              linestyle="--",
                              linewidth=1.0,
                              color="sienna")

        elif self.dim == 2:
            pass

        plt.show(block=block)
        if pause is not None:
            plt.pause(pause)

        return axes
 def __init__(self):
     super(GPClassificationModel, self).__init__(BernoulliLikelihood())
     self.latent_function = LatentFunction()
Example #21
 def create_likelihood(self):
     return BernoulliLikelihood()
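# BernoulliLikelihood squashes the latent GP through a probit link, so calling
# it on a latent Gaussian yields class probabilities. A tiny illustrative
# example (values assumed, not from the surrounding code):
import torch
import gpytorch
from gpytorch.likelihoods import BernoulliLikelihood

likelihood = BernoulliLikelihood()
latent = gpytorch.distributions.MultivariateNormal(torch.zeros(3), torch.eye(3))
marginal = likelihood(latent)  # Bernoulli over labels at the three test points
probs = marginal.mean          # P(y = 1); 0.5 everywhere for a zero-mean latent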
Example #22
    def __init__(
        self,
        lb: Union[np.ndarray, torch.Tensor],
        ub: Union[np.ndarray, torch.Tensor],
        dim: Optional[int] = None,
        mean_module: Optional[gpytorch.means.Mean] = None,
        covar_module: Optional[gpytorch.kernels.Kernel] = None,
        likelihood: Optional[Likelihood] = None,
        inducing_size: int = 100,
        max_fit_time: Optional[float] = None,
        inducing_point_method: str = "auto",
    ):
        """Initialize the GP Classification model

        Args:
            lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
            ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
            dim (int, optional): The number of dimensions in the parameter space. If None, it is inferred from the size
                of lb and ub.
            mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant with a normal prior.
            covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class. Defaults to scaled RBF with a
                gamma prior.
            likelihood (gpytorch.likelihoods.Likelihood, optional): The likelihood function to use. If None, defaults to
                a Bernoulli likelihood.
            inducing_size (int): Number of inducing points. Defaults to 100.
            max_fit_time (float, optional): The maximum amount of time, in seconds, to spend fitting the model. If None,
                there is no limit to the fitting time.
            inducing_point_method (string): The method to use to select the inducing points. Defaults to "auto".
                If "sobol", a number of Sobol points equal to inducing_size will be selected.
                If "pivoted_chol", selects points based on the pivoted Cholesky heuristic.
                If "kmeans++", selects points by performing kmeans++ clustering on the training data.
                If "auto", tries to determine the best method automatically.
        """
        self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim)
        self.max_fit_time = max_fit_time
        self.inducing_size = inducing_size

        if likelihood is None:
            likelihood = BernoulliLikelihood()

        self.inducing_point_method = inducing_point_method
        # initialize to sobol before we have data
        inducing_points = self._select_inducing_points(method="sobol")

        variational_distribution = CholeskyVariationalDistribution(
            inducing_points.size(0),
            batch_shape=torch.Size([self._batch_size]))
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=False,
        )
        super().__init__(variational_strategy)

        if mean_module is None or covar_module is None:
            config = Config(
                config_dict={
                    "default_mean_covar_factory": {
                        "lb": str(self.lb.tolist()),
                        "ub": str(self.ub.tolist()),
                    }
                })  # type: ignore
            default_mean, default_covar = default_mean_covar_factory(config)

        self.mean_module = mean_module or default_mean
        self.covar_module = covar_module or default_covar
        self.likelihood = likelihood

        self._fresh_state_dict = deepcopy(self.state_dict())
        self._fresh_likelihood_dict = deepcopy(self.likelihood.state_dict())