def test_setters(self):
        likelihood = MultitaskGaussianLikelihood(num_tasks=3, rank=0)

        a = torch.randn(3, 2)
        mat = a.matmul(a.transpose(-1, -2))

        # test rank 0 setters
        likelihood.noise = 0.5
        self.assertAlmostEqual(0.5, likelihood.noise.item())

        likelihood.task_noises = torch.tensor([0.04, 0.04, 0.04])
        for i in range(3):
            self.assertAlmostEqual(0.04, likelihood.task_noises[i].item())

        with self.assertRaises(AttributeError) as context:
            likelihood.task_noise_covar = mat
        self.assertTrue("task noises" in str(context.exception))

        # test low rank setters
        likelihood = MultitaskGaussianLikelihood(num_tasks=3, rank=2)
        likelihood.noise = 0.5
        self.assertAlmostEqual(0.5, likelihood.noise.item())

        likelihood.task_noise_covar = mat
        self.assertAllClose(mat, likelihood.task_noise_covar)

        with self.assertRaises(AttributeError) as context:
            likelihood.task_noises = torch.tensor([0.04, 0.04, 0.04])
        self.assertTrue("task noises" in str(context.exception))
    def __init__(self, train_X, train_Y):
        d = train_X.shape[-1]
        likelihood = MultitaskGaussianLikelihood(num_tasks=1 + d)
        super(GPWithDerivatives, self).__init__(train_X, train_Y, likelihood)
        self.mean_module = gpytorch.means.ConstantMeanGrad()
        self.base_kernel = gpytorch.kernels.RBFKernelGrad(ard_num_dims=d)
        self.covar_module = gpytorch.kernels.ScaleKernel(self.base_kernel)
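The excerpt above only registers the modules. A minimal sketch of the matching forward pass (assuming GPWithDerivatives extends gpytorch.models.ExactGP) might look like this; it is not part of the original example:

    # Hedged sketch: combine the gradient-aware mean/kernel into a multitask
    # distribution over function values and partial derivatives.
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)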
Example #3
    def __init__(self,
                 latent_dimensions,
                 output_dimensions,
                 n_observations,
                 projection_dimensions=None,
                 n_inducing=50,
                 **kwargs):
        if "likelihood" in kwargs:
            raise Exception("Likelihood should not be set for the GP-LVM")
        kwargs["likelihood"] = MultitaskGaussianLikelihood(
            num_tasks=output_dimensions)

        super().__init__(**kwargs)

        self.Q = latent_dimensions
        self.D = output_dimensions
        self.N = n_observations
        self.K = projection_dimensions or self.D

        if projection_dimensions is not None:
            L = torch.zeros(self.K, self.D) + 0.1
            self.register_parameter("L", nn.Parameter(L))
        else:
            self.L = None

        svgp = SVGP(self.Q, self.K, n_inducing=n_inducing, collapsed=False)
        self.add_gp(svgp)
        self.latent_layer = LatentLayer(self.N, self.Q)
Example #4
    def test_train_and_eval(self):
        # We're manually going to set the hyperparameters to something they shouldn't be
        likelihood = MultitaskGaussianLikelihood(num_tasks=4)
        model = LMCModel()

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = torch.optim.Adam([
            {'params': model.parameters()},
            {'params': likelihood.parameters()},
        ], lr=0.01)

        # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

        # We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less
        # effective for VI.
        for i in range(400):
            # Within each iteration, we will go over each minibatch of data
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        model.eval()
        likelihood.eval()

        # Make predictions for both sets of test points, and check MAEs.
        with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
            batch_predictions = likelihood(model(train_x))
            preds1 = batch_predictions.mean[:, 0]
            preds2 = batch_predictions.mean[:, 1]
            preds3 = batch_predictions.mean[:, 2]
            preds4 = batch_predictions.mean[:, 3]
            mean_abs_error1 = torch.mean(torch.abs(train_y[..., 0] - preds1))
            mean_abs_error2 = torch.mean(torch.abs(train_y[..., 1] - preds2))
            mean_abs_error3 = torch.mean(torch.abs(train_y[..., 2] - preds3))
            mean_abs_error4 = torch.mean(torch.abs(train_y[..., 3] - preds4))
            self.assertLess(mean_abs_error1.squeeze().item(), 0.15)
            self.assertLess(mean_abs_error2.squeeze().item(), 0.15)
            self.assertLess(mean_abs_error3.squeeze().item(), 0.15)
            self.assertLess(mean_abs_error4.squeeze().item(), 0.15)

            # Smoke test for getting predictive uncertainties
            lower, upper = batch_predictions.confidence_region()
            self.assertEqual(lower.shape, train_y.shape)
            self.assertEqual(upper.shape, train_y.shape)
    def test_train_on_single_set_test_on_batch(self):
        # We're manually going to set the hyperparameters to something they shouldn't be
        likelihood = MultitaskGaussianLikelihood(
            log_noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1),
                                                        scale=torch.ones(1),
                                                        log_transform=True),
            num_tasks=2,
        )
        gp_model = ExactGPModel(train_x1, train_y1, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) +
                               list(likelihood.parameters()),
                               lr=0.1)
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x1)
            loss = -mll(output, train_y1).sum()
            loss.backward()
            optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Make predictions for both sets of test points, and check MAEs.
        batch_predictions = likelihood(gp_model(test_x12))
        preds1 = batch_predictions.mean[0]
        preds2 = batch_predictions.mean[1]
        mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
        mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
        self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
        self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
Example #6
def get_model(*, train_x, train_y, rank, num_mixtures, X_scaler):
    likelihood = MultitaskGaussianLikelihood(num_tasks=train_y.shape[1],
                                             noise_constraint=Interval(
                                                 1e-10, 1.0))
    model = MultitaskGPModel(
        train_x,
        train_y,
        likelihood,
        rank=rank,
        num_mixtures=num_mixtures,
        X_scaler=X_scaler,
    )
    if IS_CUDA:
        model = model.cuda(device=DEVICE)
        likelihood = likelihood.cuda(device=DEVICE)

    return model, likelihood
Example #7
    def __init__(self, dim):
        # squeeze output dim before passing train_Y to ExactGP
        # super().__init__(train_X, train_Y.squeeze(-1), GaussianLikelihood())
        # super().__init__(train_X, train_Y, MultitaskGaussianLikelihood(num_tasks=1+train_X.shape[-1]))
        self.likelihood = MultitaskGaussianLikelihood(num_tasks=1 + dim)
        self.mean_module = ConstantMeanGrad()
        base_kernel = RBFKernelGrad(ard_num_dims=dim)
        self.covar_module = ScaleKernel(base_kernel=base_kernel)
        # self.to(train_X)  # make sure we're on the right device/dtype
        self.dim = dim
    def test_multitask_gp_mean_abs_error(self, cuda=False):
        train_x, train_y = self._get_data(cuda=cuda)
        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model = MultitaskGPModel(train_x, train_y, likelihood)

        if cuda:
            model.cuda()

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        # Use the adam optimizer
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=0.1)  # Includes GaussianLikelihood parameters

        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

        n_iter = 50
        for _ in range(n_iter):
            # Zero prev backpropped gradients
            optimizer.zero_grad()
            # Make predictions from training data
            # Again, note feeding duplicated x_data and indices indicating which task
            output = model(train_x)
            # TODO: Fix this view call!!
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Test the model
        model.eval()
        likelihood.eval()

        test_x = torch.linspace(
            0,
            1,
            51,
            device=torch.device("cuda") if cuda else torch.device("cpu"))
        test_y1 = torch.sin(test_x * (2 * pi))
        test_y2 = torch.cos(test_x * (2 * pi))
        test_preds = likelihood(model(test_x)).mean
        mean_abs_error_task_1 = torch.mean(
            torch.abs(test_y1 - test_preds[:, 0]))
        mean_abs_error_task_2 = torch.mean(
            torch.abs(test_y2 - test_preds[:, 1]))

        self.assertLess(mean_abs_error_task_1.squeeze().item(), 0.05)
        self.assertLess(mean_abs_error_task_2.squeeze().item(), 0.05)
Example #9
    def __init__(self, input_size, target_size, device='cpu'):
        if device == 'gpu' and torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        self.input_size = input_size
        self.target_size = target_size

        _likelihood = MultitaskGaussianLikelihood(num_tasks=self.target_size)
        super(MultiTaskGPRegressor, self).__init__(train_inputs=None,
                                                   train_targets=None,
                                                   likelihood=_likelihood)

        self.mean_module = MultitaskMean(ZeroMean(), num_tasks=self.target_size)
        self.covar_module = MultitaskKernel(RBFKernel(), num_tasks=self.target_size, rank=1)

        self.input_trans = None
        self.target_trans = None
Example #10
    def __init__(self, input_size, target_size, device='cpu'):
        if device == 'gpu' and torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        self.input_size = input_size
        self.target_size = target_size

        _likelihood = MultitaskGaussianLikelihood(num_tasks=self.target_size)
        super(GPListRegressor, self).__init__(train_inputs=None,
                                              train_targets=None,
                                              likelihood=_likelihood)

        self.mean_module = ConstantMean(batch_shape=torch.Size([self.target_size]))
        self.covar_module = ScaleKernel(RBFKernel(batch_shape=torch.Size([self.target_size])),
                                        batch_shape=torch.Size([self.target_size]))

        self.input_trans = None
        self.target_trans = None
Example #11
    def __init__(
        self,
        model: Optional[ApproximateGP] = None,
        likelihood: Optional[Likelihood] = None,
        num_outputs: int = 1,
        *args,
        **kwargs,
    ) -> None:
        r"""
        Botorch wrapper class for various (variational) approximate GP models in
        gpytorch. This can either include stochastic variational GPs (SVGPs) or
        variational implementations of weight space approximate GPs.

        Args:
            model: Instance of gpytorch.approximate GP models. If omitted,
                constructs a `_SingleTaskVariationalGP`.
            likelihood: Instance of a GPyTorch likelihood. If omitted, uses
                either a `GaussianLikelihood` (if `num_outputs=1`) or a
                `MultitaskGaussianLikelihood` (if `num_outputs>1`).
            num_outputs: Number of outputs expected for the GP model.
            args: Optional positional arguments passed to the
                `_SingleTaskVariationalGP` constructor if no model is provided.
            kwargs: Optional keyword arguments passed to the
                `_SingleTaskVariationalGP` constructor if no model is provided.
        """
        super().__init__()

        if model is None:
            model = _SingleTaskVariationalGP(num_outputs=num_outputs,
                                             *args,
                                             **kwargs)

        if likelihood is None:
            if num_outputs == 1:
                likelihood = GaussianLikelihood()
            else:
                likelihood = MultitaskGaussianLikelihood(num_tasks=num_outputs)

        self.model = model
        self.likelihood = likelihood
        self._desired_num_outputs = num_outputs
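A hedged usage sketch of the constructor above. The wrapper class name ApproximateGPyTorchModel and the toy SVGP below are assumptions, used only to show that the multi-output branch installs a MultitaskGaussianLikelihood:

import torch
import gpytorch
from gpytorch.likelihoods import MultitaskGaussianLikelihood

class ToySVGP(gpytorch.models.ApproximateGP):
    # Minimal stand-in ApproximateGP so the wrapper has a model to hold.
    def __init__(self, inducing_points):
        q_dist = gpytorch.variational.CholeskyVariationalDistribution(inducing_points.size(0))
        strategy = gpytorch.variational.VariationalStrategy(self, inducing_points, q_dist)
        super().__init__(strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))

# With num_outputs > 1 and no explicit likelihood, the constructor above picks
# MultitaskGaussianLikelihood(num_tasks=num_outputs).
wrapper = ApproximateGPyTorchModel(model=ToySVGP(torch.rand(16, 2)), num_outputs=3)
print(isinstance(wrapper.likelihood, MultitaskGaussianLikelihood))  # True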
Example #12
    def test_train_and_eval(self):
        # We're manually going to set the hyperparameters to something they shouldn't be
        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        gp_model = ExactGPModel(train_x, train_y12, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(75):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y12).sum()
            loss.backward()
            optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Make predictions for both sets of test points, and check MAEs.
        with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
            batch_predictions = likelihood(gp_model(test_x))
            preds1 = batch_predictions.mean[:, 0]
            preds2 = batch_predictions.mean[:, 1]
            mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
            mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
            self.assertLess(mean_abs_error1.squeeze().item(), 0.01)
            self.assertLess(mean_abs_error2.squeeze().item(), 0.01)

            # Smoke test for getting predictive uncertainties
            lower, upper = batch_predictions.confidence_region()
            self.assertEqual(lower.shape, test_y12.shape)
            self.assertEqual(upper.shape, test_y12.shape)
Example #13
    def test_multitask_low_rank_noise_covar(self):
        likelihood = MultitaskGaussianLikelihood(n_tasks=2, rank=1)
        model = MultitaskGPModel(train_x, train_y, likelihood)
        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        # Use the adam optimizer
        optimizer = torch.optim.Adam(
            [{
                "params": model.parameters()
            }],  # Includes GaussianLikelihood parameters
            lr=0.1,
        )

        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

        n_iter = 50
        for _ in range(n_iter):
            # Zero prev backpropped gradients
            optimizer.zero_grad()
            # Make predictions from training data
            # Again, note feeding duplicated x_data and indices indicating which task
            output = model(train_x)
            # TODO: Fix this view call!!
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Test the model
        model.eval()
        likelihood.eval()

        n_tasks = 2
        task_noise_covar_factor = likelihood.task_noise_covar_factor
        log_noise = likelihood.log_noise
        task_noise_covar = task_noise_covar_factor.matmul(
            task_noise_covar_factor.transpose(
                -1, -2)) + log_noise.exp() * torch.eye(n_tasks)

        self.assertGreater(task_noise_covar[0, 1].data.squeeze().item(), 0.05)
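This test targets an older attribute naming (n_tasks, log_noise, task_noise_covar_factor). A hedged sketch of reading the same inter-task noise covariance through the current-style setters/getters exercised in the test_setters example at the top of this page:

from gpytorch.likelihoods import MultitaskGaussianLikelihood

# Hedged sketch: with rank > 0 the likelihood exposes the learned inter-task
# noise covariance directly (see the test_setters example above).
likelihood = MultitaskGaussianLikelihood(num_tasks=2, rank=1)
print(likelihood.task_noise_covar)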
    def test_lcm_icm_equivalence(self):
        # Training points are every 0.1 in [0,1] (note that they're the same for both tasks)
        train_x = torch.linspace(0, 1, 100)
        # y1 function is sin(2*pi*x) with noise N(0, 0.04)
        train_y1 = torch.sin(train_x.data *
                             (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
        # y2 function is cos(2*pi*x) with noise N(0, 0.04)
        train_y2 = torch.cos(train_x.data *
                             (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
        # Create a train_y which interleaves the two
        train_y = torch.stack([train_y1, train_y2], -1)

        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model = MultitaskGPModel(train_x, train_y, likelihood)

        # Use the adam optimizer
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=0.1)  # Includes GaussianLikelihood parameters
        model.train()
        likelihood.train()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
        n_iter = 50
        for _ in range(n_iter):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()
        model.eval()
        likelihood.eval()

        # Make predictions for LCM
        with torch.no_grad():
            test_x = torch.linspace(0, 1, 51)
            observed_pred = likelihood(model(test_x))
            mean = observed_pred.mean

        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model_icm = MultitaskGPModel_ICM(train_x, train_y, likelihood)
        model_icm.train()
        likelihood.train()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model_icm)
        optimizer = torch.optim.Adam(
            model_icm.parameters(),
            lr=0.1)  # Includes GaussianLikelihood parameters
        for _ in range(n_iter):
            optimizer.zero_grad()
            output = model_icm(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()
        model_icm.eval()
        likelihood.eval()

        # Make predictions for ICM
        with torch.no_grad():
            test_x = torch.linspace(0, 1, 51)
            observed_pred_icm = likelihood(model_icm(test_x))
            mean_icm = observed_pred_icm.mean

        # Make sure predictions from LCM with one base kernel and ICM are the same.
        self.assertLess((mean - mean_icm).pow(2).mean(), 1e-2)
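The model classes are not shown in this excerpt. A plausible sketch of what MultitaskGPModel (an LCM with a single base kernel) and MultitaskGPModel_ICM could look like, assuming standard gpytorch kernels:

import gpytorch

# Hedged sketch of the model classes the test above assumes; definitions are illustrative.
class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        # LCM with a single RBF base kernel (degenerates to an ICM)
        self.covar_module = gpytorch.kernels.LCMKernel(
            [gpytorch.kernels.RBFKernel()], num_tasks=2, rank=1)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)


class MultitaskGPModel_ICM(MultitaskGPModel):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        # ICM: a shared data kernel combined with an index kernel over tasks
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1)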
    def create_likelihood(self):
        return MultitaskGaussianLikelihood(num_tasks=4, rank=2, batch_shape=torch.Size([2, 3]))

    def create_likelihood(self):
        return MultitaskGaussianLikelihood(num_tasks=4, rank=2)
Example #17
    def create_model(self, train_x, train_y):
        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model = ExactMultiTaskGPModel(train_x, train_y, likelihood)
        return model
Example #18
    def test_KroneckerMultiTaskGP_custom(self):
        for batch_shape, dtype in itertools.product(
            (torch.Size(),),  # TODO: fix and test batch mode with torch.Size([3])
            (torch.float, torch.double),
        ):
            tkwargs = {"device": self.device, "dtype": dtype}

            # initialization with custom settings
            likelihood = MultitaskGaussianLikelihood(
                num_tasks=2,
                rank=1,
                batch_shape=batch_shape,
            )
            data_covar_module = MaternKernel(
                nu=1.5,
                lengthscale_prior=GammaPrior(2.0, 4.0),
            )
            task_covar_prior = LKJCovariancePrior(
                n=2,
                eta=0.5,
                sd_prior=SmoothedBoxPrior(math.exp(-3), math.exp(2), 0.1),
            )
            model_kwargs = {
                "likelihood": likelihood,
                "data_covar_module": data_covar_module,
                "task_covar_prior": task_covar_prior,
                "rank": 1,
            }

            model, train_X, _ = _get_kronecker_model_and_training_data(
                model_kwargs=model_kwargs, batch_shape=batch_shape, **tkwargs)
            self.assertIsInstance(model, KroneckerMultiTaskGP)
            self.assertEqual(model.num_outputs, 2)
            self.assertIsInstance(model.likelihood,
                                  MultitaskGaussianLikelihood)
            self.assertEqual(model.likelihood.rank, 1)
            self.assertIsInstance(model.mean_module, MultitaskMean)
            self.assertIsInstance(model.covar_module, MultitaskKernel)
            base_kernel = model.covar_module
            self.assertIsInstance(base_kernel.data_covar_module, MaternKernel)
            self.assertIsInstance(base_kernel.task_covar_module, IndexKernel)
            task_covar_prior = base_kernel.task_covar_module.IndexKernelPrior
            self.assertIsInstance(task_covar_prior, LKJCovariancePrior)
            self.assertEqual(task_covar_prior.correlation_prior.eta, 0.5)
            lengthscale_prior = base_kernel.data_covar_module.lengthscale_prior
            self.assertIsInstance(lengthscale_prior, GammaPrior)
            self.assertEqual(lengthscale_prior.concentration, 2.0)
            self.assertEqual(lengthscale_prior.rate, 4.0)
            self.assertEqual(
                base_kernel.task_covar_module.covar_factor.shape[-1], 1)

            # test model fitting
            mll = ExactMarginalLogLikelihood(model.likelihood, model)
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=OptimizationWarning)
                mll = fit_gpytorch_model(mll,
                                         options={"maxiter": 1},
                                         max_retries=1)

            # test posterior
            test_x = torch.rand(2, 2, **tkwargs)
            posterior_f = model.posterior(test_x)
            self.assertIsInstance(posterior_f, GPyTorchPosterior)
            self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
            self.assertEqual(posterior_f.mean.shape, torch.Size([2, 2]))
            self.assertEqual(posterior_f.variance.shape, torch.Size([2, 2]))

            # test observation noise
            posterior_noisy = model.posterior(test_x, observation_noise=True)
            self.assertTrue(
                torch.allclose(posterior_noisy.variance,
                               model.likelihood(posterior_f.mvn).variance))

            # test posterior (batch eval)
            test_x = torch.rand(3, 2, 2, **tkwargs)
            posterior_f = model.posterior(test_x)
            self.assertIsInstance(posterior_f, GPyTorchPosterior)
            self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
            self.assertEqual(posterior_f.mean.shape, torch.Size([3, 2, 2]))
            self.assertEqual(posterior_f.variance.shape, torch.Size([3, 2, 2]))
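For contrast with the custom configuration above, a hedged sketch of constructing the same model with defaults, in which case a MultitaskGaussianLikelihood is created internally:

import torch

# Hedged sketch; shapes and the double dtype are illustrative.
train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.randn(10, 2, dtype=torch.double)
model = KroneckerMultiTaskGP(train_X, train_Y)  # default likelihood is a MultitaskGaussianLikelihood
posterior = model.posterior(torch.rand(5, 2, dtype=torch.double))
print(posterior.mean.shape)  # torch.Size([5, 2])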
Example #19
    def fit(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        Xc, Xe, y = filter_nan(Xc, Xe, y, 'all')
        self.fit_scaler(Xc, Xe, y)
        Xc, Xe, y = self.xtrans(Xc, Xe, y)

        assert (Xc.shape[1] == self.num_cont)
        assert (Xe.shape[1] == self.num_enum)
        assert (y.shape[1] == self.num_out)

        self.Xc = Xc
        self.Xe = Xe
        self.y = y

        n_constr = GreaterThan(self.noise_lb)
        n_prior = LogNormalPrior(-4.63, 0.5)
        if self.num_out == 1:
            self.lik = GaussianLikelihood(noise_constraint=n_constr,
                                          noise_prior=n_prior)
        else:
            self.lik = MultitaskGaussianLikelihood(num_tasks=self.num_out,
                                                   noise_constraint=n_constr,
                                                   noise_prior=n_prior)
        self.gp = GPyTorchModel(self.Xc, self.Xe, self.y, self.lik,
                                **self.conf)

        if self.num_out == 1:  # XXX: only tuned for single-output BO
            if self.num_cont > 0:
                self.gp.kern.outputscale = self.y.var()
                lscales = self.gp.kern.base_kernel.lengthscale.detach().clone(
                ).view(1, -1)
                for i in range(self.num_cont):
                    lscales[0, i] = torch.pdist(self.Xc[:, i].view(
                        -1, 1)).median().clamp(min=0.02)
                self.gp.kern.base_kernel.lengthscale = lscales
            if self.noise_free:
                self.gp.likelihood.noise = self.noise_lb * 1.1
                self.gp.likelihood.raw_noise.requires_grad = False
            else:
                self.gp.likelihood.noise = max(1e-2, self.noise_lb)

        self.gp.train()
        self.lik.train()

        opt = torch.optim.LBFGS(self.gp.parameters(),
                                lr=self.lr,
                                max_iter=5,
                                line_search_fn='strong_wolfe')
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.lik, self.gp)
        for epoch in range(self.num_epochs):

            def closure():
                dist = self.gp(self.Xc, self.Xe)
                loss = -1 * mll(dist, self.y.squeeze())
                opt.zero_grad()
                loss.backward()
                return loss

            opt.step(closure)
            if self.verbose and ((epoch + 1) % self.print_every == 0
                                 or epoch == 0):
                print('After %d epochs, loss = %g' %
                      (epoch + 1, closure().item()),
                      flush=True)
        self.gp.eval()
        self.lik.eval()
Example #20
def gprTorch_multiTask():
    """ 
        Multi-Task GPR + heteroscedastic noise level
    """
    #synthetic data
    train_x = torch.linspace(0, 1, 75)

    sem_y1 = 0.05 + (0.55 - 0.05) * torch.linspace(0, 1, 75)
    sem_y2 = 0.75 - (0.75 - 0.05) * torch.linspace(0, 1, 75)

    train_y = torch.stack([
        torch.sin(train_x *
                  (2 * math.pi)) + sem_y1 * torch.randn(train_x.size()),
        torch.cos(train_x *
                  (2 * math.pi)) + sem_y2 * torch.randn(train_x.size()),
    ], -1)

    train_y_log_var = torch.stack([(s**2).log() for s in (sem_y1, sem_y2)], -1)

    #construct the GPR
    numTasks = 2

    log_noise_model = MultitaskGPModel(
        train_x,
        train_y_log_var,
        MultitaskGaussianLikelihood(num_tasks=numTasks),
        num_tasks=numTasks,
    )

    likelihood = _MultitaskGaussianLikelihoodBase(
        num_tasks=numTasks,
        noise_covar=HeteroskedasticNoise(log_noise_model),
    )

    model = MultitaskGPModel(train_x,
                             train_y,
                             likelihood,
                             num_tasks=numTasks,
                             rank=numTasks)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(
        [
            {
                'params': model.parameters()
            },  # Includes GaussianLikelihood parameters
        ],
        lr=0.1)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 75
    for i in range(n_iter):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y, train_x)
        loss.backward()
        if (i + 1) % 10 == 0:
            print('Iter %d/%d - Loss: %.3f' % (i + 1, n_iter, loss.item()))
        optimizer.step()

    model.eval()
    likelihood.eval()

    with torch.no_grad():
        test_x = torch.linspace(0, 1, 35)
        post_f = model(test_x)
        post_obs = likelihood(post_f, test_x)

    with torch.no_grad():
        f, axs = plt.subplots(1, 2, figsize=(14, 6))
        lower_f, upper_f = post_f.confidence_region()
        lower_obs, upper_obs = post_obs.confidence_region()
        for i, ax in enumerate(axs):
            ax.plot(train_x.numpy(), train_y[:, i].numpy(), 'k*')
            ax.plot(test_x.numpy(), post_f.mean[:, i].numpy(), 'b')
            ax.fill_between(test_x.numpy(),
                            lower_f[:, i].numpy(),
                            upper_f[:, i].numpy(),
                            alpha=0.5)
            ax.fill_between(test_x.numpy(),
                            lower_obs[:, i].numpy(),
                            upper_obs[:, i].numpy(),
                            alpha=0.25,
                            color='r')
            ax.set_ylim([-3, 3])
            ax.legend([
                'Observed Data', 'Mean', 'Confidence (f)', 'Confidence (obs)'
            ])
        plt.title('Multi-Task GP + Heteroscedastic Noise')
        plt.show()
Example #21
def train_gp(train_x, train_y, use_ard, num_steps, hypers={}):
    """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized."""
    assert train_x.ndim == 2
    assert train_y.ndim == 2
    assert train_x.shape[0] == train_y.shape[0]

    # Create hyper parameter bounds
    noise_constraint = Interval(5e-4, 0.2)
    if use_ard:
        lengthscale_constraint = Interval(0.005, 2.0)
    else:
        lengthscale_constraint = Interval(0.005, math.sqrt(
            train_x.shape[1]))  # [0.005, sqrt(dim)]
    outputscale_constraint = Interval(0.05, 20.0)

    # Create models
    likelihood = MultitaskGaussianLikelihood(
        num_tasks=train_y.size(-1),
        noise_constraint=noise_constraint,
    ).to(device=train_x.device, dtype=train_y.dtype)
    ard_dims = train_x.shape[1] if use_ard else None
    model = GP(
        train_x=train_x,
        train_y=train_y,
        likelihood=likelihood,
        lengthscale_constraint=lengthscale_constraint,
        outputscale_constraint=outputscale_constraint,
        ard_dims=ard_dims,
    ).to(device=train_x.device, dtype=train_x.dtype)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    # Initialize model hypers
    if hypers:
        model.load_state_dict(hypers)
    else:
        hypers = {}
        hypers["covar_module.outputscale"] = 1.0
        hypers["covar_module.base_kernel.lengthscale"] = 0.5
        hypers["likelihood.noise"] = 0.005
        model.initialize(**hypers)

    # Use the adam optimizer
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Switch to eval mode
    model.eval()
    likelihood.eval()

    return model
Example #22
    def __init__(
        self,
        train_X: Tensor,
        train_Y: Optional[Tensor] = None,
        likelihood: Optional[Likelihood] = None,
        num_outputs: int = 1,
        learn_inducing_points: bool = True,
        covar_module: Optional[Kernel] = None,
        mean_module: Optional[Mean] = None,
        variational_distribution: Optional[_VariationalDistribution] = None,
        variational_strategy: Type[_VariationalStrategy] = VariationalStrategy,
        inducing_points: Optional[Union[Tensor, int]] = None,
        outcome_transform: Optional[OutcomeTransform] = None,
        input_transform: Optional[InputTransform] = None,
    ) -> None:
        r"""
        A single task stochastic variational Gaussian process model (SVGP) as described
        by [hensman2013svgp]_. We use pivoted Cholesky initialization [burt2020svgp]_ to
        initialize the inducing points of the model.

        Args:
            train_X: Training inputs (due to the ability of the SVGP to sub-sample
                this does not have to be all of the training inputs).
            train_Y: Training targets (optional).
            likelihood: Instance of a GPyTorch likelihood. If omitted, uses
                either a `GaussianLikelihood` (if `num_outputs=1`) or a
                `MultitaskGaussianLikelihood` (if `num_outputs>1`).
            num_outputs: Number of output responses per input (default: 1).
            covar_module: Kernel function. If omitted, uses a `MaternKernel`.
            mean_module: Mean of GP model. If omitted, uses a `ConstantMean`.
            variational_distribution: Type of variational distribution to use
                (default: CholeskyVariationalDistribution), the properties of the
                variational distribution will encourage scalability or ease of
                optimization.
            variational_strategy: Type of variational strategy to use (default:
                VariationalStrategy). The default setting uses "whitening" of the
                variational distribution to make training easier.
            inducing_points: The number or specific locations of the inducing points.
        """
        with torch.no_grad():
            transformed_X = self.transform_inputs(
                X=train_X, input_transform=input_transform)
        if train_Y is not None:
            if outcome_transform is not None:
                train_Y, _ = outcome_transform(train_Y)
            self._validate_tensor_args(X=transformed_X, Y=train_Y)
            validate_input_scaling(train_X=transformed_X, train_Y=train_Y)
            if train_Y.shape[-1] != num_outputs:
                num_outputs = train_Y.shape[-1]

        self._num_outputs = num_outputs
        self._input_batch_shape = train_X.shape[:-2]
        aug_batch_shape = copy.deepcopy(self._input_batch_shape)
        if num_outputs > 1:
            aug_batch_shape += torch.Size([num_outputs])
        self._aug_batch_shape = aug_batch_shape

        if likelihood is None:
            if num_outputs == 1:
                noise_prior = GammaPrior(1.1, 0.05)
                noise_prior_mode = (noise_prior.concentration -
                                    1) / noise_prior.rate
                likelihood = GaussianLikelihood(
                    noise_prior=noise_prior,
                    batch_shape=self._aug_batch_shape,
                    noise_constraint=GreaterThan(
                        MIN_INFERRED_NOISE_LEVEL,
                        transform=None,
                        initial_value=noise_prior_mode,
                    ),
                )
            else:
                likelihood = MultitaskGaussianLikelihood(num_tasks=num_outputs)
        else:
            self._is_custom_likelihood = True

        model = _SingleTaskVariationalGP(
            train_X=transformed_X,
            train_Y=train_Y,
            num_outputs=num_outputs,
            learn_inducing_points=learn_inducing_points,
            covar_module=covar_module,
            mean_module=mean_module,
            variational_distribution=variational_distribution,
            variational_strategy=variational_strategy,
            inducing_points=inducing_points,
        )

        super().__init__(model=model,
                         likelihood=likelihood,
                         num_outputs=num_outputs)

        if outcome_transform is not None:
            self.outcome_transform = outcome_transform
        if input_transform is not None:
            self.input_transform = input_transform

        # for model fitting utilities
        # TODO: make this a flag?
        self.model.train_inputs = [transformed_X]
        if train_Y is not None:
            self.model.train_targets = train_Y.squeeze(-1)

        self.to(train_X)
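A hedged usage sketch, assuming the constructor above belongs to a class named SingleTaskVariationalGP (as in BoTorch). With more than one output and no likelihood supplied, the multi-output branch above selects a MultitaskGaussianLikelihood:

import torch
from gpytorch.likelihoods import MultitaskGaussianLikelihood

# Hedged sketch; the class name SingleTaskVariationalGP is an assumption.
train_X = torch.rand(32, 3)
train_Y = torch.randn(32, 2)
model = SingleTaskVariationalGP(train_X, train_Y, num_outputs=2, inducing_points=16)
print(isinstance(model.likelihood, MultitaskGaussianLikelihood))  # True for num_outputs > 1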
Example #23
def run(obs, params_true, device='cpu'):
    device = safe_cast(torch.device, device)

    dx, NX = PARAM_DX, PARAM_MESH_RES_SPACE

    ts = torch.arange(PARAM_MESH_RES_TIME, device=device)

    priors_uniform = priors()

    y = torch.tensor(obs['Ss'], device=device)

    def simulate(params):
        _theta = {'a': params[0], 'b': params[1], 'k': params[2]}
        sim_pde = LandauCahnHilliard(params=_theta,
                                     M=PARAM_DT,
                                     dx=dx,
                                     device=device)
        loss_fn = Evaluator(sim_pde, loss)

        return loss_fn

    # Draw the initial samples from a Latin hypercube
    pgd = lhs(3, samples=PARAM_INIT_EVAL)
    xs, ys = [], []

    for j in range(PARAM_INIT_EVAL):
        xk = torch.stack(
            [(priors_uniform[k][1] - priors_uniform[k][0]) *
             torch.tensor(pgd[j, i], device=device, dtype=torch.float32) +
             priors_uniform[k][0] for i, k in enumerate(('a', 'b', 'k'))], 0)
        xs.append(xk)


#    ell, params = simulate(params)

    phi0 = (0.2 * torch.rand((NX, NX), device=device)).view(-1, 1, NX, NX)

    # with torch.no_grad():
    for j in range(PARAM_INIT_EVAL):
        params = xs[j]
        loss_fn = simulate(params)
        # print(loss_fn._pde)
        # for n,p in loss_fn.named_parameters():
        #     print(n + '->' + str(p))
        ell = loss_fn(phi0, ts, y, dx)
        ell.backward()
        grads = (loss_fn._pde._a.grad, loss_fn._pde._b.grad,
                 loss_fn._pde._k.grad)
        ys.append(torch.stack([ell.detach(), *grads]).unsqueeze(0))
        print('init sample %d/%d' % (j, PARAM_INIT_EVAL))

    x_init, y_init = torch.stack(xs), torch.cat(ys, 0)

    #
    # print(y_init)
    N = PARAM_SEARCH_RES
    x_eval = torch.cat([
        x.reshape(-1, 1) for x in torch.meshgrid(
            *[torch.linspace(priors_uniform[k][0], priors_uniform[k][1], N)
              for k in priors_uniform])
    ], 1)

    print(x_init.shape)
    print(x_eval.shape)
    x_train = x_init
    y_train = y_init

    print(x_init)
    print(y_init)

    jit = 1e-2

    lik = MultitaskGaussianLikelihood(num_tasks=4)
    lik.noise_covar.noise = jit * torch.ones(4)
    lik.noise = torch.tensor(jit).sqrt()

    for i in range(PARAM_MAX_EVAL - PARAM_INIT_EVAL):
        for ntry in range(5):
            model = ExactGPModel(x_train, y_train, lik)
            try:
                optimise(model, method='adam', max_iter=1000)
                break
            except Exception as err:
                print('attempt %d failed' % ntry)
                if ntry == 4:
                    raise err

        u = acq(y_train[:, 0].min(), model, x_eval)
        idx = u.argmax()
        xn = x_eval[idx, :]
        loss_fn = simulate(xn)

        ell = loss_fn(phi0, ts, y, dx)
        ell.backward()
        grads = (loss_fn._pde._a.grad, loss_fn._pde._b.grad,
                 loss_fn._pde._k.grad)
        #ys.append(torch.stack([ell.detach(), *grads]).unsqueeze(0))

        yn = torch.stack([ell.detach(), *grads], -1).unsqueeze(0)

        x_eval = torch.cat([x_eval[0:idx, :], x_eval[idx + 1:, :]], 0)
        x_train = torch.cat([x_train, xn.reshape(1, -1)])

        # y_train = torch.stack([*y_train, yn.detach()])
        y_train = torch.cat([y_train, yn], 0)
        print(x_train)
        print(y_train)

        print(i)
    #
    return (x_train, y_train)
Example #24
class GP(BaseModel):
    support_grad = True
    support_multi_output = True

    def __init__(self, num_cont, num_enum, num_out, **conf):
        super().__init__(num_cont, num_enum, num_out, **conf)
        self.lr = conf.get('lr', 3e-2)
        self.num_epochs = conf.get('num_epochs', 100)
        self.verbose = conf.get('verbose', False)
        self.print_every = conf.get('print_every', 10)
        self.noise_free = conf.get('noise_free', False)
        self.pred_likeli = conf.get('pred_likeli', True)
        self.noise_lb = conf.get('noise_lb', 1e-5)
        self.xscaler = TorchMinMaxScaler((-1, 1))
        self.yscaler = TorchStandardScaler()

    def fit_scaler(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        if Xc is not None and Xc.shape[1] > 0:
            self.xscaler.fit(Xc)
        self.yscaler.fit(y)

    def xtrans(self, Xc: Tensor, Xe: Tensor, y: Tensor = None):
        if Xc is not None and Xc.shape[1] > 0:
            Xc_t = self.xscaler.transform(Xc)
        else:
            Xc_t = torch.zeros(Xe.shape[0], 0)

        if Xe is None:
            Xe_t = torch.zeros(Xc.shape[0], 0).long()
        else:
            Xe_t = Xe.long()

        if y is not None:
            y_t = self.yscaler.transform(y)
            return Xc_t, Xe_t, y_t
        else:
            return Xc_t, Xe_t

    def fit(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        Xc, Xe, y = filter_nan(Xc, Xe, y, 'all')
        self.fit_scaler(Xc, Xe, y)
        Xc, Xe, y = self.xtrans(Xc, Xe, y)

        assert (Xc.shape[1] == self.num_cont)
        assert (Xe.shape[1] == self.num_enum)
        assert (y.shape[1] == self.num_out)

        self.Xc = Xc
        self.Xe = Xe
        self.y = y

        n_constr = GreaterThan(self.noise_lb)
        n_prior = LogNormalPrior(-4.63, 0.5)
        if self.num_out == 1:
            self.lik = GaussianLikelihood(noise_constraint=n_constr,
                                          noise_prior=n_prior)
        else:
            self.lik = MultitaskGaussianLikelihood(num_tasks=self.num_out,
                                                   noise_constraint=n_constr,
                                                   noise_prior=n_prior)
        self.gp = GPyTorchModel(self.Xc, self.Xe, self.y, self.lik,
                                **self.conf)

        if self.num_out == 1:  # XXX: only tuned for single-output BO
            if self.num_cont > 0:
                self.gp.kern.outputscale = self.y.var()
                lscales = self.gp.kern.base_kernel.lengthscale.detach().clone(
                ).view(1, -1)
                for i in range(self.num_cont):
                    lscales[0, i] = torch.pdist(self.Xc[:, i].view(
                        -1, 1)).median().clamp(min=0.02)
                self.gp.kern.base_kernel.lengthscale = lscales
            if self.noise_free:
                self.gp.likelihood.noise = self.noise_lb * 1.1
                self.gp.likelihood.raw_noise.requires_grad = False
            else:
                self.gp.likelihood.noise = max(1e-2, self.noise_lb)

        self.gp.train()
        self.lik.train()

        opt = torch.optim.LBFGS(self.gp.parameters(),
                                lr=self.lr,
                                max_iter=5,
                                line_search_fn='strong_wolfe')
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.lik, self.gp)
        for epoch in range(self.num_epochs):

            def closure():
                dist = self.gp(self.Xc, self.Xe)
                loss = -1 * mll(dist, self.y.squeeze())
                opt.zero_grad()
                loss.backward()
                return loss

            opt.step(closure)
            if self.verbose and ((epoch + 1) % self.print_every == 0
                                 or epoch == 0):
                print('After %d epochs, loss = %g' %
                      (epoch + 1, closure().item()),
                      flush=True)
        self.gp.eval()
        self.lik.eval()

    def predict(self, Xc, Xe):
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
            pred = self.gp(Xc, Xe)
            if self.pred_likeli:
                pred = self.lik(pred)
            mu_ = pred.mean.reshape(-1, self.num_out)
            var_ = pred.variance.reshape(-1, self.num_out)
        mu = self.yscaler.inverse_transform(mu_)
        var = var_ * self.yscaler.std**2
        return mu, var

    def sample_y(self, Xc, Xe, n_samples=1) -> FloatTensor:
        """
        Should return (n_samples, Xc.shape[0], self.num_out) 
        """
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.debug(False):
            if self.pred_likeli:
                pred = self.lik(self.gp(Xc, Xe))
            else:
                pred = self.gp(Xc, Xe)
            samp = pred.rsample(torch.Size(
                (n_samples, ))).view(n_samples, Xc.shape[0], self.num_out)
            return self.yscaler.inverse_transform(samp)

    def sample_f(self):
        raise NotImplementedError(
            'Thompson sampling is not supported for GP, use `sample_y` instead'
        )

    @property
    def noise(self):
        if self.num_out == 1:
            return (self.gp.likelihood.noise * self.yscaler.std**2).view(
                self.num_out).detach()
        else:
            return (self.gp.likelihood.noise_covar.noise *
                    self.yscaler.std**2).view(self.num_out).detach()