def test_lcm_icm_equivalence(self):
        # Training inputs are 100 evenly spaced points in [0, 1] (shared by both tasks)
        train_x = torch.linspace(0, 1, 100)
        # y1 is sin(2*pi*x) with noise N(0, 0.04)
        train_y1 = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
        # y2 is cos(2*pi*x) with noise N(0, 0.04)
        train_y2 = torch.cos(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
        # Stack the two series into an (n, 2) multitask target
        train_y = torch.stack([train_y1, train_y2], -1)

        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model = MultitaskGPModel(train_x, train_y, likelihood)

        # Use the adam optimizer
        optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)  # Includes GaussianLikelihood parameters
        model.train()
        likelihood.train()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
        n_iter = 50
        for _ in range(n_iter):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()
        model.eval()
        likelihood.eval()

        # Make predictions for LCM
        with torch.no_grad():
            test_x = torch.linspace(0, 1, 51)
            observed_pred = likelihood(model(test_x))
            mean = observed_pred.mean

        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model_icm = MultitaskGPModel_ICM(train_x, train_y, likelihood)
        model_icm.train()
        likelihood.train()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model_icm)
        optimizer = torch.optim.Adam(
            [{"params": model_icm.parameters()}], lr=0.1  # Includes GaussianLikelihood parameters
        )
        for _ in range(n_iter):
            optimizer.zero_grad()
            output = model_icm(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()
        model_icm.eval()
        likelihood.eval()

        # Make predictions for ICM
        with torch.no_grad():
            test_x = torch.linspace(0, 1, 51)
            observed_pred_icm = likelihood(model_icm(test_x))
            mean_icm = observed_pred_icm.mean

        # Make sure predictions from LCM with one base kernel and ICM are the same.
        self.assertLess((mean - mean_icm).pow(2).mean(), 1e-2)
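
The model classes used above are defined elsewhere in the test module. The following is a minimal sketch of what they might look like, using gpytorch's LCMKernel (with a single RBF base kernel) and MultitaskKernel; the class names mirror the test, but the kernel choices and ranks are assumptions.

import gpytorch


class MultitaskGPModel(gpytorch.models.ExactGP):
    """Sketch of an LCM model with a single RBF base kernel (assumed definition)."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        # An LCM with one base kernel is mathematically equivalent to the ICM below
        self.covar_module = gpytorch.kernels.LCMKernel(
            [gpytorch.kernels.RBFKernel()], num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)


class MultitaskGPModel_ICM(gpytorch.models.ExactGP):
    """Sketch of an ICM model: one data kernel wrapped in a MultitaskKernel (assumed definition)."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)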
Example #2
    def test_train_and_eval(self):
        # Set up the multitask likelihood and the LMC variational model
        likelihood = MultitaskGaussianLikelihood(num_tasks=4)
        model = LMCModel()

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = torch.optim.Adam([
            {'params': model.parameters()},
            {'params': likelihood.parameters()},
        ], lr=0.01)

        # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

        # Train on the full training set for 400 iterations
        for i in range(400):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

            for param in model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        model.eval()
        likelihood.eval()

        # Make predictions for both sets of test points, and check MAEs.
        with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
            batch_predictions = likelihood(model(train_x))
            preds1 = batch_predictions.mean[:, 0]
            preds2 = batch_predictions.mean[:, 1]
            preds3 = batch_predictions.mean[:, 2]
            preds4 = batch_predictions.mean[:, 3]
            mean_abs_error1 = torch.mean(torch.abs(train_y[..., 0] - preds1))
            mean_abs_error2 = torch.mean(torch.abs(train_y[..., 1] - preds2))
            mean_abs_error3 = torch.mean(torch.abs(train_y[..., 2] - preds3))
            mean_abs_error4 = torch.mean(torch.abs(train_y[..., 3] - preds4))
            self.assertLess(mean_abs_error1.squeeze().item(), 0.15)
            self.assertLess(mean_abs_error2.squeeze().item(), 0.15)
            self.assertLess(mean_abs_error3.squeeze().item(), 0.15)
            self.assertLess(mean_abs_error4.squeeze().item(), 0.15)

            # Smoke test for getting predictive uncertainties
            lower, upper = batch_predictions.confidence_region()
            self.assertEqual(lower.shape, train_y.shape)
            self.assertEqual(upper.shape, train_y.shape)
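
test_train_and_eval above refers to a module-level LMCModel and 4-task train_x / train_y that are not shown here. A possible sketch, modeled on gpytorch's LMCVariationalStrategy pattern; the latent/inducing-point counts and the synthetic data are assumptions.

import math
import torch
import gpytorch

# Assumed 4-task training data on shared inputs
train_x = torch.linspace(0, 1, 100)
train_y = torch.stack([
    torch.sin(train_x * (2 * math.pi)),
    torch.cos(train_x * (2 * math.pi)),
    torch.sin(train_x * (2 * math.pi)) + 2 * torch.cos(train_x * (2 * math.pi)),
    -torch.cos(train_x * (2 * math.pi)),
], -1)


class LMCModel(gpytorch.models.ApproximateGP):
    """Sketch of a 4-task LMC variational GP built from 3 latent GPs (assumed definition)."""

    def __init__(self, num_latents=3, num_tasks=4):
        # One set of inducing points per latent function
        inducing_points = torch.rand(num_latents, 16, 1)
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )
        # LMCVariationalStrategy mixes the latent GPs into num_tasks outputs
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=num_tasks,
            num_latents=num_latents,
            latent_dim=-1,
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])),
            batch_shape=torch.Size([num_latents]),
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)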
    def test_multitask_gp_mean_abs_error(self, cuda=False):
        train_x, train_y = self._get_data(cuda=cuda)
        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        model = MultitaskGPModel(train_x, train_y, likelihood)

        if cuda:
            model.cuda()

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        # Use the adam optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

        n_iter = 50
        for _ in range(n_iter):
            # Zero gradients from the previous iteration
            optimizer.zero_grad()
            # Forward pass on the training inputs
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Test the model
        model.eval()
        likelihood.eval()

        with torch.no_grad():
            test_x = torch.linspace(0, 1, 51, device=torch.device("cuda") if cuda else torch.device("cpu"))
            test_y1 = torch.sin(test_x * (2 * pi))
            test_y2 = torch.cos(test_x * (2 * pi))
            test_preds = likelihood(model(test_x)).mean
            mean_abs_error_task_1 = torch.mean(torch.abs(test_y1 - test_preds[:, 0]))
            mean_abs_error_task_2 = torch.mean(torch.abs(test_y2 - test_preds[:, 1]))

        self.assertLess(mean_abs_error_task_1.squeeze().item(), 0.05)
        self.assertLess(mean_abs_error_task_2.squeeze().item(), 0.05)
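
This test pulls its training data from self._get_data, which is defined on the test class but not shown here. A possible sketch of that helper, assuming the usual two-task sin/cos setup and `from math import pi` as in the test body; the sample count and noise level are guesses.

    def _get_data(self, cuda=False):
        # Assumed helper: two tasks (sin and cos) observed at 100 shared inputs in [0, 1]
        device = torch.device("cuda") if cuda else torch.device("cpu")
        train_x = torch.linspace(0, 1, 100, device=device)
        train_y1 = torch.sin(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.01
        train_y2 = torch.cos(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.01
        return train_x, torch.stack([train_y1, train_y2], -1)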
    def test_train_on_single_set_test_on_batch(self):
        # Set up a multitask likelihood (with a prior on the noise) and the exact GP model
        likelihood = MultitaskGaussianLikelihood(
            log_noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1),
                                                        scale=torch.ones(1),
                                                        log_transform=True),
            num_tasks=2,
        )
        gp_model = ExactGPModel(train_x1, train_y1, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(
            list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1
        )
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x1)
            loss = -mll(output, train_y1).sum()
            loss.backward()
            optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Make predictions for both sets of test points, and check MAEs.
        with torch.no_grad():
            batch_predictions = likelihood(gp_model(test_x12))
            preds1 = batch_predictions.mean[0]
            preds2 = batch_predictions.mean[1]
            mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
            mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
            self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
            self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
Example #5
    def test_train_and_eval(self):
        # Set up the multitask likelihood and the exact GP model
        likelihood = MultitaskGaussianLikelihood(num_tasks=2)
        gp_model = ExactGPModel(train_x, train_y12, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(75):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y12).sum()
            loss.backward()
            optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Make predictions for both sets of test points, and check MAEs.
        with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
            batch_predictions = likelihood(gp_model(test_x))
            preds1 = batch_predictions.mean[:, 0]
            preds2 = batch_predictions.mean[:, 1]
            mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
            mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
            self.assertLess(mean_abs_error1.squeeze().item(), 0.01)
            self.assertLess(mean_abs_error2.squeeze().item(), 0.01)

            # Smoke test for getting predictive uncertainties
            lower, upper = batch_predictions.confidence_region()
            self.assertEqual(lower.shape, test_y12.shape)
            self.assertEqual(upper.shape, test_y12.shape)
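
The module-level fixtures train_x, train_y12, test_x, test_y1, test_y2, and test_y12 used by this test are not shown. A plausible setup sketch, assuming the usual noiseless sin/cos two-task data on shared inputs.

import math
import torch

# Assumed module-level fixtures for the test above
train_x = torch.linspace(0, 1, 100)
train_y1 = torch.sin(train_x * (2 * math.pi))
train_y2 = torch.cos(train_x * (2 * math.pi))
train_y12 = torch.stack([train_y1, train_y2], -1)

test_x = torch.linspace(0, 1, 51)
test_y1 = torch.sin(test_x * (2 * math.pi))
test_y2 = torch.cos(test_x * (2 * math.pi))
test_y12 = torch.stack([test_y1, test_y2], -1)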
Example #6
    def test_multitask_low_rank_noise_covar(self):
        likelihood = MultitaskGaussianLikelihood(n_tasks=2, rank=1)
        model = MultitaskGPModel(train_x, train_y, likelihood)
        # Find optimal model hyperparameters
        model.train()
        likelihood.train()

        # Use the adam optimizer
        optimizer = torch.optim.Adam(
            [{"params": model.parameters()}],  # Includes GaussianLikelihood parameters
            lr=0.1,
        )

        # "Loss" for GPs - the marginal log likelihood
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

        n_iter = 50
        for _ in range(n_iter):
            # Zero gradients from the previous iteration
            optimizer.zero_grad()
            # Forward pass on the training inputs
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Test the model
        model.eval()
        likelihood.eval()

        n_tasks = 2
        task_noise_covar_factor = likelihood.task_noise_covar_factor
        log_noise = likelihood.log_noise
        task_noise_covar = task_noise_covar_factor.matmul(
            task_noise_covar_factor.transpose(-1, -2)
        ) + log_noise.exp() * torch.eye(n_tasks)

        self.assertGreater(task_noise_covar[0, 1].data.squeeze().item(), 0.05)
Example #7
class GP(BaseModel):
    support_grad = True
    support_multi_output = True

    def __init__(self, num_cont, num_enum, num_out, **conf):
        super().__init__(num_cont, num_enum, num_out, **conf)
        self.lr = conf.get('lr', 3e-2)
        self.num_epochs = conf.get('num_epochs', 100)
        self.verbose = conf.get('verbose', False)
        self.print_every = conf.get('print_every', 10)
        self.noise_free = conf.get('noise_free', False)
        self.pred_likeli = conf.get('pred_likeli', True)
        self.noise_lb = conf.get('noise_lb', 1e-5)
        self.xscaler = TorchMinMaxScaler((-1, 1))
        self.yscaler = TorchStandardScaler()

    def fit_scaler(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        if Xc is not None and Xc.shape[1] > 0:
            self.xscaler.fit(Xc)
        self.yscaler.fit(y)

    def xtrans(self, Xc: Tensor, Xe: Tensor, y: Tensor = None):
        if Xc is not None and Xc.shape[1] > 0:
            Xc_t = self.xscaler.transform(Xc)
        else:
            Xc_t = torch.zeros(Xe.shape[0], 0)

        if Xe is None:
            Xe_t = torch.zeros(Xc.shape[0], 0).long()
        else:
            Xe_t = Xe.long()

        if y is not None:
            y_t = self.yscaler.transform(y)
            return Xc_t, Xe_t, y_t
        else:
            return Xc_t, Xe_t

    def fit(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        Xc, Xe, y = filter_nan(Xc, Xe, y, 'all')
        self.fit_scaler(Xc, Xe, y)
        Xc, Xe, y = self.xtrans(Xc, Xe, y)

        assert (Xc.shape[1] == self.num_cont)
        assert (Xe.shape[1] == self.num_enum)
        assert (y.shape[1] == self.num_out)

        self.Xc = Xc
        self.Xe = Xe
        self.y = y

        n_constr = GreaterThan(self.noise_lb)
        n_prior = LogNormalPrior(-4.63, 0.5)
        if self.num_out == 1:
            self.lik = GaussianLikelihood(noise_constraint=n_constr,
                                          noise_prior=n_prior)
        else:
            self.lik = MultitaskGaussianLikelihood(num_tasks=self.num_out,
                                                   noise_constraint=n_constr,
                                                   noise_prior=n_prior)
        self.gp = GPyTorchModel(self.Xc, self.Xe, self.y, self.lik,
                                **self.conf)

        if self.num_out == 1:  # XXX: only tuned for single-output BO
            if self.num_cont > 0:
                self.gp.kern.outputscale = self.y.var()
                lscales = self.gp.kern.base_kernel.lengthscale.detach().clone().view(1, -1)
                for i in range(self.num_cont):
                    lscales[0, i] = torch.pdist(self.Xc[:, i].view(-1, 1)).median().clamp(min=0.02)
                self.gp.kern.base_kernel.lengthscale = lscales
            if self.noise_free:
                self.gp.likelihood.noise = self.noise_lb * 1.1
                self.gp.likelihood.raw_noise.requires_grad = False
            else:
                self.gp.likelihood.noise = max(1e-2, self.noise_lb)

        self.gp.train()
        self.lik.train()

        opt = torch.optim.LBFGS(self.gp.parameters(),
                                lr=self.lr,
                                max_iter=5,
                                line_search_fn='strong_wolfe')
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.lik, self.gp)
        for epoch in range(self.num_epochs):

            def closure():
                dist = self.gp(self.Xc, self.Xe)
                loss = -1 * mll(dist, self.y.squeeze())
                opt.zero_grad()
                loss.backward()
                return loss

            opt.step(closure)
            if self.verbose and ((epoch + 1) % self.print_every == 0 or epoch == 0):
                print('After %d epochs, loss = %g' % (epoch + 1, closure().item()), flush=True)
        self.gp.eval()
        self.lik.eval()

    def predict(self, Xc, Xe):
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
            pred = self.gp(Xc, Xe)
            if self.pred_likeli:
                pred = self.lik(pred)
            mu_ = pred.mean.reshape(-1, self.num_out)
            var_ = pred.variance.reshape(-1, self.num_out)
        mu = self.yscaler.inverse_transform(mu_)
        var = var_ * self.yscaler.std**2
        return mu, var

    def sample_y(self, Xc, Xe, n_samples=1) -> FloatTensor:
        """
        Should return (n_samples, Xc.shape[0], self.num_out) 
        """
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.debug(False):
            if self.pred_likeli:
                pred = self.lik(self.gp(Xc, Xe))
            else:
                pred = self.gp(Xc, Xe)
            samp = pred.rsample(torch.Size((n_samples,))).view(n_samples, Xc.shape[0], self.num_out)
            return self.yscaler.inverse_transform(samp)

    def sample_f(self):
        raise NotImplementedError(
            'Thompson sampling is not supported for GP, use `sample_y` instead'
        )

    @property
    def noise(self):
        if self.num_out == 1:
            return (self.gp.likelihood.noise * self.yscaler.std**2).view(self.num_out).detach()
        else:
            return (self.gp.likelihood.noise_covar.noise * self.yscaler.std**2).view(self.num_out).detach()
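
A hypothetical end-to-end usage sketch for the GP wrapper above, assuming its dependencies (BaseModel, GPyTorchModel, filter_nan, TorchMinMaxScaler, TorchStandardScaler) are importable from the surrounding package; the data here is synthetic and the keyword arguments are only the ones read in __init__.

import torch

# Synthetic 1-output problem with 3 continuous inputs and no enumerated inputs
Xc = torch.rand(64, 3)
Xe = torch.zeros(64, 0).long()
y = Xc.sum(dim=1, keepdim=True) + 0.01 * torch.randn(64, 1)

model = GP(num_cont=3, num_enum=0, num_out=1, num_epochs=30, verbose=False)
model.fit(Xc, Xe, y)

# Posterior mean/variance and samples at new points
mu, var = model.predict(torch.rand(8, 3), torch.zeros(8, 0).long())
samples = model.sample_y(torch.rand(8, 3), torch.zeros(8, 0).long(), n_samples=5)
print(mu.shape, var.shape, samples.shape)  # expected: (8, 1), (8, 1), (5, 8, 1)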
def train_gp(train_x, train_y, use_ard, num_steps, hypers={}):
    """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized."""
    assert train_x.ndim == 2
    assert train_y.ndim == 2
    assert train_x.shape[0] == train_y.shape[0]

    # Create hyper parameter bounds
    noise_constraint = Interval(5e-4, 0.2)
    if use_ard:
        lengthscale_constraint = Interval(0.005, 2.0)
    else:
        lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1]))  # [0.005, sqrt(dim)]
    outputscale_constraint = Interval(0.05, 20.0)

    # Create models
    likelihood = MultitaskGaussianLikelihood(
        num_tasks=train_y.size(-1),
        noise_constraint=noise_constraint,
    ).to(device=train_x.device, dtype=train_y.dtype)
    ard_dims = train_x.shape[1] if use_ard else None
    model = GP(
        train_x=train_x,
        train_y=train_y,
        likelihood=likelihood,
        lengthscale_constraint=lengthscale_constraint,
        outputscale_constraint=outputscale_constraint,
        ard_dims=ard_dims,
    ).to(device=train_x.device, dtype=train_x.dtype)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    # Initialize model hypers
    if hypers:
        model.load_state_dict(hypers)
    else:
        hypers = {}
        hypers["covar_module.outputscale"] = 1.0
        hypers["covar_module.base_kernel.lengthscale"] = 0.5
        hypers["likelihood.noise"] = 0.005
        model.initialize(**hypers)

    # Use the adam optimizer
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Switch to eval mode
    model.eval()
    likelihood.eval()

    return model
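
A hypothetical usage sketch for train_gp, assuming the GP class it constructs is an exact multitask GP that stores its likelihood (so model.likelihood is available after training); inputs lie in [0, 1]^d and targets are standardized per task, as the docstring requires.

import torch

dim, n = 5, 40
train_x = torch.rand(n, dim)  # inputs in [0, 1]^d, as required
raw_y = torch.stack([torch.sin(train_x.sum(-1)), torch.cos(train_x.sum(-1))], -1)
train_y = (raw_y - raw_y.mean(0)) / raw_y.std(0)  # standardize each task

model = train_gp(train_x, train_y, use_ard=True, num_steps=100)

# Posterior at new points; train_gp has already switched the model to eval mode
with torch.no_grad():
    pred = model.likelihood(model(torch.rand(10, dim)))
    mean, variance = pred.mean, pred.variance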