Example #1
    def predict(self, input):
        input = transform(input.reshape((-1, self.input_size)), self.input_trans)

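        # GPyTorch settings: bounded preconditioner / root-decomposition sizes and LOVE fast predictive variances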
        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                output = self.likelihood(self.model(input)).mean

        output = inverse_transform(output, self.target_trans)
        if self.incremental:
            return input[..., :self.target_size] + output
        else:
            return output
Example #2
    def predict(self, input):
        self.device = torch.device('cpu')

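        # Predict on the CPU with the model and likelihood in evaluation mode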
        self.model.eval().to(self.device)
        self.likelihood.eval().to(self.device)

        input = transform(torch.reshape(input, (-1, self.input_size)), self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                output = self.likelihood(self.model(input)).mean

        output = inverse_transform(output[:, None], self.target_trans).squeeze()
        return output
Example #3
    def predict(self, input):
        self.device = torch.device('cpu')

        self.model.eval().to(self.device)
        self.likelihood.eval().to(self.device)

        input = transform(input.reshape((-1, self.input_size)),
                          self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
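                # Evaluate one GP per target dimension and stack the predictive means column-wise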
                _input = [input for _ in range(self.target_size)]
                predictions = self.likelihood(*self.model(*_input))
                output = torch.stack([_pred.mean for _pred in predictions]).T

        output = inverse_transform(output, self.target_trans).squeeze()
        return output
Example #4
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item()
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            'x': model.cpu().train_inputs[0],
            'y': model.cpu().train_targets
        },
        "results": DataFrame(all_outputs)
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
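    # Run main() under the solver and prediction settings parsed from the command line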
    with fast_pred_var(True), \
            use_toeplitz(args.toeplitz), \
            detach_test_caches(True), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            root_pred_var(True):
        main(args)
Example #5
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(args.data_loc,
                                                    args.num_init,
                                                    args.num_total,
                                                    test_is_year=False)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

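    # Online SKI (KISS-GP) model with fixed observation noise on a 2D interpolation grid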
    model = FixedNoiseOnlineSKIGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        GridInterpolationKernel(
            base_kernel=ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        ),
        learn_additional_noise=False,
    ).to(device)

    mll = BatchedWoodburyMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), \
            max_root_decomposition_size(args.sketch_size), \
            use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)

    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), \
            max_root_decomposition_size(args.sketch_size), \
            max_cholesky_size(args.cholesky_size), \
            use_toeplitz(args.toeplitz):
        pred_dist = model(test_x)

        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - test_y.view(-1))**2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

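    # Online loop: refit hyperparameters on the data seen so far, then condition on the next observation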
    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), \
                max_root_decomposition_size(args.sketch_size), \
                max_cholesky_size(args.cholesky_size), \
                use_toeplitz(args.toeplitz):
            loss = -mll(model(train_x[:i]), train_y[:i]).sum()

        loss.backward()
        mll_time = time.time() - start

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()
        start = time.time()
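        # Fold the newly observed point into the model in place (fantasy update), without refitting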
        with torch.no_grad():
            model.condition_on_observations(
                train_x[i].unsqueeze(0),
                train_y[i].view(1, 1),
                train_y_var[i].view(-1, 1),
                inplace=True,
            )
        fantasy_time = time.time() - start
        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()

            with detach_test_caches(), \
                    max_root_decomposition_size(args.sketch_size), \
                    max_cholesky_size(args.cholesky_size):
                pred_dist = model(test_x)
            end = time.time()

            rmse = (((pred_dist.mean -
                      test_y.view(-1))**2).mean().sqrt().item())
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            print("Outputscale: ",
                  model.covar_module.base_kernel.raw_outputscale)
            print(
                "Lengthscale: ",
                model.covar_module.base_kernel.base_kernel.raw_lengthscale,
            )

            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({
        "training": mll_time_list,
        "predictions": rmse_list
    }, args.output)
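Example #6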
    # marginal_loglikelihood gives the marginal log likelihood log p(y | X); negate it for the loss
    loss = -marginal_loglikelihood(output, y_train)
    loss.backward()

    print(
        f'Iter {i + 1} - Loss: {loss.item()}   noise: {model.likelihood.noise.item()}'
    )

    optimizer.step()

model.eval()
likelihood.eval()

with torch.no_grad(), settings.fast_pred_var(), settings.max_root_decomposition_size(25):
    x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis]).type(torch.float32)
    x_test = x_test.cuda()
    f_preds = model(x_test)
    y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean,
            var,
            x_test.cpu().numpy(),
            samples=samples)
Example #7
    optimizer.step()

model.eval()
likelihood.eval()

# Test points are regularly spaced over [1870, 2030]
# Make predictions by feeding the model output through the likelihood
# LOVE: fast_pred_var is used for faster computation of the predictive posterior
# https://arxiv.org/pdf/1803.06058.pdf
# This can be especially useful in settings like small-scale Bayesian optimization,
# where predictions need to be made at enormous numbers of candidate points,
# but there aren't enough training examples to necessarily warrant the use of sparse GP methods.
# max_root_decomposition_size(25) affects the accuracy of the LOVE solves (larger is more accurate, but slower).
t1 = time.time()
with torch.no_grad(), fast_pred_var(), max_root_decomposition_size(25):
    x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
    x_test = x_test.cuda()
    # f_preds gives the mean and covariance of the latent distribution, which is then passed through the likelihood
    f_preds = model(x_test)
    y_pred = likelihood(f_preds)

t2 = time.time()
print(t2 - t1)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
Example #8
for i in range(training_iterations):
    # Zero backprop gradients
    optimizer.zero_grad()
    # Get output from model
    output = model(x_train)
    # Calc loss and backprop derivatives
    loss = -mll(output, y_train)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()
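    # Free cached GPU memory between training iterations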
    torch.cuda.empty_cache()

model.eval()
likelihood.eval()

x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
x_test = x_test.cuda()

with settings.max_preconditioner_size(10), torch.no_grad():
    with settings.max_root_decomposition_size(30), settings.fast_pred_var():
        f_preds = model(x_test)
        y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean, var, x_test.cpu().numpy(), X_train=x_train.cpu().numpy(), Y_train=y_train.cpu().numpy(), samples=samples)
Example #9
    for key in y_means:
        y_means[key] = y_means[key].cpu()

    output_dict = {
        "observations": {
            "x": train_x.cpu(),
            "y": train_y.cpu(),
            "means": y_means,
            "latent_y": latent_y.cpu(),
        },
        "results": DataFrame(all_outputs),
        "args": args
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
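    # Use LOVE fast predictive variances unless exact prediction is requested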
    use_fast_pred_var = not args.use_exact

    with use_toeplitz(args.toeplitz), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            cholesky_jitter(1e-3), \
            fast_pred_var(use_fast_pred_var), \
            fast_pred_samples(True):
        main(args)
Example #10
    def test_KroneckerMultiTaskGP_custom(self):
        for batch_shape, dtype in itertools.product(
            (torch.Size(),),  # torch.Size([3])), TODO: Fix and test batch mode
            (torch.float, torch.double),
        ):
            tkwargs = {"device": self.device, "dtype": dtype}

            # initialization with custom settings
            likelihood = MultitaskGaussianLikelihood(
                num_tasks=2,
                rank=1,
                batch_shape=batch_shape,
            )
            data_covar_module = MaternKernel(
                nu=1.5,
                lengthscale_prior=GammaPrior(2.0, 4.0),
            )
            task_covar_prior = LKJCovariancePrior(
                n=2,
                eta=torch.tensor(0.5, **tkwargs),
                sd_prior=SmoothedBoxPrior(math.exp(-3), math.exp(2), 0.1),
            )
            model_kwargs = {
                "likelihood": likelihood,
                "data_covar_module": data_covar_module,
                "task_covar_prior": task_covar_prior,
                "rank": 1,
            }

            model, train_X, _ = _get_kronecker_model_and_training_data(
                model_kwargs=model_kwargs, batch_shape=batch_shape, **tkwargs
            )
            self.assertIsInstance(model, KroneckerMultiTaskGP)
            self.assertEqual(model.num_outputs, 2)
            self.assertIsInstance(model.likelihood, MultitaskGaussianLikelihood)
            self.assertEqual(model.likelihood.rank, 1)
            self.assertIsInstance(model.mean_module, MultitaskMean)
            self.assertIsInstance(model.covar_module, MultitaskKernel)
            base_kernel = model.covar_module
            self.assertIsInstance(base_kernel.data_covar_module, MaternKernel)
            self.assertIsInstance(base_kernel.task_covar_module, IndexKernel)
            task_covar_prior = base_kernel.task_covar_module.IndexKernelPrior
            self.assertIsInstance(task_covar_prior, LKJCovariancePrior)
            self.assertEqual(task_covar_prior.correlation_prior.eta, 0.5)
            lengthscale_prior = base_kernel.data_covar_module.lengthscale_prior
            self.assertIsInstance(lengthscale_prior, GammaPrior)
            self.assertEqual(lengthscale_prior.concentration, 2.0)
            self.assertEqual(lengthscale_prior.rate, 4.0)
            self.assertEqual(base_kernel.task_covar_module.covar_factor.shape[-1], 1)

            # test model fitting
            mll = ExactMarginalLogLikelihood(model.likelihood, model)
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=OptimizationWarning)
                mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)

            # test posterior
            max_cholesky_sizes = [1, 800]
            for max_cholesky in max_cholesky_sizes:
                model.train()
                test_x = torch.rand(2, 2, **tkwargs)
                # small root decomp to enforce zero padding
                with max_cholesky_size(max_cholesky), max_root_decomposition_size(3):
                    posterior_f = model.posterior(test_x)
                    self.assertIsInstance(posterior_f, GPyTorchPosterior)
                    self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
                    self.assertEqual(posterior_f.mean.shape, torch.Size([2, 2]))
                    self.assertEqual(posterior_f.variance.shape, torch.Size([2, 2]))

            # test observation noise
            posterior_noisy = model.posterior(test_x, observation_noise=True)
            self.assertTrue(
                torch.allclose(
                    posterior_noisy.variance, model.likelihood(posterior_f.mvn).variance
                )
            )

            # test posterior (batch eval)
            test_x = torch.rand(3, 2, 2, **tkwargs)
            posterior_f = model.posterior(test_x)
            self.assertIsInstance(posterior_f, GPyTorchPosterior)
            self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
            self.assertEqual(posterior_f.mean.shape, torch.Size([3, 2, 2]))
            self.assertEqual(posterior_f.variance.shape, torch.Size([3, 2, 2]))