Example #1
    def test_transforms(self):
        train_x = torch.rand(10, 3, device=self.device)
        train_y = torch.randn(10, 4, 5, device=self.device)

        # test handling of Standardize
        with self.assertWarns(RuntimeWarning):
            model = HigherOrderGP(train_X=train_x,
                                  train_Y=train_y,
                                  outcome_transform=Standardize(m=5))
        self.assertIsInstance(model.outcome_transform, FlattenedStandardize)
        self.assertEqual(model.outcome_transform.output_shape,
                         train_y.shape[1:])
        self.assertEqual(model.outcome_transform.batch_shape, torch.Size())

        model = HigherOrderGP(
            train_X=train_x,
            train_Y=train_y,
            input_transform=Normalize(d=3),
            outcome_transform=FlattenedStandardize(train_y.shape[1:]),
        )
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})

        test_x = torch.rand(2, 5, 3, device=self.device)
        test_y = torch.randn(2, 5, 4, 5, device=self.device)
        posterior = model.posterior(test_x)
        self.assertIsInstance(posterior, TransformedPosterior)

        conditioned_model = model.condition_on_observations(test_x, test_y)
        self.assertIsInstance(conditioned_model, HigherOrderGP)

        self.check_transform_forward(model)
        self.check_transform_untransform(model)
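
The snippets in Examples #1–#4 assume the following imports. These paths are the standard BoTorch/GPyTorch locations for the names used above; they are reconstructed, not copied from the original test file.

import torch
from torch import manual_seed, rand, randn

from botorch.models.higher_order_gp import FlattenedStandardize, HigherOrderGP
from botorch.models.transforms.input import Normalize
from botorch.models.transforms.outcome import Standardize
from botorch.optim.fit import fit_gpytorch_torch
from botorch.posteriors import TransformedPosterior
from gpytorch.kernels import RBFKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood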
Example #2
    def setUp(self):
        super().setUp()
        torch.random.manual_seed(0)

        train_x = torch.rand(2, 10, 1, device=self.device)
        train_y = torch.randn(2, 10, 3, 5, device=self.device)

        self.model = HigherOrderGP(train_x, train_y)

        # check that we can assign different kernels and likelihoods;
        # HigherOrderGP takes one covar module per tensor mode of the
        # outcome: the input dimension plus each of the (3, 5) output dims
        model_2 = HigherOrderGP(
            train_X=train_x,
            train_Y=train_y,
            covar_modules=[RBFKernel(), RBFKernel(),
                           RBFKernel()],
            likelihood=GaussianLikelihood(),
        )

        model_3 = HigherOrderGP(
            train_X=train_x,
            train_Y=train_y,
            covar_modules=[RBFKernel(), RBFKernel(),
                           RBFKernel()],
            likelihood=GaussianLikelihood(),
            latent_init="gp",
        )

        for m in [self.model, model_2, model_3]:
            mll = ExactMarginalLogLikelihood(m.likelihood, m)
            fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})
Example #3
    def setUp(self):
        super().setUp()
        manual_seed(0)

        train_x = rand(2, 10, 1)
        train_y = randn(2, 10, 3, 5)

        train_x = train_x.to(device=self.device)
        train_y = train_y.to(device=self.device)

        self.model = HigherOrderGP(train_x, train_y, first_dim_is_batch=True)

        # check that we can assign different kernels and likelihoods
        model_2 = HigherOrderGP(
            train_x,
            train_y,
            first_dim_is_batch=True,
            covar_modules=[RBFKernel(), RBFKernel(),
                           RBFKernel()],
            likelihood=GaussianLikelihood(),
        )

        for m in [self.model, model_2]:
            mll = ExactMarginalLogLikelihood(m.likelihood, m)
            fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})
Example #4
    def test_transforms(self):
        train_x = rand(10, 3, device=self.device)
        train_y = randn(10, 4, 5, device=self.device)
        model = HigherOrderGP(
            train_x,
            train_y,
            input_transform=Normalize(d=3),
            outcome_transform=FlattenedStandardize(train_y.shape[1:]),
        )
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})

        test_x = rand(2, 5, 3, device=self.device)
        test_y = randn(2, 5, 4, 5, device=self.device)
        posterior = model.posterior(test_x)
        self.assertIsInstance(posterior, TransformedPosterior)

        conditioned_model = model.condition_on_observations(test_x, test_y)
        self.assertIsInstance(conditioned_model, HigherOrderGP)

        self.check_transform_forward(model)
        self.check_transform_untransform(model)
Example #5
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc,
        args.num_init,
        args.num_total,
        test_is_year=False,
        seed=args.seed,
    )
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    if args.model == "wiski":
        model = FixedNoiseOnlineSKIGP(
            init_x,
            init_y.view(-1, 1),
            init_y_var.view(-1, 1),
            GridInterpolationKernel(
                base_kernel=ScaleKernel(
                    MaternKernel(
                        ard_num_dims=2,
                        nu=0.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                ),
                grid_size=30,
                num_dims=2,
                grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
            ),
            learn_additional_noise=False,
        ).to(device)

        mll_type = lambda x, y: BatchedWoodburyMarginalLogLikelihood(
            x, y, clear_caches_every_iteration=True)
    elif args.model == "exact":
        model = FixedNoiseGP(
            init_x,
            init_y.view(-1, 1),
            init_y_var.view(-1, 1),
            ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
        ).to(device)
        mll_type = ExactMarginalLogLikelihood
    else:
        raise ValueError(f"Unknown model: {args.model}")

    mll = mll_type(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    model.train()
    model.zero_grad()
    # with max_cholesky_size(args.cholesky_size), skip_logdet_forward(True), \
    #       use_toeplitz(args.toeplitz), max_root_decomposition_size(args.sketch_size):
    fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    print("Named parameters: ", list(model.named_parameters()))

    print("--- Now computing initial RMSE")
    model.eval()
    with gpytorch.settings.skip_posterior_variances(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean - test_y)**2).mean().sqrt()

    print("---- Initial RMSE: ", pred_rmse.item())

    all_outputs = []
    start_ind = init_x.shape[0]
    end_ind = int(start_ind + args.batch_size)
    for step in range(args.num_steps):
        if step > 0 and step % 25 == 0:
            print("Beginning step ", step)

        total_time_step_start = time.time()

        if step > 0:
            print("---- Fitting model ----")
            start = time.time()
            model.train()
            model.zero_grad()
            mll = mll_type(model.likelihood, model)
            # with skip_logdet_forward(True), max_root_decomposition_size(
            #         args.sketch_size
            #     ), max_cholesky_size(args.cholesky_size), use_toeplitz(
            #         args.toeplitz
            #     ):
            fit_gpytorch_torch(mll,
                               options={
                                   "lr": 0.01 * (0.99**step),
                                   "maxiter": 300
                               })

            model.zero_grad()
            end = time.time()
            print("Elapsed fitting time: ", end - start)
            print("Named parameters: ", list(model.named_parameters()))

        if not args.random:
            if args.model == "wiski":
                botorch_model = OnlineSKIBotorchModel(model=model)
            else:
                botorch_model = model

            # qmc_sampler = SobolQMCNormalSampler(num_samples=4)

            bounds = torch.stack([torch.zeros(2), torch.ones(2)]).to(device)
            qnipv = qNIPV(
                model=botorch_model,
                mc_points=test_x,
                # sampler=qmc_sampler,
            )

            #with use_toeplitz(args.toeplitz), root_pred_var(True), fast_pred_var(True):
            candidates, acq_value = optimize_acqf(
                acq_function=qnipv,
                bounds=bounds,
                q=args.batch_size,
                num_restarts=1,
                raw_samples=10,  # used for initialization heuristic
                options={
                    "batch_limit": 5,
                    "maxiter": 200
                },
            )
        else:
            candidates = torch.rand(args.batch_size,
                                    train_x.shape[-1],
                                    device=device,
                                    dtype=train_x.dtype)
            acq_value = torch.zeros(1)
            model.eval()
            _ = model(test_x[:10])  # to init caches

        print("---- Finished optimizing; now querying dataset ---- ")
        with torch.no_grad():
            covar_dists = model.covar_module(candidates, train_x)
            nearest_points = covar_dists.evaluate().argmax(dim=-1)
            new_x = train_x[nearest_points]
            new_y = train_y[nearest_points]
            new_y_var = train_y_var[nearest_points]

            todrop = torch.tensor(
                [x in nearest_points for x in range(train_x.shape[0])])
            train_x, train_y, train_y_var = train_x[~todrop], train_y[
                ~todrop], train_y_var[~todrop]
            print("New train_x shape", train_x.shape)
            print("--- Now updating model with simulator ----")
            model = model.condition_on_observations(X=new_x,
                                                    Y=new_y.view(-1, 1),
                                                    noise=new_y_var.view(
                                                        -1, 1))

        print("--- Now computing updated RMSE")
        model.eval()
        # with gpytorch.settings.fast_pred_var(True), \
        #     detach_test_caches(True), \
        #     max_root_decomposition_size(args.sketch_size), \
        #     max_cholesky_size(args.cholesky_size), \
        #     use_toeplitz(args.toeplitz), root_pred_var(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean.view(-1) -
                      test_y.view(-1))**2).mean().sqrt()
        pred_avg_variance = test_pred.variance.mean()

        total_time_step_elapsed_time = time.time() - total_time_step_start
        step_output_list = [
            total_time_step_elapsed_time,
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item()
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            'x': model.cpu().train_inputs[0],
            'y': model.cpu().train_targets
        },
        "results": DataFrame(all_outputs)
    }
    torch.save(output_dict, args.output)
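
A minimal sketch of the command-line interface this script expects. The flag names are taken from the attributes that main reads (plus the sketch_size/cholesky_size/toeplitz settings referenced in the commented-out context managers); the types and defaults here are assumptions, not the original parser.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_loc", type=str, required=True)
    parser.add_argument("--model", type=str, default="wiski",
                        choices=["wiski", "exact"])
    parser.add_argument("--num_init", type=int, default=500)
    parser.add_argument("--num_total", type=int, default=10000)
    parser.add_argument("--num_steps", type=int, default=100)
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--sketch_size", type=int, default=512)
    parser.add_argument("--cholesky_size", type=int, default=1000)
    parser.add_argument("--toeplitz", action="store_true")
    parser.add_argument("--cuda", action="store_true")
    parser.add_argument("--random", action="store_true")
    parser.add_argument("--output", type=str, default="results.pt")
    main(parser.parse_args())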
Example #6
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(args.data_loc,
                                                    args.num_init,
                                                    args.num_total,
                                                    test_is_year=False)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    model = FixedNoiseOnlineSKIGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        GridInterpolationKernel(
            base_kernel=ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        ),
        learn_additional_noise=False,
    ).to(device)

    mll = BatchedWoodburyMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), max_root_decomposition_size(
            args.sketch_size), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)

    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_root_decomposition_size(
            args.sketch_size), max_cholesky_size(
                args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(test_x)

        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - test_y.view(-1))**2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    mll_time_list = []
    rmse_list = []
    # stream in one new point at a time: refit on the first i points,
    # then condition the model on point i
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_root_decomposition_size(
                args.sketch_size), max_cholesky_size(
                    args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(train_x[:i]), train_y[:i]).sum()

        loss.backward()
        mll_time = time.time() - start

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()
        start = time.time()
        with torch.no_grad():
            model.condition_on_observations(
                train_x[i].unsqueeze(0),
                train_y[i].view(1, 1),
                train_y_var[i].view(-1, 1),
                inplace=True,
            )
        fantasy_time = time.time() - start
        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()

            with detach_test_caches(), max_root_decomposition_size(
                    args.sketch_size), max_cholesky_size(args.cholesky_size):
                pred_dist = model(test_x)
            end = time.time()

            rmse = (((pred_dist.mean -
                      test_y.view(-1))**2).mean().sqrt().item())
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            print("Outputscale: ",
                  model.covar_module.base_kernel.raw_outputscale)
            print(
                "Lengthscale: ",
                model.covar_module.base_kernel.base_kernel.raw_lengthscale,
            )

            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({
        "training": mll_time_list,
        "predictions": rmse_list
    }, args.output)
Example #7
def main(args):
    if args.batch_size > 1 and args.acqf == "mves":
        raise NotImplementedError(
            "Cyclic optimization is not implemented for MVES currently. Please use a batch size of 1."
        )
    if args.cuda and torch.cuda.is_available():
        args.device = torch.device("cuda:0")
    else:
        args.device = torch.device("cpu")

    torch.random.manual_seed(args.seed)
    test_function = prepare_function(args, args.device)
    init_x, init_y, y_means, latent_y = initialize_random_data(
        test_function, args.device, args.num_init
    )

    bounds = test_function.bounds.t()

    unit_bounds = torch.ones_like(bounds)
    unit_bounds[:, 0] = 0.0

    noise = args.noise ** 2 * torch.ones_like(init_y) if args.fixed_noise else None

    if args.model == "wiski":

        def initialize_model(X, Y, old_model=None, **kwargs):
            if old_model is None:
                covar_module = ScaleKernel(
                    MaternKernel(
                        nu=2.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                        lengthscale_constraint=Interval(1e-4, 12.0),
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                    outputscale_constraint=Interval(1e-4, 12.0),
                )
            else:
                covar_module = old_model.covar_module

            if args.dim == 3:
                wiski_grid_size = 10
            elif args.dim == 2:
                wiski_grid_size = 30
            else:
                raise ValueError(f"Unsupported dim: {args.dim}")

            kernel_cache = old_model._kernel_cache if old_model is not None else None

            model_obj = OnlineSKIBotorchModel(
                X,
                Y,
                train_noise_term=noise,
                grid_bounds=bounds,
                grid_size=wiski_grid_size,
                learn_additional_noise=True,
                kernel_cache=kernel_cache,
                covar_module=covar_module,
            ).to(X)

            mll = BatchedWoodburyMarginalLogLikelihood(
                model_obj.likelihood, model_obj, clear_caches_every_iteration=True
            )
            # TODO: reload statedict here?
            # weird errors resulting

            return model_obj, mll

    elif args.model == "exact":

        def initialize_model(X, Y, old_model=None, **kwargs):
            if old_model is None:
                covar_module = ScaleKernel(
                    MaternKernel(
                        nu=2.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                        lengthscale_constraint=Interval(1e-4, 12.0),
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                    outputscale_constraint=Interval(1e-4, 12.0),
                )

                if args.fixed_noise:
                    model_obj = FixedNoiseGP(
                        X, Y, train_Yvar=noise, covar_module=covar_module
                    )
                else:
                    model_obj = SingleTaskGP(X, Y, covar_module=covar_module)
            else:
                model_obj = old_model
            mll = ExactMarginalLogLikelihood(model_obj.likelihood, model_obj)
            return model_obj, mll

    elif args.model == "osvgp":

        def initialize_model(X, Y, old_model=None, **kwargs):
            if old_model is None:
                if args.dim == 3:
                    wiski_grid_size = 10
                elif args.dim == 2:
                    wiski_grid_size = 30
                else:
                    raise ValueError(f"Unsupported dim: {args.dim}")

                grid_list = create_grid([wiski_grid_size] * args.dim, grid_bounds=bounds)
                inducing_points = (
                    torch.stack([x.reshape(-1) for x in torch.meshgrid(grid_list)])
                    .t()
                    .contiguous()
                    .clone()
                )

                likelihood = GaussianLikelihood()
                model_base = VariationalGPModel(
                    inducing_points,
                    likelihood=likelihood,
                    beta=1.0,
                    learn_inducing_locations=True,
                )
                model_obj = ApproximateGPyTorchModel(
                    model_base, likelihood, num_outputs=1
                )
                model_base.train_inputs = [X]
                model_base.train_targets = Y.view(-1)

                # fixed-noise Gaussian likelihoods are not implemented for the streaming setting
                if args.fixed_noise:
                    model_obj.likelihood.noise = args.noise ** 2
                    # freeze the noise parameter; setting `requires_grad`
                    # on the likelihood module itself would be a no-op
                    model_obj.likelihood.noise_covar.raw_noise.requires_grad_(False)
            else:
                model_obj = old_model
                model_obj.train_inputs = [X]
                model_obj.train_targets = Y.view(-1)

            mll = VariationalELBO(
                model_obj.likelihood, model_obj.model, num_data=X.shape[-2]
            )
            return model_obj, mll

    train_x, train_y = init_x, init_y
    model_obj = None

    all_outputs = []
    for step in range(args.num_steps):
        t0 = time.time()
        model_obj, mll = initialize_model(train_x, train_y, old_model=model_obj)
        model_obj = model_obj.to(train_x)

        # fitting with L-BFGS-B is very slow due to the inducing points
        if args.model != "osvgp":
            fit_gpytorch_model(mll)
        else:
            fit_gpytorch_torch(mll, options={"maxiter": 1000})

        t0_total = time.time() - t0

        acqf = prepare_acquisition_function(
            args, model_obj, train_x, train_y, bounds, step
        )

        t1 = time.time()

        (
            new_x_ei,
            new_obj_unstandardized,
            new_latent_obj,
        ) = optimize_acqf_and_get_observation(
            acqf,
            bounds=unit_bounds.t(),
            test_function_bounds=bounds.t(),
            batch_size=args.batch_size,
            test_function=test_function,
        )
        new_obj_ei = (new_obj_unstandardized - y_means["mean"]) / y_means["std"]

        train_x = torch.cat((train_x, new_x_ei), dim=0)
        train_y = torch.cat((train_y, new_obj_ei), dim=0)
        latent_y = torch.cat((latent_y, new_latent_obj), dim=0)
        if noise is not None:
            new_noise = args.noise ** 2 * torch.ones_like(new_obj_ei)
            noise = torch.cat((noise, new_noise), dim=0)
        else:
            new_noise = None
        t1_total = time.time() - t1

        t2 = time.time()
        if args.model != "osvgp":
            if args.fixed_noise:
                kwargs = {"noise": new_noise}
            else:
                kwargs = {}
            model_obj = model_obj.condition_on_observations(
                X=new_x_ei, Y=new_obj_ei, **kwargs
            )
        if args.model == "osvgp":
            model_obj.model.update_variational_parameters(
                new_x=new_x_ei, new_y=new_obj_ei
            )
        t2_total = time.time() - t2
        total = t0_total + t1_total + t2_total

        max_achieved = train_y.max() * y_means["std"] + y_means["mean"]
        max_latent_achieved = latent_y.max()
        output_lists = [
            t0_total,
            t1_total,
            t2_total,
            total,
            max_achieved.item(),
            max_latent_achieved.item(),
        ]
        all_outputs.append(output_lists)

        if step % (args.num_steps // 5) == 0:
            print(
                "Step ",
                step,
                " of ",
                args.num_steps,
                "Max Achieved: ",
                max_achieved.item(),
                "Max Latent Achieved: ",
                max_latent_achieved.item(),
            )

    for key in y_means:
        y_means[key] = y_means[key].cpu()

    output_dict = {
        "observations": {
            "x": train_x.cpu(),
            "y": train_y.cpu(),
            "means": y_means,
            "latent_y": latent_y.cpu(),
        },
        "results": DataFrame(all_outputs),
        "args": args
    }
    torch.save(output_dict, args.output)
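
For reference, a sketch of how the saved file might be inspected afterwards; the keys mirror output_dict above, but this loader is not part of the original script. Because the dict contains a pickled pandas DataFrame, recent PyTorch versions need weights_only=False.

import torch

out = torch.load("results.pt", weights_only=False)
print(out["results"])                  # per-step timings and max objective values
print(out["observations"]["x"].shape)  # queried inputs
print(out["args"])                     # the argparse namespace used for the run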