Example #1
def _get_given_covar_module_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    model = MultiTaskGP(
        train_X,
        train_Y,
        task_feature=1,
        covar_module=RBFKernel(lengthscale_prior=LogNormalPrior(0.0, 1.0)),
    )
    return model.to(**tkwargs)
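
A minimal usage sketch (not part of the original snippet): fit the helper's MultiTaskGP with a standard GPyTorch training loop. This assumes _get_random_mt_data returns (train_X, train_Y) where column 1 of train_X holds the task index, as implied by task_feature=1 above.

import torch
from gpytorch.mlls import ExactMarginalLogLikelihood

tkwargs = {"device": torch.device("cpu"), "dtype": torch.double}
model = _get_given_covar_module_model(**tkwargs)
mll = ExactMarginalLogLikelihood(model.likelihood, model)

model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
for _ in range(50):
    optimizer.zero_grad()
    output = model(*model.train_inputs)
    # negative marginal log likelihood of the training targets
    loss = -mll(output, model.train_targets)
    loss.backward()
    optimizer.step()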
Example #2
    def test_pyro_sampling(self):
        try:
            import pyro
            from pyro.infer.mcmc import NUTS, MCMC
        except ImportError:
            # pyro is optional: skip this test if it is not installed
            return
        train_x, test_x, train_y, test_y = self._get_data(cuda=False)
        likelihood = GaussianLikelihood(
            noise_constraint=gpytorch.constraints.Positive())
        gp_model = ExactGPModel(train_x, train_y, likelihood)

        # Register normal GPyTorch priors
        gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1),
                                            "constant")
        gp_model.covar_module.base_kernel.register_prior(
            "lengthscale_prior", UniformPrior(0.01, 0.2), "lengthscale")
        gp_model.covar_module.register_prior("outputscale_prior",
                                             UniformPrior(1, 2), "outputscale")
        likelihood.register_prior("noise_prior", LogNormalPrior(-1.5, 0.1),
                                  "noise")

        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        def pyro_model(x, y):
            # Draw hyperparameters from the registered priors, then score the
            # data by registering the exact marginal log likelihood as a factor.
            gp_model.pyro_sample_from_prior()
            output = gp_model(x)
            mll.pyro_factor(output, y)
            return y

        nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
        mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20)
        mcmc_run.run(train_x, train_y)

        gp_model.pyro_load_from_samples(mcmc_run.get_samples())

        gp_model.eval()
        expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
        output = gp_model(expanded_test_x)

        self.assertEqual(output.mean.size(0), 3)

        # All 3 samples should do reasonably well on a noiseless dataset.
        self.assertLess(
            torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
        self.assertLess(
            torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
        self.assertLess(
            torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
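
The test above references an ExactGPModel class that is not shown. A minimal sketch of a definition consistent with the priors it registers (a constant mean and a scaled RBF kernel) could look like the following; the exact model used in the original suite may differ.

import gpytorch

class ExactGPModel(gpytorch.models.ExactGP):
    # Constant mean + ScaleKernel(RBFKernel), matching the "constant",
    # "lengthscale", and "outputscale" parameters the test attaches priors to.
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)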
Example #3
    def test_warp_transform(self):
        for dtype in (torch.float, torch.double):
            tkwargs = {"device": self.device, "dtype": dtype}

            # basic init
            indices = [0, 2]
            warp_tf = get_test_warp(indices).to(**tkwargs)
            self.assertTrue(warp_tf.training)

            k = Kumaraswamy(warp_tf.concentration1, warp_tf.concentration0)

            self.assertEqual(warp_tf.indices.tolist(), indices)

            # basic usage
            for batch_shape in (torch.Size(), torch.Size([3])):
                X = torch.rand(*batch_shape, 4, 3, **tkwargs)
                with torch.no_grad():
                    warp_tf = get_test_warp(indices=indices).to(**tkwargs)
                    X_tf = warp_tf(X)
                    expected_X_tf = X.clone()
                    expected_X_tf[..., indices] = k.cdf(
                        expected_X_tf[..., indices].clamp_min(
                            warp_tf.eps).clamp_max(1 - warp_tf.eps))
                    # warped columns should match the Kumaraswamy CDF; all other columns stay unchanged
                    self.assertTrue(torch.equal(expected_X_tf, X_tf))

                    # test untransform
                    untransformed_X = warp_tf.untransform(X_tf)

                    self.assertTrue(
                        torch.allclose(untransformed_X,
                                       X,
                                       rtol=1e-3,
                                       atol=1e-3))

                    # test no transform on eval
                    warp_tf = get_test_warp(
                        indices, transform_on_eval=False).to(**tkwargs)
                    X_tf = warp_tf(X)
                    self.assertFalse(torch.equal(X, X_tf))
                    warp_tf.eval()
                    X_tf = warp_tf(X)
                    self.assertTrue(torch.equal(X, X_tf))

                    # test no transform on train
                    warp_tf = get_test_warp(
                        indices=indices,
                        transform_on_train=False).to(**tkwargs)
                    X_tf = warp_tf(X)
                    self.assertTrue(torch.equal(X, X_tf))
                    warp_tf.eval()
                    X_tf = warp_tf(X)
                    self.assertFalse(torch.equal(X, X_tf))

                    # test equals
                    warp_tf2 = get_test_warp(
                        indices=indices,
                        transform_on_train=False).to(**tkwargs)
                    self.assertTrue(warp_tf.equals(warp_tf2))
                    # test different transform_on_train
                    warp_tf2 = get_test_warp(indices=indices)
                    self.assertFalse(warp_tf.equals(warp_tf2))
                    # test different indices
                    warp_tf2 = get_test_warp(
                        indices=[0, 1], transform_on_train=False).to(**tkwargs)
                    self.assertFalse(warp_tf.equals(warp_tf2))

                    # test preprocess_transform
                    self.assertTrue(
                        torch.equal(warp_tf.preprocess_transform(X), X))
                    warp_tf.transform_on_preprocess = True
                    self.assertTrue(
                        torch.equal(warp_tf.preprocess_transform(X), X_tf))

                    # test _set_concentration
                    warp_tf._set_concentration(0, warp_tf.concentration0)
                    warp_tf._set_concentration(1, warp_tf.concentration1)

                    # test concentration prior
                    prior0 = LogNormalPrior(0.0, 0.75).to(**tkwargs)
                    prior1 = LogNormalPrior(0.0, 0.5).to(**tkwargs)
                    warp_tf = get_test_warp(
                        indices=[0, 1],
                        concentration0_prior=prior0,
                        concentration1_prior=prior1,
                    )
                    for i, (name, module, p, _,
                            _) in enumerate(warp_tf.named_priors()):
                        self.assertEqual(name, f"concentration{i}_prior")
                        self.assertIsInstance(p, LogNormalPrior)
                        self.assertEqual(p.base_dist.scale,
                                         0.75 if i == 0 else 0.5)

                # test gradients
                X = 1 + 5 * torch.rand(*batch_shape, 4, 3, **tkwargs)
                warp_tf = get_test_warp(indices=indices).to(**tkwargs)
                X_tf = warp_tf(X)
                X_tf.sum().backward()
                for grad in (warp_tf.concentration0.grad,
                             warp_tf.concentration1.grad):
                    self.assertIsNotNone(grad)
                    self.assertFalse(torch.isnan(grad).any())
                    self.assertFalse(torch.isinf(grad).any())
                    self.assertFalse((grad == 0).all())
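
get_test_warp is a test helper that is not shown above. A plausible minimal sketch, assuming the Warp input transform from botorch.models.transforms.input with the signature this test targets (indices as the first argument), constructs the transform and moves the concentrations away from the identity (concentration0 = concentration1 = 1) so the warp is non-trivial:

import torch
from botorch.models.transforms.input import Warp

def get_test_warp(indices, **kwargs):
    # Assumed helper: extra kwargs (priors, transform_on_train, ...) are
    # forwarded to Warp; concentrations are set to fixed non-identity values.
    warp_tf = Warp(indices=indices, **kwargs)
    c0 = torch.tensor([1.0, 2.0])[: len(indices)]
    c1 = torch.tensor([2.0, 3.0])[: len(indices)]
    warp_tf._set_concentration(i=0, value=c0)
    warp_tf._set_concentration(i=1, value=c1)
    return warp_tf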
Example #4
def gp_torch_train(train_x: Tensor,
                   train_y: Tensor,
                   n_inducing_points: int,
                   tkwargs: Dict[str, Any],
                   init: bool,
                   scale: bool,
                   covar_name: str,
                   gp_file: Optional[str],
                   save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Optional[Dict[str, Any]] = None) -> SingleTaskGP:
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # build hyp
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        kmeans = MiniBatchKMeans(n_clusters=n_inducing_points,
                                 batch_size=min(10000, train_x.shape[0]),
                                 n_init=25)
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

        output_scale = None
        if scale:
            output_scale = train_y.var().item()
        lscales = torch.empty(1, train_x.shape[1])
        for i in range(train_x.shape[1]):
            lscales[0, i] = torch.pdist(train_x[:, i].view(
                -1, 1)).median().clamp(min=0.01)
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)

        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=inducing_points,
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = CustomWarp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
    else:
        # load model
        output_scale = 1  # will be overwritten when loading model
        lscales = torch.ones(
            train_x.shape[1])  # will be overwritten when loading model
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=torch.empty(
                                               n_inducing_points,
                                               train_x.shape[1]),
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = Warp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )
        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    # fit_gpytorch_model(mll, optimizer=fit_gpytorch_torch, approx_mll=False, options=options)
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=input_wp,
                       clip_value=10.0)
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")

    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")

    model.eval()
    return model
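
A minimal invocation sketch (assumed, not from the source). query_covar and CustomWarp used inside gp_torch_train are project-specific helpers that are not shown here, and the covar_name string and options keys below are placeholders that would need to match those helpers and the fit_gpytorch_torch variant being used.

import torch

tkwargs = {"device": torch.device("cpu"), "dtype": torch.float}
train_x = torch.rand(500, 8, **tkwargs)
train_y = torch.randn(500, 1, **tkwargs)   # gp_torch_train expects 2-D targets

model = gp_torch_train(
    train_x=train_x,
    train_y=train_y,
    n_inducing_points=64,
    tkwargs=tkwargs,
    init=True,                    # build hyperparameters from scratch (no gp_file)
    scale=True,                   # initialize an output scale from var(train_y)
    covar_name="matern52",        # placeholder understood by query_covar
    gp_file=None,
    save_file="gp_state_dict.pt",
    input_wp=False,               # skip input warping in this sketch
    options={"maxiter": 100, "lr": 0.05},   # assumed optimizer options
)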
Example #5
    def test_warp_transform(self):
        for dtype, batch_shape, warp_batch_shape in itertools.product(
            (torch.float, torch.double),
            (torch.Size(), torch.Size([3])),
            (torch.Size(), torch.Size([2])),
        ):
            tkwargs = {"device": self.device, "dtype": dtype}
            eps = 1e-6 if dtype == torch.double else 1e-5

            # basic init
            indices = [0, 2]
            warp_tf = get_test_warp(indices,
                                    batch_shape=warp_batch_shape,
                                    eps=eps).to(**tkwargs)
            self.assertTrue(warp_tf.training)

            k = Kumaraswamy(warp_tf.concentration1, warp_tf.concentration0)

            self.assertEqual(warp_tf.indices.tolist(), indices)

            # We don't want these data points to end up all the way near zero, since
            # this would cause numerical issues and thus result in a flaky test.
            X = 0.025 + 0.95 * torch.rand(*batch_shape, 4, 3, **tkwargs)
            X = X.unsqueeze(-3) if len(warp_batch_shape) > 0 else X
            with torch.no_grad():
                warp_tf = get_test_warp(indices=indices,
                                        batch_shape=warp_batch_shape,
                                        eps=eps).to(**tkwargs)
                X_tf = warp_tf(X)
                expected_X_tf = expand_and_copy_tensor(
                    X, batch_shape=warp_tf.batch_shape)
                expected_X_tf[...,
                              indices] = k.cdf(expected_X_tf[..., indices] *
                                               warp_tf._X_range +
                                               warp_tf._X_min)

                self.assertTrue(torch.equal(expected_X_tf, X_tf))

                # test untransform
                untransformed_X = warp_tf.untransform(X_tf)
                self.assertTrue(
                    torch.allclose(
                        untransformed_X,
                        expand_and_copy_tensor(
                            X, batch_shape=warp_tf.batch_shape),
                        rtol=1e-3,
                        atol=1e-3
                        if self.device == torch.device("cpu") else 1e-2,
                    ))
                if len(warp_batch_shape) > 0:
                    with self.assertRaises(BotorchTensorDimensionError):
                        warp_tf.untransform(X_tf.unsqueeze(-3))

                # test no transform on eval
                warp_tf = get_test_warp(
                    indices,
                    transform_on_eval=False,
                    batch_shape=warp_batch_shape,
                    eps=eps,
                ).to(**tkwargs)
                X_tf = warp_tf(X)
                self.assertFalse(torch.equal(X, X_tf))
                warp_tf.eval()
                X_tf = warp_tf(X)
                self.assertTrue(torch.equal(X, X_tf))

                # test no transform on train
                warp_tf = get_test_warp(
                    indices=indices,
                    transform_on_train=False,
                    batch_shape=warp_batch_shape,
                    eps=eps,
                ).to(**tkwargs)
                X_tf = warp_tf(X)
                self.assertTrue(torch.equal(X, X_tf))
                warp_tf.eval()
                X_tf = warp_tf(X)
                self.assertFalse(torch.equal(X, X_tf))

                # test equals
                warp_tf2 = get_test_warp(
                    indices=indices,
                    transform_on_train=False,
                    batch_shape=warp_batch_shape,
                    eps=eps,
                ).to(**tkwargs)
                self.assertTrue(warp_tf.equals(warp_tf2))
                # test different transform_on_train
                warp_tf2 = get_test_warp(indices=indices,
                                         batch_shape=warp_batch_shape,
                                         eps=eps)
                self.assertFalse(warp_tf.equals(warp_tf2))
                # test different indices
                warp_tf2 = get_test_warp(
                    indices=[0, 1],
                    transform_on_train=False,
                    batch_shape=warp_batch_shape,
                    eps=eps,
                ).to(**tkwargs)
                self.assertFalse(warp_tf.equals(warp_tf2))

                # test preprocess_transform
                warp_tf.transform_on_train = False
                self.assertTrue(torch.equal(warp_tf.preprocess_transform(X),
                                            X))
                warp_tf.transform_on_train = True
                self.assertTrue(
                    torch.equal(warp_tf.preprocess_transform(X), X_tf))

                # test _set_concentration
                warp_tf._set_concentration(0, warp_tf.concentration0)
                warp_tf._set_concentration(1, warp_tf.concentration1)

                # test concentration prior
                prior0 = LogNormalPrior(0.0, 0.75).to(**tkwargs)
                prior1 = LogNormalPrior(0.0, 0.5).to(**tkwargs)
                warp_tf = get_test_warp(
                    indices=[0, 1],
                    concentration0_prior=prior0,
                    concentration1_prior=prior1,
                    batch_shape=warp_batch_shape,
                    eps=eps,
                )
                for i, (name, _, p, _, _) in enumerate(warp_tf.named_priors()):
                    self.assertEqual(name, f"concentration{i}_prior")
                    self.assertIsInstance(p, LogNormalPrior)
                    self.assertEqual(p.base_dist.scale,
                                     0.75 if i == 0 else 0.5)

            # test gradients
            X = 1 + 5 * torch.rand(*batch_shape, 4, 3, **tkwargs)
            X = X.unsqueeze(-3) if len(warp_batch_shape) > 0 else X
            warp_tf = get_test_warp(indices=indices,
                                    batch_shape=warp_batch_shape,
                                    eps=eps).to(**tkwargs)
            X_tf = warp_tf(X)
            X_tf.sum().backward()
            for grad in (warp_tf.concentration0.grad,
                         warp_tf.concentration1.grad):
                self.assertIsNotNone(grad)
                self.assertFalse(torch.isnan(grad).any())
                self.assertFalse(torch.isinf(grad).any())
                self.assertFalse((grad == 0).all())

            # test set with scalar
            warp_tf._set_concentration(i=0, value=2.0)
            self.assertTrue((warp_tf.concentration0 == 2.0).all())
            warp_tf._set_concentration(i=1, value=3.0)
            self.assertTrue((warp_tf.concentration1 == 3.0).all())
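
The batched case above relies on broadcasting between the warp's batch_shape and the data's batch dimensions. A small sketch of that behavior, assuming a Warp constructor that accepts batch_shape and eps (as the get_test_warp helper in this test apparently forwards): with batch_shape=[2] the transform keeps two independent sets of concentration parameters, so an input of shape (3, 1, 4, 3) broadcasts to an output of shape (3, 2, 4, 3).

import torch
from botorch.models.transforms.input import Warp

warp_tf = Warp(indices=[0, 2], batch_shape=torch.Size([2]), eps=1e-6)
X = torch.rand(3, 1, 4, 3)   # data batch of 3, singleton dim for the warp batch
X_tf = warp_tf(X)
print(X_tf.shape)            # expected: torch.Size([3, 2, 4, 3])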