Example #1
def test_cglb_predict():
    """
    Test that 1.) The predict method returns the same variance estimate as SGPR.
              2.) The predict method returns the same mean as SGPR for v=0.
              3.) The predict method returns a mean very similar to GPR when CG is run to low tolerance.
    """
    rng: np.random.RandomState = np.random.RandomState(999)
    train, z, xs = data(rng)
    noise = 0.2

    gpr = GPR(train, kernel=SquaredExponential(), noise_variance=noise)
    sgpr = SGPR(train,
                kernel=SquaredExponential(),
                inducing_variable=z,
                noise_variance=noise)

    cglb = CGLB(
        train,
        kernel=SquaredExponential(),
        inducing_variable=z,
        noise_variance=noise,
    )

    gpr_mean, _ = gpr.predict_y(xs, full_cov=False)
    sgpr_mean, sgpr_cov = sgpr.predict_y(xs, full_cov=False)
    cglb_mean, cglb_cov = cglb.predict_y(
        xs, full_cov=False,
        cg_tolerance=1e6)  # set tolerance high so v stays at 0.

    assert np.allclose(sgpr_cov, cglb_cov)
    assert np.allclose(sgpr_mean, cglb_mean)

    cglb_mean, _ = cglb.predict_y(xs, full_cov=False, cg_tolerance=1e-12)

    assert np.allclose(gpr_mean, cglb_mean)
Example #2
def compute_residual_eig(Y: np.ndarray, Kx: np.ndarray) -> np.ndarray:
    """Residual of Y based on Kx, a kernel matrix of X"""
    assert len(Y) == len(Kx)

    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, len(Kx) // 4)))
    phi_X = eix @ np.diag(np.sqrt(eig_Kx))  # X @ X.T is close to K_X
    n_feats = phi_X.shape[1]

    linear_kernel = Linear(n_feats, ARD=True)
    gp_model = GPR(phi_X, Y, linear_kernel + White(n_feats))
    gp_model.optimize()

    new_Kx = linear_kernel.compute_K_symm(phi_X)
    sigma_squared = gp_model.kern.white.variance.value[0]

    return (pdinv(np.eye(len(Kx)) + new_Kx / sigma_squared) @ Y).squeeze()
Example #3
def residual_kernel(K_Y: np.ndarray, K_X: np.ndarray, use_expectation=True, with_gp=True, sigma_squared=1e-3, return_learned_K_X=False):
    """Kernel matrix of residual of Y given X based on their kernel matrices, Y=f(X)"""
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    K_Y, K_X = centering(K_Y), centering(K_X)
    T = len(K_Y)

    if with_gp:
        eig_Ky, eiy = truncated_eigen(*eigdec(K_Y, min(100, T // 4)))
        eig_Kx, eix = truncated_eigen(*eigdec(K_X, min(100, T // 4)))

        X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to K_X
        Y = eiy @ diag(sqrt(eig_Ky))
        n_feats = X.shape[1]

        linear = Linear(n_feats, ARD=True)
        white = White(n_feats)
        gp_model = GPR(X, Y, linear + white)
        gpflow.train.ScipyOptimizer().minimize(gp_model)

        K_X = linear.compute_K_symm(X)
        sigma_squared = white.variance.value

    P = pdinv(np.eye(T) + K_X / sigma_squared)  # == I-K @ inv(K+Sigma) in Zhang et al. 2011
    if use_expectation:  # Flaxman et al. 2016 Gaussian Processes for Independence Tests with Non-iid Data in Causal Inference.
        RK = (K_X + P @ K_Y) @ P
    else:  # Zhang et al. 2011. Kernel-based Conditional Independence Test and Application in Causal Discovery.
        RK = P @ K_Y @ P

    if return_learned_K_X:
        return RK, K_X
    else:
        return RK
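A minimal usage sketch for the function above, under the assumption that the centering and pdinv helpers referenced in the body are available in scope; with with_gp=False only the two kernel matrices and a noise level are needed, and the kernel choice here is purely illustrative:

import numpy as np

X = np.random.randn(200, 3)
Y = np.random.randn(200, 1)
K_X = X @ X.T  # simple linear kernel matrix for X
K_Y = Y @ Y.T  # simple linear kernel matrix for Y

# Residual kernel of Y given X, skipping the GP regression step (with_gp=False)
RK = residual_kernel(K_Y, K_X, use_expectation=True, with_gp=False, sigma_squared=1e-3)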
Example #4
def residualize(Y, X=None, gp_kernel=None):
    """Residual of Y given X. Y_i - E[Y_i|X_i]"""
    import gpflow
    from gpflow.models import GPR

    if X is None:
        return Y - np.mean(Y)  # nothing is residualized!

    if gp_kernel is None:
        gp_kernel = default_gp_kernel(X)

    m = GPR(X, Y, gp_kernel)
    gpflow.train.ScipyOptimizer().minimize(m)

    Yhat, _ = m.predict_y(X)
    return Y - Yhat
Example #5
def regression_distance_k(Kx: np.ndarray, Ky: np.ndarray):
    warnings.warn('not tested yet!')
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    T = len(Kx)

    eig_Ky, eiy = truncated_eigen(*eigdec(Ky, min(100, T // 4)))
    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, T // 4)))

    X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to K_X
    Y = eiy @ diag(sqrt(eig_Ky))
    n_feats = X.shape[1]

    linear = Linear(n_feats, ARD=True)
    white = White(n_feats)
    gp_model = GPR(X, Y, linear + white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    Kx = linear.compute_K_symm(X)
    sigma_squared = white.variance.value

    P = Kx @ pdinv(Kx + sigma_squared * np.eye(T))

    M = P @ Ky @ P
    O = np.ones((T, 1))
    N = O @ (np.diag(M)[:, None]).T  # broadcast the diagonal of M across rows
    D = np.sqrt(N + N.T - 2 * M)
    return D
Example #6
def _test_cg_gpr(config: ConfigDense,
                 model: GPR,
                 Xnew: tf.Tensor) -> tf.Tensor:
  """
  Sample generation subroutine common to each unit test
  """
  # Prepare preconditioner for CG
  X, y = model.data
  Kff = model.kernel(X, full_cov=True)
  max_rank = config.num_cond//(2 if config.num_cond > 1 else 1)
  preconditioner = get_default_preconditioner(Kff,
                                              diag=model.likelihood.variance,
                                              max_rank=max_rank)

  count = 0
  L_joint = None
  samples = []
  while count < config.num_samples:
    # Number of samples to draw in this pass (bounded by the shard size)
    size = min(config.shard_size, config.num_samples - count)

    # Generate draws from the joint distribution $p(f(X), f(Xnew))$
    (f, fnew), L_joint = common.sample_joint(model.kernel,
                                                X,
                                                Xnew,
                                                num_samples=size,
                                                L=L_joint)

    # Solve for update functions
    update_fns = cg_update(model.kernel,
                           X,
                           y,
                           f + model.mean_function(X),
                           tol=1e-6,
                           diag=model.likelihood.variance,
                           max_iter=config.num_cond,
                           preconditioner=preconditioner)

    samples.append(fnew + update_fns(Xnew))
    count += size

  samples = tf.concat(samples, axis=0)
  if model.mean_function is not None:
    samples += model.mean_function(Xnew)
  return samples
Example #7
def _test_exact_gpr(config: ConfigDense, model: GPR,
                    Xnew: tf.Tensor) -> tf.Tensor:
    """
    Sample generation subroutine common to each unit test
    """
    # Precompute Cholesky factor (optional)
    X, y = model.data
    Kyy = model.kernel(X, full_cov=True)
    Kyy = tf.linalg.set_diag(
        Kyy,
        tf.linalg.diag_part(Kyy) + model.likelihood.variance)
    Lyy = tf.linalg.cholesky(Kyy)

    count = 0
    L_joint = None
    samples = []
    while count < config.num_samples:
        # Number of samples to draw in this pass (bounded by the shard size)
        size = min(config.shard_size, config.num_samples - count)

        # Generate draws from the joint distribution $p(f(X), f(Xnew))$
        (f, fnew), L_joint = common.sample_joint(model.kernel,
                                                 X,
                                                 Xnew,
                                                 num_samples=size,
                                                 L=L_joint)

        # Solve for update functions
        update_fns = exact_update(model.kernel,
                                  X,
                                  y,
                                  f + model.mean_function(X),
                                  L=Lyy,
                                  diag=model.likelihood.variance)

        samples.append(fnew + update_fns(Xnew))
        count += size

    samples = tf.concat(samples, axis=0)
    if model.mean_function is not None:
        samples += model.mean_function(Xnew)
    return samples
Example #8
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X = tf.random.uniform([config.num_cond, config.input_dims],
                                  dtype=floatx())
            Xnew = tf.random.uniform([config.num_test, config.input_dims],
                                     dtype=floatx())
            for cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (config.input_dims**0.5)
                maxval = config.rel_lengthscales_max * (config.input_dims**0.5)
                lenscales = tf.random.uniform(shape=[config.input_dims],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                kern = cls(lengthscales=lenscales,
                           variance=config.kernel_variance)
                const = tf.random.normal([1], dtype=floatx())

                K = kern(X, full_cov=True)
                K = tf.linalg.set_diag(
                    K,
                    tf.linalg.diag_part(K) + config.noise_variance)
                L = tf.linalg.cholesky(K)
                y = L @ tf.random.normal([L.shape[-1], 1],
                                         dtype=floatx()) + const

                model = GPR(kernel=kern,
                            noise_variance=config.noise_variance,
                            data=(X, y),
                            mean_function=mean_functions.Constant(c=const))

                mf, Sff = subroutine(config, model, Xnew)
                mg, Sgg = model.predict_f(Xnew, full_cov=True)

                tol = config.error_tol
                assert allclose(mf, mg, tol, tol)
                assert allclose(Sff, Sgg, tol, tol)
Example #9
def compute_analytic_GP_predictions(X, y, kernel, noise_variance, X_star):
    """
    Identify the mean and covariance of an analytic GPR posterior for test point locations.

    :param X: The train point locations, with a shape of [N x D].
    :param y: The train targets, with a shape of [N x 1].
    :param kernel: The kernel object.
    :param noise_variance: The variance of the observation model.
    :param X_star: The test point locations, with a shape of [N* x D].

    :return: The mean and covariance of the noise-free predictions,
        with a shape of [N*] and [N* x N*] respectively.
    """
    gpr_model = GPR(data=(X, y), kernel=kernel, noise_variance=noise_variance)

    f_mean, f_var = gpr_model.predict_f(X_star, full_cov=True)
    f_mean, f_var = f_mean[..., 0], f_var[0]
    assert f_mean.shape == (X_star.shape[0], )
    assert f_var.shape == (X_star.shape[0], X_star.shape[0])

    return f_mean, f_var
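A short illustrative call of the helper above (the data shapes and kernel here are assumptions, not from the original test):

import numpy as np
from gpflow.kernels import SquaredExponential

X = np.random.randn(50, 2)
y = np.random.randn(50, 1)
X_star = np.random.randn(10, 2)
f_mean, f_cov = compute_analytic_GP_predictions(
    X, y, kernel=SquaredExponential(), noise_variance=0.1, X_star=X_star)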
Example #10
    def test_single_layer(self):
        kern = RBF(1, lengthscales=0.1)
        layers = init_layers_linear(self.X, self.Y, self.X, [kern])

        lik = Gaussian()
        lik.variance = self.lik_var

        last_layer = SGPR_Layer(layers[-1].kern,
                                layers[-1].feature.Z.read_value(), self.D_Y,
                                layers[-1].mean_function)
        layers = layers[:-1] + [last_layer]

        m_dgp = DGP_Collapsed(self.X, self.Y, lik, layers)
        L_dgp = m_dgp.compute_log_likelihood()
        mean_dgp, var_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

        m_exact = GPR(self.X, self.Y, kern)
        m_exact.likelihood.variance = self.lik_var
        L_exact = m_exact.compute_log_likelihood()
        mean_exact, var_exact = m_exact.predict_f_full_cov(self.Xs)

        assert_allclose(L_dgp, L_exact, atol=1e-5, rtol=1e-5)
        assert_allclose(mean_dgp[0], mean_exact, atol=1e-5, rtol=1e-5)
        assert_allclose(var_dgp[0], var_exact, atol=1e-5, rtol=1e-5)
Example #11
def residual_kernel_matrix_kernel_real(Kx, Z, num_eig, ARD=True):
    """K_X|Z"""
    assert len(Kx) == len(Z)
    assert num_eig <= len(Kx)

    T = len(Kx)
    D = Z.shape[1]
    I = eye(T)
    eig_Kx, eix = truncated_eigen(*eigdec(Kx, num_eig))

    rbf = RBF(D, ARD=ARD)
    white = White(D)
    gp_model = GPR(Z, 2 * sqrt(T) * eix @ diag(sqrt(eig_Kx)) / sqrt(eig_Kx[0]),
                   rbf + white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    sigma_squared = white.variance.value
    Kz_x = rbf.compute_K_symm(Z)

    P = I - Kz_x @ pdinv(Kz_x + sigma_squared * I)
    return P @ Kx @ P.T
Example #12
        def test_vs_single_layer(self):
            lik = Gaussian()
            lik_var = 0.01
            lik.variance = lik_var
            N, Ns = self.X.shape[0], self.Xs.shape[0]
            D_Y, D_X = self.D_Y, self.X.shape[1]
            Y = np.random.randn(N, D_Y)
            Ys = np.random.randn(Ns, D_Y)

            kern = Matern52(self.X.shape[1], lengthscales=0.5)
            # mf = Linear(A=np.random.randn(D_X, D_Y), b=np.random.randn(D_Y))
            mf = Zero()
            m_gpr = GPR(self.X, Y, kern, mean_function=mf)
            m_gpr.likelihood.variance = lik_var
            mean_gpr, var_gpr = m_gpr.predict_y(self.Xs)
            test_lik_gpr = m_gpr.predict_density(self.Xs, Ys)
            pred_m_gpr, pred_v_gpr = m_gpr.predict_f(self.Xs)
            pred_mfull_gpr, pred_vfull_gpr = m_gpr.predict_f_full_cov(self.Xs)

            kerns = []
            kerns.append(
                Matern52(self.X.shape[1], lengthscales=0.5, variance=1e-1))
            kerns.append(kern)

            layer0 = GPMC_Layer(kerns[0], self.X.copy(), D_X, Identity())
            layer1 = GPR_Layer(kerns[1], mf, D_Y)
            m_dgp = DGP_Heinonen(self.X, Y, lik, [layer0, layer1])

            mean_dgp, var_dgp = m_dgp.predict_y(self.Xs, 1)
            test_lik_dgp = m_dgp.predict_density(self.Xs, Ys, 1)
            pred_m_dgp, pred_v_dgp = m_dgp.predict_f(self.Xs, 1)
            pred_mfull_dgp, pred_vfull_dgp = m_dgp.predict_f_full_cov(
                self.Xs, 1)

            tol = 1e-4
            assert_allclose(mean_dgp[0], mean_gpr, atol=tol, rtol=tol)
            assert_allclose(test_lik_dgp, test_lik_gpr, atol=tol, rtol=tol)
            assert_allclose(pred_m_dgp[0], pred_m_gpr, atol=tol, rtol=tol)
            assert_allclose(pred_mfull_dgp[0],
                            pred_mfull_gpr,
                            atol=tol,
                            rtol=tol)
            assert_allclose(pred_vfull_dgp[0],
                            pred_vfull_gpr,
                            atol=tol,
                            rtol=tol)
Example #13
def get_model(model_enum,
              data,
              noise_variance,
              covariance_function,
              max_parallel=10000):
    if not isinstance(model_enum, ModelEnum):
        model_enum = ModelEnum(model_enum)
    if model_enum == ModelEnum.GP:
        gp_model = GPR(data, covariance_function, None, noise_variance)
    elif model_enum == ModelEnum.SSGP:
        gp_model = StateSpaceGP(data,
                                covariance_function,
                                noise_variance,
                                parallel=False)
    elif model_enum == ModelEnum.PSSGP:
        gp_model = StateSpaceGP(data,
                                covariance_function,
                                noise_variance,
                                parallel=True,
                                max_parallel=max_parallel)
    else:
        raise ValueError("model not supported")
    return gp_model
Example #14
    def generate_gp_models(
            model_or_kernel: Union[GPModel, Kernel],
            data_list: List[RegressionData]
    ):
        """
        Generates a list of GPModel objects with the same length as data_list.

        If a GPModel object was passed, the list will consist of deep copies of the GPModel, with the data reassigned.
        If a Kernel was passed, the list will consist of GPR (all containing the Kernel) instead.

        :param model_or_kernel: GPModel or Kernel object used to generate the list of models
        :param data_list: List of RegressionData. Each model will get one element.
        :return:
        """
        assert isinstance(model_or_kernel, (Kernel, GPModel)), \
            "model_or_kernel needs to be an instance of either a Kernel or a GPModel"
        assert all(map(lambda data: isinstance(data, tuple) and len(data) == 2, data_list)), \
            "data_list should be a list of tuples of length 2 (i.e. a list of RegressionData)"

        is_kernel = isinstance(model_or_kernel, Kernel)

        models = list()
        for data in data_list:
            # Ensures both the InputData and OutputData are in a format usable by tensorflow
            data = tuple(map(util.ensure_tf_matrix, data))

            if is_kernel:
                # Appends a GPR object to the list of models if a Kernel was passed instead of a GPModel
                models.append(GPR(data, model_or_kernel))

            else:
                # Appends a deepcopy of the passed GPModel to the list of models
                model = gf.utilities.deepcopy(model_or_kernel)
                model.data = data
                models.append(model)

        return models
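A usage sketch, assuming generate_gp_models is exposed as a staticmethod and using illustrative random data:

import numpy as np
import gpflow

data_list = [(np.random.randn(20, 1), np.random.randn(20, 1)) for _ in range(3)]

# Passing a Kernel: each element of data_list gets its own GPR sharing this kernel
models = generate_gp_models(gpflow.kernels.Matern52(), data_list)

# Passing a GPModel: each element gets a deep copy of the model with its data reassigned
base = GPR(data_list[0], gpflow.kernels.Matern52())
models = generate_gp_models(base, data_list)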
Example #15
def regression_distance(Y: np.ndarray, Z: np.ndarray, ard=True):
    """d(z,z') = |f(z)-f(z')| where Y=f(Z) + noise and f ~ GP"""
    import gpflow
    from gpflow.kernels import White, RBF
    from gpflow.models import GPR

    n, dims = Z.shape

    rbf = RBF(dims, ARD=ard)
    rbf_white = rbf + White(dims)

    gp_model = GPR(Z, Y, rbf_white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    Kz_y = rbf.compute_K_symm(Z)
    Ry = pdinv(rbf_white.compute_K_symm(Z))
    Fy = Y.T @ Ry @ Kz_y  # F(z)

    M = Fy.T @ Fy
    O = np.ones((n, 1))
    N = O @ (np.diag(M)[:, None]).T
    D = np.sqrt(N + N.T - 2 * M)

    return D, Kz_y
Example #16
data = (x, y)
inducing_variable = tf.random.uniform((M, D))
adam_learning_rate = 0.01
iterations = ci_niter(5)

# %% [markdown]
# ### VGP is a GPR

# %% [markdown]
# The following section demonstrates how natural gradients can turn VGP into GPR *in a single step, if the likelihood is Gaussian*.

# %% [markdown]
# Let's start by first creating a standard GPR model with Gaussian likelihood:

# %%
gpr = GPR(data, kernel=gpflow.kernels.Matern52())

# %% [markdown]
# The log marginal likelihood of the exact GP model is:

# %%
gpr.log_marginal_likelihood().numpy()

# %% [markdown]
# Now we will create an approximate model which approximates the true posterior via a variational Gaussian distribution.<br>We initialize the distribution to be zero mean and unit variance.

# %%
vgp = VGP(data,
          kernel=gpflow.kernels.Matern52(),
          likelihood=gpflow.likelihoods.Gaussian())
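A sketch of the single step described above, assuming the standard gpflow.optimizers.NaturalGradient API: with a Gaussian likelihood, one natural-gradient step of size gamma=1.0 on the variational parameters should bring the VGP ELBO up to the GPR log marginal likelihood.

# %%
natgrad_opt = gpflow.optimizers.NaturalGradient(gamma=1.0)
variational_params = [(vgp.q_mu, vgp.q_sqrt)]
natgrad_opt.minimize(vgp.training_loss, var_list=variational_params)
vgp.elbo().numpy()  # should now (numerically) match gpr.log_marginal_likelihood()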
Example #17
def _gpr(x: tf.Tensor, y: tf.Tensor) -> GPR:
    return GPR((x, y), gpflow.kernels.Linear())
Example #18
# %% [markdown]
# The CGLB model introduces less bias than the SGPR model.
# We can show empirically that CGLB has a lower bias by plotting the objective landscape with respect to different values of the lengthscale hyperparameters.

# %%

x, y = data
n = x.shape[0]
m = 10

iv_indices = np.random.choice(range(n), size=m, replace=False)
iv = x[iv_indices, :]

noise = 0.1
gpr = GPR(data, kernel=SquaredExponential(), noise_variance=noise)
cglb = CGLB(data,
            kernel=SquaredExponential(),
            noise_variance=noise,
            inducing_variable=iv)
sgpr = SGPR(data,
            kernel=SquaredExponential(),
            noise_variance=noise,
            inducing_variable=iv)


def loss_with_changed_parameter(model, parameter, value: float):
    original = parameter.numpy()
    parameter.assign(value)
    loss = model.training_loss()
    parameter.assign(original)
    return loss
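With the helper returning the loss, the objective landscape mentioned above can be traced by sweeping the lengthscale of each model; a sketch (the grid of values is an illustrative assumption):

lengthscale_grid = np.linspace(0.1, 3.0, 50)
landscapes = {
    name: [loss_with_changed_parameter(m, m.kernel.lengthscales, l) for l in lengthscale_grid]
    for name, m in [("GPR", gpr), ("SGPR", sgpr), ("CGLB", cglb)]
}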
Example #19
def build_model(data, mean_function):
    model = GPR(data, kernel=RBF(), mean_function=mean_function)
    set_trainable(model.kernel, False)
    model.likelihood.variance.assign(1e-2)
    set_trainable(model.likelihood, False)
    return model
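Because the kernel and likelihood are frozen, optimizing this model only fits the mean function's parameters; a usage sketch (the data tuple and choice of mean function are illustrative assumptions):

model = build_model(data, mean_function=gpflow.mean_functions.Linear())
gpflow.optimizers.Scipy().minimize(model.training_loss, model.trainable_variables)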
Example #20
    def __init__(self, X, Y, kern):
        GPR.__init__(self, X, Y, kern)

    def __init__(self, *args, paths: AbstractSampler = None, **kwargs):
        GPR.__init__(self, *args, **kwargs)
        self._paths = paths