Example no. 1
def test_linear_coregionalization_shi(
    register_posterior_test,
    q_sqrt_factory,
    full_cov,
    full_output_cov,
    whiten,
    num_latent_gps,
    output_dims,
):
    """
    Linear coregionalization with shared independent inducing variables.
    """
    kernel = gpflow.kernels.LinearCoregionalization(
        [gpflow.kernels.SquaredExponential() for _ in range(num_latent_gps)],
        W=tf.random.normal((output_dims, num_latent_gps)),
    )
    inducing_variable = gpflow.inducing_variables.SharedIndependentInducingVariables(
        inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    )

    q_mu = np.random.randn(NUM_INDUCING_POINTS, num_latent_gps)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, num_latent_gps)

    conditional = create_conditional(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    register_posterior_test(posterior, LinearCoregionalizationPosterior)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )
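The helper _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional is shared by all of these tests. Its exact body lives in the test module; below is a minimal sketch of what it could look like, assuming the posterior exposes GPflow's fused_predict_f/predict_f pair and that the conditional closure takes (Xnew, full_cov, full_output_cov). NUM_SAMPLES is a hypothetical constant.

import numpy as np

NUM_SAMPLES = 7  # hypothetical number of test inputs

def _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
    posterior, conditional, full_cov, full_output_cov
):
    Xnew = np.random.randn(NUM_SAMPLES, INPUT_DIMS)
    # fused path: recompute everything from the variational parameters
    fused_mean, fused_cov = posterior.fused_predict_f(
        Xnew, full_cov=full_cov, full_output_cov=full_output_cov
    )
    # precomputed path: reuse the cached alpha/Qinv
    cached_mean, cached_cov = posterior.predict_f(
        Xnew, full_cov=full_cov, full_output_cov=full_output_cov
    )
    # reference: the stateless conditional
    cond_mean, cond_cov = conditional(Xnew, full_cov, full_output_cov)
    np.testing.assert_allclose(fused_mean, cached_mean)
    np.testing.assert_allclose(fused_cov, cached_cov)
    np.testing.assert_allclose(fused_mean, cond_mean)
    np.testing.assert_allclose(fused_cov, cond_cov)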
Example no. 2
    def _init_layers(self,
                     X,
                     Y,
                     Z,
                     q_sqrt_initial,
                     kernels,
                     mean_function=Zero(),
                     Layer=SVGPLayer,
                     white=False):
        """
        The first layer only models between input and output_1,
        The second layer models between input and output_2, output_1 and output_2,
        The inducing point for each layer for input dimension should be shared?
        The induing point for output dimension should be calculated instead of changing?"""

        layers = []
        self.inducing_inputs = inducingpoint_wrapper(Z[:, :self.m])
        inducing_inputs = self.inducing_inputs.Z

        for i in range(self.num_outputs):
            layer = Layer(kernels[i],
                          inducing_inputs,
                          Z[:, self.m + i],
                          q_sqrt_initial[:, i],
                          mean_function,
                          white=white)
            layers.append(layer)
            inducing_inputs = tf.concat([inducing_inputs, layer.q_mu], axis=1)

        return layers
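A small numpy illustration of the shape bookkeeping in the loop above (toy sizes, not from the source): each layer i sees the shared input block of the inducing points plus one extra column per previously built layer, because layer.q_mu is concatenated on after each iteration.

import numpy as np

M, m, P = 16, 3, 2                      # toy sizes: inducing points, input dims, outputs
Z = np.random.randn(M, m + P)           # input block plus one initial column per output
inducing_inputs = Z[:, :m]              # shared input-dimension block
for i in range(P):
    print(i, inducing_inputs.shape)     # (16, 3), then (16, 4), ...
    q_mu = np.zeros((M, 1))             # stands in for layer.q_mu after construction
    inducing_inputs = np.concatenate([inducing_inputs, q_mu], axis=1)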
Example no. 3
def test_posterior_create_with_variables_update_cache_works(
        q_sqrt_factory, whiten):
    # setup posterior
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))

    q_mu = tf.Variable(np.random.randn(NUM_INDUCING_POINTS, 1))

    initial_q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)
    if initial_q_sqrt is not None:
        q_sqrt = tf.Variable(initial_q_sqrt)
    else:
        q_sqrt = initial_q_sqrt

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=PrecomputeCacheType.VARIABLE,
    )
    assert isinstance(posterior.alpha, tf.Variable)
    assert isinstance(posterior.Qinv, tf.Variable)

    alpha = posterior.alpha
    Qinv = posterior.Qinv

    posterior.update_cache()

    assert posterior.alpha is alpha
    assert posterior.Qinv is Qinv
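The point of PrecomputeCacheType.VARIABLE, exercised above, is that alpha and Qinv stay the same tf.Variable objects across update_cache() calls, so a tf.function-compiled prediction does not retrace. A minimal usage sketch with a stock SVGP model (toy data, not from the source):

import numpy as np
import tensorflow as tf
import gpflow
from gpflow.posteriors import PrecomputeCacheType

model = gpflow.models.SVGP(
    kernel=gpflow.kernels.SquaredExponential(),
    likelihood=gpflow.likelihoods.Gaussian(),
    inducing_variable=np.random.randn(16, 1),
)
posterior = model.posterior(precompute_cache=PrecomputeCacheType.VARIABLE)

@tf.function  # compiled once; the cache variables are read inside
def predict(Xnew):
    return posterior.predict_f(Xnew)

Xnew = tf.constant(np.random.randn(5, 1))
predict(Xnew)
# ... after an optimisation step on `model` ...
posterior.update_cache()  # refreshes alpha/Qinv in place, no retracing of `predict`
predict(Xnew)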
Example no. 4
def test_independent_single_output(register_posterior_test, q_sqrt_factory,
                                   whiten, full_cov, full_output_cov):
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))

    q_mu = np.random.randn(NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)

    conditional = create_conditional(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    register_posterior_test(posterior, IndependentPosteriorSingleOutput)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov)
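For reference, hedged sketches of the two factory helpers used throughout these tests, assuming they are thin wrappers over gpflow.conditionals.conditional and gpflow.posteriors.create_posterior (the actual helpers live in the test module):

import gpflow

def create_conditional(*, kernel, inducing_variable, q_mu, q_sqrt, whiten):
    # returns a closure with the same calling convention the tests use
    def conditional(Xnew, full_cov, full_output_cov):
        return gpflow.conditionals.conditional(
            Xnew, inducing_variable, kernel, q_mu,
            q_sqrt=q_sqrt, white=whiten,
            full_cov=full_cov, full_output_cov=full_output_cov,
        )
    return conditional

def create_posterior(*, kernel, inducing_variable, q_mu, q_sqrt, whiten):
    # dispatches to the matching posterior class for the kernel/inducing-variable combination
    return gpflow.posteriors.create_posterior(
        kernel, inducing_variable, q_mu, q_sqrt=q_sqrt, whiten=whiten
    )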
Example no. 5
def test_independent_multi_output_sek_shi(
    register_posterior_test,
    q_sqrt_factory,
    full_cov,
    full_output_cov,
    whiten,
    num_latent_gps,
    output_dims,
):
    """
    Independent multi-output posterior with separate independent kernels and shared inducing points.
    """
    kernel = gpflow.kernels.SeparateIndependent(
        [gpflow.kernels.SquaredExponential() for _ in range(num_latent_gps)]
    )
    inducing_variable = gpflow.inducing_variables.SharedIndependentInducingVariables(
        inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    )

    q_mu = np.random.randn(NUM_INDUCING_POINTS, num_latent_gps)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, num_latent_gps)

    conditional = create_conditional(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    register_posterior_test(posterior, IndependentPosteriorMultiOutput)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )
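The q_sqrt_factory fixture is parametrised elsewhere in the test module; here is a hypothetical stand-in covering the three cases the posteriors accept (None for the default, a diagonal [M, L] parameterisation, or full [L, M, M] Cholesky factors):

import numpy as np

def q_sqrt_factory(num_inducing, num_latent, case="full"):
    # the real fixture would parametrise over {"none", "diag", "full"}
    if case == "none":
        return None
    if case == "diag":
        return np.random.rand(num_inducing, num_latent)  # positive diagonal entries
    # full: one lower-triangular factor per latent GP
    return np.tril(np.random.randn(num_latent, num_inducing, num_inducing))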
Example no. 6
def test_fallback_independent_multi_output_shi(
    register_posterior_test, q_sqrt_factory, full_cov, full_output_cov, whiten, output_dims,
):
    """
    Fallback posterior with shared independent inducing variables.

    The FallbackIndependentLatentPosterior is a subclass of the FullyCorrelatedPosterior which
    requires a single latent GP function.
    """
    kernel = gpflow.kernels.LinearCoregionalization(
        [gpflow.kernels.SquaredExponential()], W=tf.random.normal((output_dims, 1))
    )
    inducing_variable = gpflow.inducing_variables.FallbackSharedIndependentInducingVariables(
        inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    )

    q_mu = np.random.randn(NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)

    conditional = create_conditional(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    register_posterior_test(posterior, FallbackIndependentLatentPosterior)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )
Example no. 7
def test_fully_correlated_multi_output(
    register_posterior_test, q_sqrt_factory, full_cov, full_output_cov, whiten, output_dims,
):
    """
    The fully correlated posterior has one latent GP.
    """
    kernel = gpflow.kernels.SharedIndependent(
        gpflow.kernels.SquaredExponential(), output_dim=output_dims
    )
    inducing_variable = inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))

    q_mu = np.random.randn(output_dims * NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(output_dims * NUM_INDUCING_POINTS, 1)

    conditional = create_conditional(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel, inducing_variable=inducing_variable, q_mu=q_mu, q_sqrt=q_sqrt, whiten=whiten,
    )
    register_posterior_test(posterior, FullyCorrelatedPosterior)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )
Example no. 8
    def __init__(
        self,
        kernel,
        likelihood,
        inducing_variable,
        *,
        mean_function=None,
        num_latent_gps: int = 1,
        q_diag: bool = False,
        q_mu=None,
        q_sqrt=None,
        whiten: bool = True,
        num_data=None,
    ):
        """
        - kernel, likelihood, inducing_variables, mean_function are appropriate
          GPflow objects
        - num_latent_gps is the number of latent processes to use, defaults to 1
        - q_diag is a boolean. If True, the covariance is approximated by a
          diagonal matrix.
        - whiten is a boolean. If True, we use the whitened representation of
          the inducing points.
        - num_data is the total number of observations, defaults to X.shape[0]
          (relevant when feeding in external minibatches)
        """
        # init the super class, accept args
        super().__init__(kernel, likelihood, mean_function, num_latent_gps)
        self.num_data = num_data
        self.q_diag = q_diag
        self.whiten = whiten
        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # init variational parameters
        num_inducing = len(self.inducing_variable)
        self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
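This is the standard GPflow SVGP constructor. A short usage sketch on toy data (values are illustrative only):

import numpy as np
import gpflow

X = np.random.randn(100, 2)
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(100, 1)

model = gpflow.models.SVGP(
    kernel=gpflow.kernels.SquaredExponential(),
    likelihood=gpflow.likelihoods.Gaussian(),
    inducing_variable=X[:20].copy(),  # wrapped by inducingpoint_wrapper internally
    num_latent_gps=1,
    whiten=True,
    num_data=len(X),  # needed for correct minibatch ELBO scaling
)
loss = model.training_loss((X, Y))  # negative ELBO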
Example no. 9
    def __init__(self,
                 kernel,
                 inducing_variables,
                 num_outputs,
                 mean_function,
                 input_prop_dim=None,
                 white=False,
                 **kwargs):
        super().__init__(input_prop_dim, **kwargs)

        self.num_inducing = inducing_variables.shape[0]
        self.mean_function = mean_function
        self.num_outputs = num_outputs
        self.white = white

        self.kernels = []
        for i in range(self.num_outputs):
            self.kernels.append(copy.deepcopy(kernel))

        # Initialise q_mu to all zeros
        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu, dtype=default_float())

        # Initialise q_sqrt to the identity matrix (one factor per output)
        q_sqrt = [
            np.eye(self.num_inducing, dtype=default_float())
            for _ in range(num_outputs)
        ]
        q_sqrt = np.array(q_sqrt)
        # Store as lower triangular matrix L.
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        # If not whitened, overwrite q_sqrt with the Cholesky of the prior
        # covariance Ku (+ jitter), so that q(u) starts at the prior.
        if not self.white:
            Kus = [
                self.kernels[i].K(inducing_variables)
                for i in range(self.num_outputs)
            ]
            Lus = [
                np.linalg.cholesky(Kus[i] + np.eye(self.num_inducing) *
                                   default_jitter())
                for i in range(self.num_outputs)
            ]
            q_sqrt = np.array(Lus)
            self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        self.inducing_points = []
        for i in range(self.num_outputs):
            self.inducing_points.append(
                inducingpoint_wrapper(inducing_variables))
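The non-whitened branch initialises each q_sqrt factor at the Cholesky of the (jittered) prior covariance Ku, so q(u) starts at the prior N(0, Ku); with white=True the identity plays the same role because the whitened prior is N(0, I). A quick numpy check of the identity S = L L^T (toy sizes):

import numpy as np

M = 8
A = np.random.randn(M, M)
Ku = A @ A.T                              # toy PSD stand-in for kernel.K(Z)
jitter = 1e-6
L = np.linalg.cholesky(Ku + jitter * np.eye(M))
assert np.allclose(L @ L.T, Ku + jitter * np.eye(M))  # S = L L^T equals the jittered prior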
Example no. 10
    def __init__(
        self,
        encoder,
        kernel: Optional[Kernel] = None,
        inducing_variable=None,
        *,
        num_latent_gps: int = 1,  # typically Y.shape[-1]
        data_dim: Optional[tuple] = None,
        mean_function=None,
        q_diag: bool = False,
        q_mu=None,
        q_sqrt=None,
        whiten: bool = False,
    ):
        """
        - kernel, likelihood, inducing_variables, mean_function are appropriate
          GPflow objects
        - num_latent_gps is the number of latent processes to use, defaults to 2, as
          the dimensionality reduction is at dimensions 2
        - q_diag is a boolean. If True, the covariance is approximated by a
          diagonal matrix.
        - whiten is a boolean. If True, we use the whitened representation of
          the inducing points.
        - num_data is the total number of observations, defaults to X.shape[0]
          (relevant when feeding in external minibatches)
        
        The prior is by default a Normal Gaussian

        """

        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        self.num_data = data_dim[0]
        self.num_latent_gps = data_dim[1]
        self.q_diag = q_diag
        self.whiten = whiten
        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # init variational parameters
        num_inducing = self.inducing_variable.num_inducing
        self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
        self.loss_placeholder = defaultdict(
            list, {k: []
                   for k in ("KL_x", "ELBO", "KL_u")})

        self.encoder = encoder

        # init the super class, accept args
        super().__init__(kernel, likelihoods.Gaussian(variance=0.1),
                         mean_function, num_latent_gps)
Example no. 11
def test_posterior_update_cache_fails_without_argument(q_sqrt_factory, whiten):
    # setup posterior
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))

    q_mu = tf.Variable(np.random.randn(NUM_INDUCING_POINTS, 1))

    initial_q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)
    if initial_q_sqrt is not None:
        q_sqrt = tf.Variable(initial_q_sqrt)
    else:
        q_sqrt = initial_q_sqrt

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=None,
    )
    assert posterior.alpha is None
    assert posterior.Qinv is None

    with pytest.raises(ValueError):
        posterior.update_cache()

    posterior.update_cache(PrecomputeCacheType.TENSOR)
    assert isinstance(posterior.alpha, tf.Tensor)
    assert isinstance(posterior.Qinv, tf.Tensor)

    posterior.update_cache(PrecomputeCacheType.NOCACHE)
    assert posterior._precompute_cache == PrecomputeCacheType.NOCACHE
    assert posterior.alpha is None
    assert posterior.Qinv is None

    posterior.update_cache(
        PrecomputeCacheType.TENSOR)  # set posterior._precompute_cache
    assert posterior._precompute_cache == PrecomputeCacheType.TENSOR
    posterior.alpha = posterior.Qinv = None  # clear again

    posterior.update_cache()  # does not raise an exception
    assert isinstance(posterior.alpha, tf.Tensor)
    assert isinstance(posterior.Qinv, tf.Tensor)
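For orientation, the three cache modes exercised by this test (semantics as asserted above; the enum is gpflow.posteriors.PrecomputeCacheType):

from gpflow.posteriors import PrecomputeCacheType

# NOCACHE  - alpha/Qinv stay None; predictions recompute everything per call
# TENSOR   - alpha/Qinv are tf.Tensors, rebuilt only by an explicit update_cache()
# VARIABLE - alpha/Qinv are tf.Variables, refreshed in place (tf.function friendly)
for mode in PrecomputeCacheType:
    print(mode)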
Example no. 12
def test_posterior_update_cache_with_variables_no_precompute(
        q_sqrt_factory, whiten, precompute_cache_type):
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))

    q_mu = np.random.randn(NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=precompute_cache_type,
    )
    posterior.update_cache(PrecomputeCacheType.VARIABLE)

    assert isinstance(posterior.alpha, tf.Variable)
    assert isinstance(posterior.Qinv, tf.Variable)
Example no. 13
def test_posterior_update_cache_with_variables_update_value(
        q_sqrt_factory, whiten):
    # setup posterior
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))

    q_mu = tf.Variable(np.random.randn(NUM_INDUCING_POINTS, 1))

    initial_q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)
    if initial_q_sqrt is not None:
        q_sqrt = tf.Variable(initial_q_sqrt)
    else:
        q_sqrt = initial_q_sqrt

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=PrecomputeCacheType.TENSOR,
    )
    initial_alpha = posterior.alpha
    initial_Qinv = posterior.Qinv

    posterior.update_cache(PrecomputeCacheType.VARIABLE)

    # ensure the values of alpha and Qinv will change
    q_mu.assign_add(tf.ones_like(q_mu))
    if initial_q_sqrt is not None:
        q_sqrt.assign_add(tf.ones_like(q_sqrt))
    posterior.update_cache(PrecomputeCacheType.VARIABLE)

    # assert that the values have changed
    assert not np.allclose(initial_alpha, tf.convert_to_tensor(
        posterior.alpha))
    if initial_q_sqrt is not None:
        assert not np.allclose(initial_Qinv,
                               tf.convert_to_tensor(posterior.Qinv))
Example no. 14
    def __init__(self, kernel, inducing_variables, q_mu_initial, q_sqrt_initial,
                 mean_function, optimize_inducing_location=False, white=False, **kwargs):
        super().__init__(**kwargs)

        self.inducing_points = inducingpoint_wrapper(inducing_variables)
        gpflow.set_trainable(self.inducing_points, optimize_inducing_location)

        self.num_inducing = inducing_variables.shape[0]

        # Initialise q_mu from the supplied initial values (y^2_pi(i)); if the
        # inducing locations are optimised, start from zeros instead.
        q_mu = q_mu_initial[:, None]
        if optimize_inducing_location:
            q_mu = np.zeros((self.num_inducing, 1))
        self.q_mu = Parameter(q_mu, dtype=default_float())

        # Initialise q_sqrt to near-deterministic. Store as lower triangular matrix L.
        q_sqrt = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        self.kernel = kernel
        self.mean_function = mean_function
        self.white = white
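Initialising q_sqrt at 1e-4 times the identity makes q(u) approximately deterministic at q_mu, since the implied covariance S = q_sqrt q_sqrt^T has marginal variances of about 1e-8. A quick numeric check (toy size):

import numpy as np

M = 4
q_sqrt = 1e-4 * np.eye(M)
S = q_sqrt @ q_sqrt.T
print(np.diag(S))  # ~1e-8 marginal variances: a near-deterministic q(u)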
Example no. 15
    def __init__(self,
                 X,
                 Y,
                 M,
                 mean_function=Zero(),
                 white=False,
                 Layer=SVGPLayer,
                 **kwargs):

        self.temporal_layers = []

        # One temporal GP layer per output; each layer gets its own kernel and
        # k-means-initialised inducing inputs. (self.num_outputs and
        # self.temporal_kernel are assumed to be available before this runs,
        # since super().__init__ is only called afterwards.)
        for i in range(self.num_outputs):
            kerneli = self.temporal_kernel()
            inducing_inputs = inducingpoint_wrapper(
                kmeans2(X, M, minit='points')[0])
            layer = Layer(kerneli,
                          inducing_inputs.Z,
                          mean_function,
                          white=white)
            self.temporal_layers.append(layer)

        super().__init__(**kwargs)
Example no. 16
    def __init__(
        self,
        data: OutputData,
        encoder,
        kernel: Optional[Kernel] = None,
        inducing_variable=None,
        X_prior_mean=None,
        X_prior_var=None,
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param X_data_mean: initial latent positions, size N (number of points) x Q (latent dimensions).
        :param X_data_var: variance of latent positions ([N, Q]), for the initialisation of the latent space.
        :param kernel: kernel specification, by default Squared Exponential
        :param num_inducing_variables: number of inducing points, M
        :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions). By default
            random permutation of X_data_mean.
        :param X_prior_mean: prior mean used in KL term of bound. By default 0. Same size as X_data_mean.
        :param X_prior_var: prior variance used in KL term of bound. By default 1.
        """

        self.latent_dimensions = 2
        # grab data
        self.data = data_input_to_tensor(data)

        num_data, num_latent_gps = data.shape

        self.num_data = num_data

        # define kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        # init the GP model
        super().__init__(kernel,
                         likelihoods.Gaussian(variance=0.1),
                         num_latent_gps=num_latent_gps)

        # init inducing points (required for this model)
        if inducing_variable is None:
            raise ValueError("This model requires `inducing_variable` to be supplied")
        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # loss placeholder for analysis purposes
        self.loss_placeholder = defaultdict(list,
                                            {k: []
                                             for k in ("KL_x", "ELBO")})

        # deal with parameters for the prior mean and variance of X
        if X_prior_mean is None:
            X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                    dtype=default_float())
        if X_prior_var is None:
            X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                                  dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        # Encoder
        self.encoder = encoder

        # sanity checks
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions
Example no. 17
    def __init__(
        self,
        data: OutputData,
        split_space: bool, 
        Xp_mean: tf.Tensor,
        Xp_var: tf.Tensor,
        pi: tf.Tensor,
        kernel_K: List[Kernel],
        Zp: tf.Tensor,
        Xs_mean=None,
        Xs_var=None,
        kernel_s=None,
        Zs=None,
        Xs_prior_mean=None,
        Xs_prior_var=None,
        Xp_prior_mean=None,
        Xp_prior_var=None,
        pi_prior=None
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param split_space: if True, use both a shared and a private space;
            if False, use only private spaces (note: to recover the GPLVM, set split_space=False and let K=1)
        :param Xp_mean: mean latent positions in the private space [N, Qp] (Qp is the dimension of the private space)
        :param Xp_var: variance of the latent positions in the private space [N, Qp]
        :param pi: mixture responsibility of each category to each point [N, K] (K is the number of categories), i.e. q(c)
        :param kernel_K: private space kernel, one for each category
        :param Zp: inducing inputs of the private space [M, Qp]
        :param Xs_mean: mean latent positions in the shared space [N, Qs] (Qs is the dimension of the shared space). i.e. mus in q(Xs) ~ N(Xs | mus, Ss)
        :param Xs_var: variance of latent positions in shared space [N, Qs], i.e. Ss, assumed diagonal
        :param kernel_s: shared space kernel 
        :param Zs: inducing inputs of the shared space [M, Qs] (M is the number of inducing points)
        :param Xs_prior_mean: prior mean used in KL term of bound, [N, Qs]. By default 0. mean in p(Xs)
        :param Xs_prior_var: prior variance used in KL term of bound, [N, Qs]. By default 1. variance in p(Xs)
        :param Xp_prior_mean: prior mean used in KL term of bound, [N, Qp]. By default 0. mean in p(Xp)
        :param Xp_prior_var: prior variance used in KL term of bound, [N, Qp]. By default 1. variance in p(Xp)
        :param pi_prior: prior mixture weights used in KL term of the bound, [N, K]. By default uniform. p(c)        
        """

        # To drop the shared space, set it to None --> a mixture of GPLVMs.
        # To recover the original GPLVM, additionally set K = 1 and include a single kernel in `kernel_K`.

        # TODO: think about how to support minibatching.
        # It is awkward because with minibatches the model usually does not store the data internally,
        # but for the GPLVM we need to keep q(x_n) for every n, so we would have to know which ones to
        # update in each minibatch; probably solvable, but not pretty. Using an inference network /
        # back constraints would solve this, since we would keep a global set of parameters rather
        # than one per q(x_n).
        self.N, self.D = data.shape
        self.Qp = Xp_mean.shape[1]
        self.K = pi.shape[1]
        self.split_space = split_space

        assert Xp_var.ndim == 2
        assert len(kernel_K) == self.K
        assert np.all(Xp_mean.shape == Xp_var.shape)
        assert Xp_mean.shape[0] == self.N, "Xp_mean and Y must be of same size"
        assert pi.shape[0] == self.N, "pi and Y must be of the same size"

        super().__init__()
        self.likelihood = likelihoods.Gaussian()
        self.kernel_K = kernel_K
        self.data = data_input_to_tensor(data)
        # the covariance of q(X) as a [N, Q] matrix, the assumption is that Sn's are diagonal
        # i.e. the latent dimensions are uncorrelated
        # otherwise would require a [N, Q, Q] matrix
        self.Xp_mean = Parameter(Xp_mean)
        self.Xp_var = Parameter(Xp_var, transform=positive())
        self.pi = Parameter(pi, transform=tfp.bijectors.SoftmaxCentered())
        self.Zp = inducingpoint_wrapper(Zp)
        self.M = len(self.Zp)

        # Initialize the variational parameters for q(U), in the same way as in SVGP:
        # q_mu: List[K]; mean of the inducing variables U [M, D], i.e. m in q(U) ~ N(U | m, S),
        #   initialized to zeros
        # q_sqrt: List[K]; Cholesky factors of the covariance of the inducing variables [D, M, M],
        #   initialized to identity matrices (q_diag is False because natural
        #   gradients only work with a full covariance)
        # We need K sets of q(U_k), each approximating f_s + f_k.
        self.q_mu = []
        self.q_sqrt = []
        for k in range(self.K):
            q_mu = np.zeros((self.M, self.D))
            q_mu = Parameter(q_mu, dtype=default_float())  # [M, D]
            self.q_mu.append(q_mu)

            q_sqrt = [
                np.eye(self.M, dtype=default_float()) for _ in range(self.D)
            ]
            q_sqrt = np.array(q_sqrt)
            q_sqrt = Parameter(q_sqrt, transform=triangular())  # [D, M, M]
            self.q_sqrt.append(q_sqrt)

        # deal with parameters for the prior
        if Xp_prior_mean is None:
            Xp_prior_mean = tf.zeros((self.N, self.Qp), dtype=default_float())
        if Xp_prior_var is None:
            Xp_prior_var = tf.ones((self.N, self.Qp), dtype=default_float())
        if pi_prior is None:
            pi_prior = tf.ones((self.N, self.K), dtype=default_float()) / self.K

        self.Xp_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xp_prior_mean), dtype=default_float())
        self.Xp_prior_var = tf.convert_to_tensor(np.atleast_1d(Xp_prior_var), dtype=default_float())
        self.pi_prior = tf.convert_to_tensor(np.atleast_1d(pi_prior), dtype=default_float())


        # if we have both a shared and a private space, we also need to initialize the shared-space parameters
        if split_space:
            assert (
                Xs_mean is not None and Xs_var is not None and kernel_s is not None and Zs is not None
            ), "Xs_mean, Xs_var, kernel_s and Zs must be provided if `split_space=True`"
            assert Xs_var.ndim == 2
            assert np.all(Xs_mean.shape == Xs_var.shape)
            assert Xs_mean.shape[0] == self.N, "Xs_mean and Y must be of same size"
            self.Qs = Xs_mean.shape[1]
            self.kernel_s = kernel_s
            self.Xs_mean = Parameter(Xs_mean)
            self.Xs_var = Parameter(Xs_var, transform=positive())
            self.Zs = inducingpoint_wrapper(Zs)

            if len(Zs) != len(Zp):
                raise ValueError(
                    '`Zs` and `Zp` should have the same length'
                )

            if Xs_prior_mean is None:
                Xs_prior_mean = tf.zeros((self.N, self.Qs), dtype=default_float())
            if Xs_prior_var is None:
                Xs_prior_var = tf.ones((self.N, self.Qs), dtype=default_float())
            self.Xs_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xs_prior_mean), dtype=default_float())
            self.Xs_prior_var = tf.convert_to_tensor(np.atleast_1d(Xs_prior_var), dtype=default_float())

        self.Fq = tf.zeros((self.N, self.K), dtype=default_float())
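A shape summary of the per-category variational parameters built above, with hypothetical toy sizes (the real N, D, M, K come from the data and arguments):

import numpy as np

N, D, M, K = 100, 5, 16, 3            # toy sizes: points, output dims, inducing points, categories
q_mu_k = np.zeros((M, D))             # mean of q(U_k)
q_sqrt_k = np.stack([np.eye(M)] * D)  # [D, M, M]: one Cholesky factor per output dimension
assert q_sqrt_k.shape == (D, M, M)
# one such (q_mu_k, q_sqrt_k) pair is kept per category k = 1..K, each approximating f_s + f_k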
Example no. 18
    def __init__(
        self,
        data: OutputData,
        X_data_mean: Optional[tf.Tensor] = None,
        X_data_var: Optional[tf.Tensor] = None,
        kernel: Optional[Kernel] = None,
        num_inducing_variables: Optional[int] = None,
        inducing_variable=None,
        X_prior_mean=None,
        X_prior_var=None,
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param X_data_mean: initial latent positions, size N (number of points) x Q (latent dimensions).
        :param X_data_var: variance of latent positions ([N, Q]), for the initialisation of the latent space.
        :param kernel: kernel specification, by default Squared Exponential
        :param num_inducing_variables: number of inducing points, M
        :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions). By default
            random permutation of X_data_mean.
        :param X_prior_mean: prior mean used in KL term of bound. By default 0. Same size as X_data_mean.
        :param X_prior_var: prior variance used in KL term of bound. By default 1.
        """

        self.latent_dimensions = 2
        # grab data
        self.data = data_input_to_tensor(data)

        # initialise the latent space (PCA by default)
        if X_data_mean is None:
            X_data_mean = pca_reduce(data, self.latent_dimensions)

        num_data, num_latent_gps = X_data_mean.shape

        self.num_data = num_data

        if X_data_var is None:
            X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                                 dtype=default_float())

        assert X_data_var.ndim == 2

        self.output_dim = self.data.shape[-1]  # D, the number of output dimensions

        # define kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        # init the GP model
        super().__init__(kernel,
                         likelihoods.Gaussian(variance=0.1),
                         num_latent_gps=num_latent_gps)

        # init latent-space Parameters
        self.X_data_mean = Parameter(X_data_mean)
        self.X_data_var = Parameter(X_data_var, transform=positive())

        # init inducing points
        if (inducing_variable is None) == (num_inducing_variables is None):
            raise ValueError(
                "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
            )

        if inducing_variable is None:
            # By default we initialize with a random subset of the initial latent points.
            # Note that tf.random.shuffle returns a copy, it does not shuffle in-place.
            # (k-means clustering would be an alternative)
            Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
            inducing_variable = InducingPoints(Z)

        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # loss placeholder for analysis purposes
        self.loss_placeholder = defaultdict(list,
                                            {k: []
                                             for k in ("KL_x", "ELBO")})

        # deal with parameters for the prior mean and variance of X
        if X_prior_mean is None:
            X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                    dtype=default_float())
        if X_prior_var is None:
            X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                                  dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        # sanity checks

        assert np.all(X_data_mean.shape == X_data_var.shape)
        assert X_data_mean.shape[0] == self.data.shape[
            0], "X mean and Y must be same size."
        assert X_data_var.shape[0] == self.data.shape[
            0], "X var and Y must be same size."
        assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions
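A short usage sketch, assuming the class above is importable as BayesianGPLVM (toy data; the latent dimensionality is hard-coded to 2 in this variant):

import numpy as np

Y = np.random.randn(50, 12)  # toy observations: N=50 points, D=12 dimensions

model = BayesianGPLVM(
    data=Y,
    num_inducing_variables=10,  # inducing inputs drawn from the PCA-initialised latents
)
print(model.X_data_mean.shape)  # (50, 2)
print(model.inducing_variable.num_inducing)  # 10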
Example no. 19
    def __init__(
        self,
        data: OutputData,
        kernel: Optional[Kernel] = None,
        latent_dimensions: Optional[int] = 2,
        num_inducing_variables: Optional[int] = None,
        inducing_variable=None,
        *,
        mean_function=None,
        q_diag: bool = False,
        q_mu=None,
        q_sqrt=None,
        whiten: bool = False,
    ):
        """
        - kernel, likelihood, inducing_variables, mean_function are appropriate
          GPflow objects
        - num_latent_gps is the number of latent processes to use, defaults to 2, as
          the dimensionality reduction is at dimensions 2
        - q_diag is a boolean. If True, the covariance is approximated by a
          diagonal matrix.
        - whiten is a boolean. If True, we use the whitened representation of
          the inducing points.
        - num_data is the total number of observations, defaults to X.shape[0]
          (relevant when feeding in external minibatches)
        """

        self.latent_dimensions = latent_dimensions

        # grab data
        self.data = data_input_to_tensor(data)

        # initialise the latent space with PCA
        X_data_mean = pca_reduce(data, self.latent_dimensions)

        num_data, num_latent_gps = data.shape

        self.num_data = num_data

        X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                             dtype=default_float())

        assert X_data_var.ndim == 2

        # define kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        # init latent-space Parameters
        self.X_data_mean = Parameter(X_data_mean)
        self.X_data_var = Parameter(X_data_var, transform=positive())

        # init inducing points
        if (inducing_variable is None) == (num_inducing_variables is None):
            raise ValueError(
                "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
            )

        if inducing_variable is None:
            # By default we initialize with a random subset of the initial latent points.
            # Note that tf.random.shuffle returns a copy, it does not shuffle in-place.
            # (k-means clustering would be an alternative)
            Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
            inducing_variable = InducingPoints(Z)

        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # loss placeholder for analysis purposes
        self.loss_placeholder = defaultdict(
            list, {k: []
                   for k in ("KL_x", "ELBO", "KL_u")})

        # deal with parameters for the prior mean and variance of X
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                dtype=default_float())
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                              dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        # sanity checks

        assert np.all(X_data_mean.shape == X_data_var.shape)
        assert X_data_mean.shape[0] == self.data.shape[
            0], "X mean and Y must be same size."
        assert X_data_var.shape[0] == self.data.shape[
            0], "X var and Y must be same size."
        assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions

        # init the super class, accept args
        super().__init__(kernel, likelihoods.Gaussian(variance=0.1),
                         mean_function, num_latent_gps)
        self.q_diag = q_diag
        self.whiten = whiten

        # init variational parameters
        num_inducing = self.inducing_variable.num_inducing
        self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
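For reference, the q_sqrt shapes that _init_variational_parameters (inherited from the SVGP family) produces for the two settings of q_diag — a minimal numpy sketch with toy sizes:

import numpy as np

M, L = 16, 2                             # toy sizes: inducing points, latent GPs
q_sqrt_diag = np.ones((M, L))            # q_diag=True: positive diagonal entries, [M, L]
q_sqrt_full = np.stack([np.eye(M)] * L)  # q_diag=False: lower-triangular factors, [L, M, M]
assert q_sqrt_full.shape == (L, M, M)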