Example #1
    def __init__(self,
                 data: RegressionData,
                 kernel,
                 noise_variance: float = 1.0,
                 parallel=False,
                 max_parallel=10000):

        self.noise_variance = Parameter(noise_variance, transform=positive())
        ts, ys = data_input_to_tensor(data)
        super().__init__(kernel, None, None, num_latent_gps=ys.shape[-1])
        self.data = ts, ys
        filter_spec = kernel.get_spec(ts.shape[0])
        filter_ys_spec = tf.TensorSpec((ts.shape[0], 1),
                                       config.default_float())
        smoother_spec = kernel.get_spec(None)
        smoother_ys_spec = tf.TensorSpec((None, 1), config.default_float())

        if not parallel:
            self._kf = tf.function(
                partial(kf, return_loglikelihood=True, return_predicted=False),
                input_signature=[filter_spec, filter_ys_spec])
            self._kfs = tf.function(
                kfs, input_signature=[smoother_spec, smoother_ys_spec])
        else:
            self._kf = tf.function(
                partial(pkf,
                        return_loglikelihood=True,
                        max_parallel=ts.shape[0]),
                input_signature=[filter_spec, filter_ys_spec])
            self._kfs = tf.function(
                partial(pkfs, max_parallel=max_parallel),
                input_signature=[smoother_spec, smoother_ys_spec])
Example #2
    def predict_f(self,
                  Xnew: InputData,
                  full_cov: bool = False,
                  full_output_cov: bool = False) -> MeanAndVariance:
        r"""
        This method computes predictions at X \in R^{N \times D} input points

        .. math::
            p(F* | Y)

        where F* are the latent GP function values at the new input points and Y are the noisy observations at the training inputs.
        Note that full_cov implies full_output_cov (regardless of the value passed for full_output_cov), to avoid ambiguity.
        """
        full_output_cov = True if full_cov else full_output_cov
        Xnew = tf.reshape(data_input_to_tensor(Xnew), (-1, self._M))
        n = Xnew.shape[0]
        f_mean, f_var = base_conditional(Kmn=self.kernel(self._X, Xnew),
                                         Kmm=self.likelihood.add_to(self.KXX),
                                         Knn=self.kernel(Xnew, Xnew),
                                         f=self._Y - self._mean,
                                         full_cov=True,
                                         white=False)
        f_mean += tf.reshape(self.mean_function(Xnew), f_mean.shape)
        f_mean_shape = (self._L, n)
        f_mean = tf.reshape(f_mean, f_mean_shape)
        f_var = tf.reshape(f_var, f_mean_shape * 2)
        if full_output_cov:
            einsum = 'LNln -> LlNn'
        else:
            einsum = 'LNLn -> LNn'
        f_var = tf.einsum(einsum, f_var)
        if not full_cov:
            f_var = tf.einsum('...NN->...N', f_var)
        perm = tuple(reversed(range(tf.rank(f_var))))
        return tf.transpose(f_mean), tf.transpose(f_var, perm)
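As a standalone illustration (toy shapes, not taken from the model above), the einsum bookkeeping in predict_f works like this: 'LNLn -> LNn' keeps only the block-diagonal of a full (L, N, L, N) covariance across outputs, and '...NN->...N' then keeps only the per-point marginal variances.

import tensorflow as tf

L, N = 2, 3
f_var = tf.reshape(tf.range(L * N * L * N, dtype=tf.float64), (L, N, L, N))

per_output = tf.einsum('LNLn -> LNn', f_var)      # (L, N, N): one N x N block per output
marginals = tf.einsum('...NN->...N', per_output)  # (L, N): variance of each point
print(per_output.shape, marginals.shape)          # (2, 3, 3) (2, 3)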
Example #3
    def __init__(self,
                 variance,
                 lengthscales,
                 name='Kernel',
                 active_dims=None):
        """ Kernel Constructor.

        Args:
            variance: An (L,L) symmetric, positive definite matrix for the signal variance.
            lengthscales: An (L,M) matrix of positive lengthscales.
            name: The name of this kernel.
            active_dims: Which of the input dimensions are used. The default None means all of them.
        """
        super(AnisotropicStationary, self).__init__(
            active_dims=active_dims, name=name
        )  # Do not call gf.kernels.AnisotropicStationary.__init__()!
        self.variance = Variance(value=np.atleast_2d(variance),
                                 name=name + 'Variance')
        self._L = self.variance.shape[0]
        lengthscales = data_input_to_tensor(lengthscales)
        lengthscales_shape = tuple(tf.shape(lengthscales).numpy())
        self._M = 1 if lengthscales_shape in ((), (1, ), (1, 1), (
            self._L, )) else lengthscales_shape[-1]
        lengthscales = tf.reshape(
            tf.broadcast_to(lengthscales, (self._L, self._M)),
            (self._L, 1, self._M))
        self.lengthscales = Parameter(lengthscales,
                                      transform=positive(),
                                      trainable=False,
                                      name=name + 'Lengthscales')
        self._validate_ard_active_dims(self.lengthscales[0, 0])
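A standalone sketch (with made-up L and M) of the broadcast-and-reshape step above, which normalises the accepted lengthscales shapes to the canonical (L, 1, M) used internally:

import tensorflow as tf

L, M = 3, 2

for raw in (0.5,                        # scalar, shared by every output and input
            tf.constant([[0.3, 0.7]]),  # shape (1, M): shared across outputs
            tf.ones((L, M)) * 0.9):     # shape (L, M): one per output and input dim
    ls = tf.reshape(tf.broadcast_to(raw, (L, M)), (L, 1, M))
    print(ls.shape)  # always (3, 1, 2)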
Example #4
def test_data_input_to_tensor():
    input1 = (1.0, (2.0, ))
    output1 = data_input_to_tensor(input1)
    assert output1[0].dtype == tf.float64
    assert output1[1][0].dtype == tf.float64

    input2 = (1.0, [2.0])
    output2 = data_input_to_tensor(input2)
    assert output2[0].dtype == tf.float64
    assert output2[1][0].dtype == tf.float64

    input3 = (1.0, (np.arange(3, dtype=np.float16), ) * 2)
    output3 = data_input_to_tensor(input3)
    assert output3[0].dtype == tf.float64
    assert output3[1][0].dtype == tf.float16
    assert output3[1][1].dtype == tf.float16
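Based only on the assertions in this test, a rough re-implementation sketch of data_input_to_tensor (not GPflow's actual code) would convert plain Python scalars using the configured default float while leaving existing numpy dtypes untouched:

import numpy as np
import tensorflow as tf
from gpflow.config import default_float


def to_tensor_sketch(value):
    if isinstance(value, (tuple, list)):
        return tuple(to_tensor_sketch(v) for v in value)
    if isinstance(value, (np.ndarray, tf.Tensor)):
        return tf.convert_to_tensor(value)  # keeps the existing dtype
    return tf.convert_to_tensor(value, dtype=default_float())  # e.g. float64


out = to_tensor_sketch((1.0, (np.arange(3, dtype=np.float16),) * 2))
print(out[0].dtype, out[1][0].dtype)  # <dtype: 'float64'> <dtype: 'float16'>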
Example #5
    def _set_data(self):
        from gpflow.models.util import (  # pylint:disable=import-outside-toplevel
            data_input_to_tensor, )

        for i, model in enumerate(self.models):
            model.data = data_input_to_tensor((
                self.design_space[self.sampled[:, i]],
                self.y[self.sampled[:, i], i].reshape(-1, 1),
            ))
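An illustrative sketch with toy arrays (names mirror the snippet, values invented) of the boolean-mask selection performed in _set_data: column i of `sampled` marks which design points model i has seen, and the matching rows of the design space and of column i of `y` become its data.

import numpy as np

design_space = np.array([[0.0], [0.5], [1.0], [1.5]])
y = np.array([[0.1, 1.1], [0.2, 1.2], [0.3, 1.3], [0.4, 1.4]])
sampled = np.array([[True, False], [False, True], [True, True], [False, False]])

i = 0  # first model
X_i = design_space[sampled[:, i]]          # rows 0 and 2
Y_i = y[sampled[:, i], i].reshape(-1, 1)   # matching responses as a column
print(X_i.ravel(), Y_i.ravel())            # [0. 1.] [0.1 0.3]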
Example #6
    def __init__(
        self,
        data: RegressionData,
        kernel: Kernel,
        mean_function: Optional[MeanFunction] = None,
        noise_variance: float = 1.0,
    ):
        likelihood = gpflow.likelihoods.Gaussian(noise_variance)
        _, Y_data = data
        super().__init__(kernel,
                         likelihood,
                         mean_function,
                         num_latent_gps=Y_data.shape[-1])
        self.data = data_input_to_tensor(data)
Example #7
def run_one(seed, covariance_function, gp_model, n_training, n_pred):
    t, ft, t_pred, ft_pred, y = get_data(seed, n_training, n_pred)
    gp_dtype = gpf.config.default_float()

    if gp_model is None:
        model_name = ModelEnum(FLAGS.model)
        gp_model = get_model(model_name, (t, y), FLAGS.noise_variance,
                             covariance_function, t.shape[0] + t_pred.shape[0])
    else:
        gp_model.data = data_input_to_tensor((t, y))

    tensor_t_pred = tf.convert_to_tensor(t_pred, dtype=gp_dtype)
    y_pred, _ = gp_model.predict_f(tensor_t_pred)
    error = rmse(y_pred, ft_pred)
    return error, gp_model
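rmse, get_data and get_model are not defined in this snippet; a minimal rmse consistent with its use here (root-mean-square error between prediction and ground truth) might be:

import numpy as np


def rmse(y_pred, y_true):
    y_pred = np.asarray(y_pred).ravel()
    y_true = np.asarray(y_true).ravel()
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))


print(rmse([1.0, 2.0], [1.0, 4.0]))  # 1.4142...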
Example #8
    def __init__(
            self,
            data: RegressionData,
            kernel: mf.kernels.MOStationary,
            mean_function: Optional[mf.mean_functions.MOMeanFunction] = None,
            noise_variance: float = 1.0):
        """

        Args:
            data: Tuple[InputData, OutputData], which determines L, M and N. Both InputData and OutputData must be of rank 2.
            kernel: Must be well-formed, with an (L,L) variance and an (L,M) lengthscales matrix.
            mean_function: Defaults to Zero.
            noise_variance: Broadcast to (diagonal) (L,L) if necessary.
        """
        self._X, self._Y = self.data = data_input_to_tensor(data)
        if (rank := tf.rank(self._X)) != (required_rank := 2):
            raise IndexError(
                f'X should be of rank {required_rank} instead of {rank}.')
Example #9
    def __init__(
            self,
            value,
            name: str = 'Variance',
            cholesky_diagonal_lower_bound: float = CHOLESKY_DIAGONAL_LOWER_BOUND
    ):
        """ Construct a non-diagonal covariance matrix. Mutable only through it's properties cholesky_diagonal and cholesky_lower_triangle.

        Args:
            value: A symmetric, positive definite matrix, expressed in tensorflow or numpy.
            cholesky_diagonal_lower_bound: Lower bound on the diagonal of the Cholesky decomposition.
        """
        super().__init__(name=name)
        value = data_input_to_tensor(value)
        self._shape = (value.shape[-1], value.shape[-1])
        self._broadcast_shape = (value.shape[-1], 1, value.shape[-1], 1)
        if value.shape != self._shape:
            raise ValueError('Variance must have shape (L,L).')

        cholesky = tf.linalg.cholesky(value)

        self._cholesky_diagonal = tf.linalg.diag_part(cholesky)
        if min(self._cholesky_diagonal) <= cholesky_diagonal_lower_bound:
            raise ValueError(
                f'The Cholesky diagonal of {name} must be strictly greater than {cholesky_diagonal_lower_bound}.'
            )
        self._cholesky_diagonal = Parameter(
            self._cholesky_diagonal,
            transform=positive(lower=cholesky_diagonal_lower_bound),
            name=name + '.cholesky_diagonal')

        mask = sum([
            list(range(i * self._shape[0], i * (self._shape[0] + 1)))
            for i in range(1, self._shape[0])
        ],
                   start=[])
        self._cholesky_lower_triangle = Parameter(
            tf.gather(tf.reshape(cholesky, [-1]), mask),
            name=name + '.cholesky_lower_triangle')

        self._row_lengths = tuple(range(self._shape[0]))
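As a standalone check (not the class's actual property code), the two blocks stored above, the Cholesky diagonal and the strictly lower-triangular Cholesky entries, are enough to rebuild the original covariance:

import numpy as np
import tensorflow as tf

value = tf.constant([[4.0, 2.0], [2.0, 3.0]], dtype=tf.float64)
chol = tf.linalg.cholesky(value)

diag = tf.linalg.diag_part(chol)                                 # what _cholesky_diagonal stores
lower = tf.linalg.band_part(chol, -1, 0) - tf.linalg.diag(diag)  # strictly lower entries

rebuilt_chol = lower + tf.linalg.diag(diag)
rebuilt = rebuilt_chol @ tf.transpose(rebuilt_chol)
print(np.allclose(rebuilt.numpy(), value.numpy()))               # True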
Example #10
        Args:
            data: Tuple[InputData, OutputData], which determines L, M and N. Both InputData and OutputData must be of rank 2.
            kernel: Must be well-formed, with an (L,L) variance and an (L,M) lengthscales matrix.
            mean_function: Defaults to Zero.
            noise_variance: Broadcast to (diagonal) (L,L) if necessary.
        """
        self._X, self._Y = self.data = data_input_to_tensor(data)
        if (rank := tf.rank(self._X)) != (required_rank := 2):
            raise IndexError(
                f'X should be of rank {required_rank} instead of {rank}.')
        self._N, self._M = self._X.shape
        self._L = self._Y.shape[-1]
        if (shape := self._Y.shape) != (required_shape := (self._N, self._L)):
            raise IndexError(
                f'Y.shape should be {required_shape} instead of {shape}.')
        self._Y = tf.reshape(
            tf.transpose(self._Y),
            [-1, 1])  # self._Y is now stacked into an (LN, 1) column vector
        if tuple(tf.shape(noise_variance).numpy()) != (self._L, self._L):
            noise_variance = tf.broadcast_to(
                data_input_to_tensor(noise_variance), (self._L, self._L))
            noise_variance = tf.linalg.band_part(noise_variance, 0, 0)
        likelihood = mf.likelihoods.MOGaussian(noise_variance)
        if mean_function is None:
            mean_function = mf.mean_functions.MOMeanFunction(self._L)
        super().__init__(kernel, likelihood, mean_function, num_latent_gps=1)
        self._mean = tf.reshape(self.mean_function(self._X), [-1, 1])
        self._K_unit_variance = None if self.kernel.lengthscales.trainable else self.kernel.K_unit_variance(
            self._X)
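A toy illustration of the noise-variance handling just above: a scalar is broadcast to an (L, L) matrix and band_part then keeps only its diagonal, giving independent, equal noise on each of the L outputs.

import tensorflow as tf

L = 3
noise_variance = tf.constant(0.1, dtype=tf.float64)

full = tf.broadcast_to(noise_variance, (L, L))
diag_only = tf.linalg.band_part(full, 0, 0)  # keep only the main diagonal
print(diag_only.numpy())
# [[0.1 0.  0. ]
#  [0.  0.1 0. ]
#  [0.  0.  0.1]]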
Example #11
    def __init__(
        self,
        data: OutputData,
        split_space: bool, 
        Xp_mean: tf.Tensor,
        Xp_var: tf.Tensor,
        pi: tf.Tensor,
        kernel_K: List[Kernel],
        Zp: tf.Tensor,
        Xs_mean=None,
        Xs_var=None,
        kernel_s=None,
        Zs=None,
        Xs_prior_mean=None,
        Xs_prior_var=None,
        Xp_prior_mean=None,
        Xp_prior_var=None,
        pi_prior=None
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param split_space: if True, use both a shared and a private space;
            if False, use only private spaces (note: to recover the GPLVM, set split_space=False and let K=1)
        :param Xp_mean: mean latent positions in the private space [N, Qp] (Qp is the dimension of the private space)
        :param Xp_var: variance of the latent positions in the private space [N, Qp]
        :param pi: mixture responsibility of each category to each point [N, K] (K is the number of categories), i.e. q(c)
        :param kernel_K: private space kernel, one for each category
        :param Zp: inducing inputs of the private space [M, Qp]
        :param Xs_mean: mean latent positions in the shared space [N, Qs] (Qs is the dimension of the shared space). i.e. mus in q(Xs) ~ N(Xs | mus, Ss)
        :param Xs_var: variance of latent positions in shared space [N, Qs], i.e. Ss, assumed diagonal
        :param kernel_s: shared space kernel 
        :param Zs: inducing inputs of the shared space [M, Qs] (M is the number of inducing points)
        :param Xs_prior_mean: prior mean used in KL term of bound, [N, Qs]. By default 0. mean in p(Xs)
        :param Xs_prior_var: prior variance used in KL term of bound, [N, Qs]. By default 1. variance in p(Xs)
        :param Xp_prior_mean: prior mean used in KL term of bound, [N, Qp]. By default 0. mean in p(Xp)
        :param Xp_prior_var: prior variance used in KL term of bound, [N, Qp]. By default 1. variance in p(Xp)
        :param pi_prior: prior mixture weights used in KL term of the bound, [N, K]. By default uniform. p(c)        
        """

        # if you don't want a shared space, set the shared space to None --> you get a mixture of GPLVMs
        # if you don't want a private space, set the shared space to None, set K = 1 and include only 1 kernel in `kernel_K` --> recovers the original GPLVM

        # TODO: think about how to do this with minibatch
        # it's awkward since w/ minibatch the model usually doesn't store the data internally
        # but for gplvm, you need to keep the q(xn) for all the n's
        # so you need to know which ones to update for each minibatch, probably can be solved but not pretty
        # using inference network / back constraints will solve this, since we will be keeping a global set of parameters
        # rather than a set for each q(xn)
        self.N, self.D = data.shape
        self.Qp = Xp_mean.shape[1]
        self.K = pi.shape[1]
        self.split_space = split_space

        assert Xp_var.ndim == 2
        assert len(kernel_K) == self.K
        assert np.all(Xp_mean.shape == Xp_var.shape)
        assert Xp_mean.shape[0] == self.N, "Xp_mean and Y must be of same size"
        assert pi.shape[0] == self.N, "pi and Y must be of the same size"

        super().__init__()
        self.likelihood = likelihoods.Gaussian()
        self.kernel_K = kernel_K
        self.data = data_input_to_tensor(data)
        # the covariance of q(X) as a [N, Q] matrix, the assumption is that Sn's are diagonal
        # i.e. the latent dimensions are uncorrelated
        # otherwise would require a [N, Q, Q] matrix
        self.Xp_mean = Parameter(Xp_mean)
        self.Xp_var = Parameter(Xp_var, transform=positive())
        self.pi = Parameter(pi, transform=tfp.bijectors.SoftmaxCentered())
        self.Zp = inducingpoint_wrapper(Zp)
        self.M = len(self.Zp)

        # initialize the variational parameters for q(U), same way as in SVGP
        # q_mu: List[K], mean of the inducing variables U [M, D], i.e. m in q(U) ~ N(U | m, S),
        #   initialized as zeros
        # q_sqrt: List[K], cholesky of the covariance matrix of the inducing variables [D, M, M]
        #   q_diag is false because natural gradient only works for full covariance
        #   initialized as all identities
        # we need K sets of q(Uk), each approximating fs+fk
        self.q_mu = []
        self.q_sqrt = []
        for k in range(self.K):
            q_mu = np.zeros((self.M, self.D))
            q_mu = Parameter(q_mu, dtype=default_float())  # [M, D]
            self.q_mu.append(q_mu)

            q_sqrt = [
                np.eye(self.M, dtype=default_float()) for _ in range(self.D)
            ]
            q_sqrt = np.array(q_sqrt)
            q_sqrt = Parameter(q_sqrt, transform=triangular())  # [D, M, M]
            self.q_sqrt.append(q_sqrt)

        # deal with parameters for the prior 
        if Xp_prior_mean is None:
            Xp_prior_mean = tf.zeros((self.N, self.Qp), dtype=default_float())
        if Xp_prior_var is None:
            Xp_prior_var = tf.ones((self.N, self.Qp))
        if pi_prior is None:
            pi_prior = tf.ones((self.N, self.K), dtype=default_float()) * 1/self.K

        self.Xp_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xp_prior_mean), dtype=default_float())
        self.Xp_prior_var = tf.convert_to_tensor(np.atleast_1d(Xp_prior_var), dtype=default_float()) 
        self.pi_prior = tf.convert_to_tensor(np.atleast_1d(pi_prior), dtype=default_float()) 


        # if we have both shared space and private space, need to initialize the parameters for the shared space
        if split_space:
            assert Xs_mean is not None and Xs_var is not None and kernel_s is not None and Zs is not None, 'Xs_mean, Xs_var, kernel_s, Zs need to be initialized if `split_space=True`'
            assert Xs_var.ndim == 2 
            assert np.all(Xs_mean.shape == Xs_var.shape)
            assert Xs_mean.shape[0] == self.N, "Xs_mean and Y must be of same size"
            self.Qs = Xs_mean.shape[1]
            self.kernel_s = kernel_s
            self.Xs_mean = Parameter(Xs_mean)
            self.Xs_var = Parameter(Xs_var, transform=positive())
            self.Zs = inducingpoint_wrapper(Zs)

            if len(Zs) != len(Zp):
                raise ValueError(
                    '`Zs` and `Zp` should have the same length'
                )

            if Xs_prior_mean is None:
                Xs_prior_mean = tf.zeros((self.N, self.Qs), dtype=default_float())
            if Xs_prior_var is None:
                Xs_prior_var = tf.ones((self.N, self.Qs))
            self.Xs_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xs_prior_mean), dtype=default_float())
            self.Xs_prior_var = tf.convert_to_tensor(np.atleast_1d(Xs_prior_var), dtype=default_float())

        self.Fq = tf.zeros((self.N, self.K), dtype=default_float())
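A self-contained sketch (toy sizes) of the per-component variational initialisation used above: q_mu starts as zeros of shape [M, D], and q_sqrt as D stacked M x M identities kept lower-triangular by the triangular() transform.

import numpy as np
from gpflow import Parameter, default_float
from gpflow.utilities import triangular

M, D = 4, 2

q_mu = Parameter(np.zeros((M, D)), dtype=default_float())       # [M, D]
q_sqrt = Parameter(np.array([np.eye(M, dtype=default_float())   # [D, M, M]
                             for _ in range(D)]),
                   transform=triangular())

print(q_mu.shape, q_sqrt.shape)  # (4, 2) (2, 4, 4)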
Example #12
    def __init__(
        self,
        data: OutputData,
        kernel: Optional[Kernel] = None,
        latent_dimensions: Optional[int] = 2,
        num_inducing_variables: Optional[int] = None,
        inducing_variable=None,
        *,
        mean_function=None,
        q_diag: bool = False,
        q_mu=None,
        q_sqrt=None,
        whiten: bool = False,
    ):
        """
        - kernel, likelihood, inducing_variables, mean_function are appropriate
          GPflow objects
        - latent_dimensions is the number of latent dimensions to use; it defaults
          to 2, since the dimensionality reduction targets a 2-D latent space
        - q_diag is a boolean. If True, the covariance is approximated by a
          diagonal matrix.
        - whiten is a boolean. If True, we use the whitened representation of
          the inducing points.
        - num_data is the total number of observations; it defaults to data.shape[0]
          (relevant when feeding in external minibatches)
        """

        self.latent_dimensions = latent_dimensions

        #grab data
        self.data = data_input_to_tensor(data)

        #define lat-space initialization
        X_data_mean = pca_reduce(data, self.latent_dimensions)

        num_data, num_latent_gps = data.shape

        self.num_data = num_data

        X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                             dtype=default_float())

        assert X_data_var.ndim == 2

        #def kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        #init Parameters latent
        self.X_data_mean = Parameter(X_data_mean)
        self.X_data_var = Parameter(X_data_var, transform=positive())

        #init parameter inducing point
        if (inducing_variable is None) == (num_inducing_variables is None):
            raise ValueError(
                "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
            )

        if inducing_variable is None:
            # By default we initialize by subset of initial latent points
            # Note that tf.random.shuffle returns a copy, it does not shuffle in-place
            #maybe use k-means clustering
            Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
            inducing_variable = InducingPoints(Z)

        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        #loss placeholder for analysis purposes
        self.loss_placeholder = defaultdict(
            list, {k: []
                   for k in ("KL_x", "ELBO", "KL_u")})

        # deal with parameters for the prior mean variance of X
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                dtype=default_float())
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                              dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        #sanity check

        assert np.all(X_data_mean.shape == X_data_var.shape)
        assert X_data_mean.shape[0] == self.data.shape[
            0], "X mean and Y must be same size."
        assert X_data_var.shape[0] == self.data.shape[
            0], "X var and Y must be same size."
        assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions

        # init the super class, accept args
        super().__init__(kernel, likelihoods.Gaussian(variance=0.1),
                         mean_function, num_latent_gps)
        self.q_diag = q_diag
        self.whiten = whiten
        #self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        # init variational parameters
        num_inducing = self.inducing_variable.num_inducing
        self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
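The comment above ("maybe use k-means clustering") hints at an alternative to the random-subset initialisation of the inducing inputs; a sketch using scikit-learn (an extra dependency, purely illustrative) could be:

import numpy as np
from sklearn.cluster import KMeans
from gpflow.inducing_variables import InducingPoints

X_data_mean = np.random.default_rng(0).normal(size=(100, 2))  # toy latent means
num_inducing_variables = 10

Z = KMeans(n_clusters=num_inducing_variables, n_init=10,
           random_state=0).fit(X_data_mean).cluster_centers_
inducing_variable = InducingPoints(Z)
print(inducing_variable.num_inducing)  # 10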
Example #13
    def __init__(
        self,
        data: OutputData,
        encoder,
        kernel: Optional[Kernel] = None,
        inducing_variable=None,
        X_prior_mean=None,
        X_prior_var=None,
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param encoder: encoder used to produce the latent-space distribution from the data
        :param kernel: kernel specification, by default Squared Exponential
        :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions)
        :param X_prior_mean: prior mean used in KL term of bound, size [N, Q]. By default 0.
        :param X_prior_var: prior variance used in KL term of bound, size [N, Q]. By default 1.
        """

        self.latent_dimensions = 2
        #grab data
        self.data = data_input_to_tensor(data)

        num_data, num_latent_gps = data.shape

        self.num_data = num_data

        #def kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        #init GPMODEL
        super().__init__(kernel,
                         likelihoods.Gaussian(variance=0.1),
                         num_latent_gps=num_latent_gps)

        #init parameter inducing point
        if inducing_variable is None:
            raise ValueError(
                "BayesianGPLVM needs `inducing_variable` to be supplied"
            )
        else:
            self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        #loss placeholder for analysis purposes
        self.loss_placeholder = defaultdict(list,
                                            {k: []
                                             for k in ("KL_x", "ELBO")})

        # deal with parameters for the prior mean variance of X
        if X_prior_mean is None:
            X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                    dtype=default_float())
        if X_prior_var is None:
            X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                                  dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        # Encoder
        self.encoder = encoder

        #sanity check
        # assert np.all(X_data_mean.shape == X_data_var.shape)
        # assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
        # assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
        # assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions
Example #14
    def __init__(
        self,
        data: OutputData,
        X_data_mean: Optional[tf.Tensor] = None,
        X_data_var: Optional[tf.Tensor] = None,
        kernel: Optional[Kernel] = None,
        num_inducing_variables: Optional[int] = None,
        inducing_variable=None,
        X_prior_mean=None,
        X_prior_var=None,
    ):
        """
        Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

        :param data: data matrix, size N (number of points) x D (dimensions)
        :param X_data_mean: initial latent positions, size N (number of points) x Q (latent dimensions).
        :param X_data_var: variance of latent positions ([N, Q]), for the initialisation of the latent space.
        :param kernel: kernel specification, by default Squared Exponential
        :param num_inducing_variables: number of inducing points, M
        :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions). By default
            random permutation of X_data_mean.
        :param X_prior_mean: prior mean used in KL term of bound. By default 0. Same size as X_data_mean.
        :param X_prior_var: prior variance used in KL term of bound. By default 1.
        """

        self.latent_dimensions = 2
        #grab data
        self.data = data_input_to_tensor(data)

        #define lat-space initialization
        if X_data_mean is None:
            X_data_mean = pca_reduce(data, self.latent_dimensions)

        num_data, num_latent_gps = X_data_mean.shape

        self.num_data = num_data

        if X_data_var is None:
            X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                                 dtype=default_float())

        assert X_data_var.ndim == 2

        self.output_dim = self.data.shape[-1]  #num_latent maybe

        #def kernel
        if kernel is None:
            kernel = gpflow.kernels.SquaredExponential()

        #init GPMODEL
        super().__init__(kernel,
                         likelihoods.Gaussian(variance=0.1),
                         num_latent_gps=num_latent_gps)

        #init Parameters latent
        self.X_data_mean = Parameter(X_data_mean)
        self.X_data_var = Parameter(X_data_var, transform=positive())

        #init parameter inducing point
        if (inducing_variable is None) == (num_inducing_variables is None):
            raise ValueError(
                "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
            )

        if inducing_variable is None:
            # By default we initialize by subset of initial latent points
            # Note that tf.random.shuffle returns a copy, it does not shuffle in-place
            #maybe use k-means clustering
            Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
            inducing_variable = InducingPoints(Z)

        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

        #loss placeholder for analysis purposes
        self.loss_placeholder = defaultdict(list,
                                            {k: []
                                             for k in ("KL_x", "ELBO")})

        # deal with parameters for the prior mean variance of X
        if X_prior_mean is None:
            X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                    dtype=default_float())
        if X_prior_var is None:
            X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                                  dtype=default_float())

        self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                                 dtype=default_float())
        self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                                dtype=default_float())

        #sanity check

        assert np.all(X_data_mean.shape == X_data_var.shape)
        assert X_data_mean.shape[0] == self.data.shape[
            0], "X mean and Y must be same size."
        assert X_data_var.shape[0] == self.data.shape[
            0], "X var and Y must be same size."
        assert X_data_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_mean.shape[0] == self.num_data
        assert self.X_prior_mean.shape[1] == self.latent_dimensions
        assert self.X_prior_var.shape[0] == self.num_data
        assert self.X_prior_var.shape[1] == self.latent_dimensions
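This constructor mirrors GPflow's stock gpflow.models.BayesianGPLVM; assuming that stock class (the snippet's own class name is not shown), a usage sketch with a hand-rolled PCA initialisation standing in for pca_reduce could look like this:

import numpy as np
import tensorflow as tf
import gpflow
from gpflow.utilities import to_default_float

rng = np.random.default_rng(0)
Y = rng.normal(size=(50, 5))          # N x D observed data
latent_dim, num_inducing = 2, 8

# hand-rolled PCA initialisation of the latent means (stand-in for pca_reduce)
Yc = Y - Y.mean(axis=0)
_, _, Vt = np.linalg.svd(Yc, full_matrices=False)
X_mean_init = to_default_float(Yc @ Vt[:latent_dim].T)
X_var_init = tf.ones((Y.shape[0], latent_dim), dtype=gpflow.default_float())

model = gpflow.models.BayesianGPLVM(
    to_default_float(Y),
    X_data_mean=X_mean_init,
    X_data_var=X_var_init,
    kernel=gpflow.kernels.SquaredExponential(),
    num_inducing_variables=num_inducing,
)
print(model.elbo())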