def __init__(self,
             data: RegressionData,
             kernel,
             noise_variance: float = 1.0,
             parallel=False,
             max_parallel=10000):
    self.noise_variance = Parameter(noise_variance, transform=positive())
    ts, ys = data_input_to_tensor(data)
    super().__init__(kernel, None, None, num_latent_gps=ys.shape[-1])
    self.data = ts, ys

    filter_spec = kernel.get_spec(ts.shape[0])
    filter_ys_spec = tf.TensorSpec((ts.shape[0], 1), config.default_float())
    smoother_spec = kernel.get_spec(None)
    smoother_ys_spec = tf.TensorSpec((None, 1), config.default_float())

    if not parallel:
        self._kf = tf.function(
            partial(kf, return_loglikelihood=True, return_predicted=False),
            input_signature=[filter_spec, filter_ys_spec])
        self._kfs = tf.function(
            kfs, input_signature=[smoother_spec, smoother_ys_spec])
    else:
        self._kf = tf.function(
            partial(pkf, return_loglikelihood=True, max_parallel=ts.shape[0]),
            input_signature=[filter_spec, filter_ys_spec])
        self._kfs = tf.function(
            partial(pkfs, max_parallel=max_parallel),
            input_signature=[smoother_spec, smoother_ys_spec])

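# A minimal standalone sketch (plain TensorFlow, not part of the model above) of why the
# smoother specs use a None leading dimension: a tf.TensorSpec with shape (None, 1) lets a
# single traced tf.function handle inputs of any length without retracing, whereas the
# filter specs are pinned to the training length ts.shape[0].
import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec((None, 1), tf.float64)])
def smooth_like(ys):
    # stands in for kfs/pkfs: any (T, 1) input reuses the same concrete function
    return tf.reduce_sum(ys)

print(smooth_like(tf.zeros((5, 1), tf.float64)))   # traced once
print(smooth_like(tf.zeros((12, 1), tf.float64)))  # reuses the same trace
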
def predict_f(self,
              Xnew: InputData,
              full_cov: bool = False,
              full_output_cov: bool = False) -> MeanAndVariance:
    r"""
    Compute predictions at the new input points :math:`X \in \mathbb{R}^{N \times D}`:

    .. math::
        p(F^* \mid Y)

    where :math:`F^*` are values of the GP at the new input points and :math:`Y` are the noisy
    observations at the training inputs. Note that ``full_cov`` implies ``full_output_cov``
    (regardless of the value passed for ``full_output_cov``), to avoid ambiguity.
    """
    full_output_cov = True if full_cov else full_output_cov
    Xnew = tf.reshape(data_input_to_tensor(Xnew), (-1, self._M))
    n = Xnew.shape[0]
    f_mean, f_var = base_conditional(Kmn=self.kernel(self._X, Xnew),
                                     Kmm=self.likelihood.add_to(self.KXX),
                                     Knn=self.kernel(Xnew, Xnew),
                                     f=self._Y - self._mean,
                                     full_cov=True,
                                     white=False)
    f_mean += tf.reshape(self.mean_function(Xnew), f_mean.shape)
    f_mean_shape = (self._L, n)
    f_mean = tf.reshape(f_mean, f_mean_shape)
    f_var = tf.reshape(f_var, f_mean_shape * 2)
    if full_output_cov:
        einsum = 'LNln -> LlNn'
    else:
        einsum = 'LNLn -> LNn'
    f_var = tf.einsum(einsum, f_var)
    if not full_cov:
        f_var = tf.einsum('...NN->...N', f_var)
    perm = tuple(reversed(range(tf.rank(f_var))))
    return tf.transpose(f_mean), tf.transpose(f_var, perm)

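# A small standalone sketch of the covariance bookkeeping in predict_f above, using dummy
# tensors (L=2 outputs, n=3 new points): reshape an (L*n, L*n) covariance to (L, n, L, n),
# reorder with einsum, and take the per-point diagonal when full_cov is False.
import tensorflow as tf

L, n = 2, 3
f_var = tf.random.normal((L * n, L * n), dtype=tf.float64)
f_var = tf.reshape(f_var, (L, n, L, n))
full = tf.einsum('LNln->LlNn', f_var)   # (L, L, n, n): full joint covariance
marg = tf.einsum('LNLn->LNn', f_var)    # (L, n, n): per-output covariance
diag = tf.einsum('...NN->...N', marg)   # (L, n): marginal variances
print(full.shape, marg.shape, diag.shape)
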
def __init__(self, variance, lengthscales, name='Kernel', active_dims=None):
    """
    Kernel constructor.

    Args:
        variance: An (L,L) symmetric, positive-definite matrix for the signal variance.
        lengthscales: An (L,M) matrix of positive lengthscales.
        name: The name of this kernel.
        active_dims: Which of the input dimensions are used. The default None means all of them.
    """
    super(AnisotropicStationary, self).__init__(
        active_dims=active_dims, name=name
    )  # Do not call gf.kernels.AnisotropicStationary.__init__()!
    self.variance = Variance(value=np.atleast_2d(variance), name=name + 'Variance')
    self._L = self.variance.shape[0]
    lengthscales = data_input_to_tensor(lengthscales)
    lengthscales_shape = tuple(tf.shape(lengthscales).numpy())
    self._M = 1 if lengthscales_shape in ((), (1,), (1, 1), (self._L,)) else lengthscales_shape[-1]
    lengthscales = tf.reshape(
        tf.broadcast_to(lengthscales, (self._L, self._M)), (self._L, 1, self._M))
    self.lengthscales = Parameter(lengthscales,
                                  transform=positive(),
                                  trainable=False,
                                  name=name + 'Lengthscales')
    self._validate_ard_active_dims(self.lengthscales[0, 0])

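# A minimal sketch (plain TensorFlow, dummy L=2, M=3; not the kernel class itself) of how
# the constructor above broadcasts lengthscales into an (L, 1, M) layout: a scalar and an
# (L, M) matrix both end up with the same shape.
import tensorflow as tf

L, M = 2, 3
for lengthscales in (tf.constant(0.5),
                     tf.constant([[0.5, 1.0, 2.0], [0.1, 0.2, 0.3]])):
    out = tf.reshape(tf.broadcast_to(lengthscales, (L, M)), (L, 1, M))
    print(out.shape)  # always (2, 1, 3)
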
def test_data_input_to_tensor():
    input1 = (1.0, (2.0,))
    output1 = data_input_to_tensor(input1)
    assert output1[0].dtype == tf.float64
    assert output1[1][0].dtype == tf.float64

    input2 = (1.0, [2.0])
    output2 = data_input_to_tensor(input2)
    assert output2[0].dtype == tf.float64
    assert output2[1][0].dtype == tf.float64

    input3 = (1.0, (np.arange(3, dtype=np.float16),) * 2)
    output3 = data_input_to_tensor(input3)
    assert output3[0].dtype == tf.float64
    assert output3[1][0].dtype == tf.float16
    assert output3[1][1].dtype == tf.float16

def _set_data(self):
    from gpflow.models.util import (  # pylint:disable=import-outside-toplevel
        data_input_to_tensor,
    )

    for i, model in enumerate(self.models):
        model.data = data_input_to_tensor((
            self.design_space[self.sampled[:, i]],
            self.y[self.sampled[:, i], i].reshape(-1, 1),
        ))

def __init__(
    self,
    data: RegressionData,
    kernel: Kernel,
    mean_function: Optional[MeanFunction] = None,
    noise_variance: float = 1.0,
):
    likelihood = gpflow.likelihoods.Gaussian(noise_variance)
    _, Y_data = data
    super().__init__(kernel, likelihood, mean_function, num_latent_gps=Y_data.shape[-1])
    self.data = data_input_to_tensor(data)

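# A hedged usage sketch for the constructor above. The enclosing class name is not shown
# in this excerpt, so the call below uses gpflow.models.GPR, whose signature this
# constructor mirrors; substitute the actual class when using it.
import numpy as np
import gpflow

X = np.random.rand(20, 1)
Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
model = gpflow.models.GPR((X, Y),
                          kernel=gpflow.kernels.SquaredExponential(),
                          noise_variance=0.1)
print(model.maximum_log_likelihood_objective())
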
def run_one(seed, covariance_function, gp_model, n_training, n_pred):
    t, ft, t_pred, ft_pred, y = get_data(seed, n_training, n_pred)
    gp_dtype = gpf.config.default_float()
    if gp_model is None:
        model_name = ModelEnum(FLAGS.model)
        gp_model = get_model(model_name, (t, y), FLAGS.noise_variance,
                             covariance_function, t.shape[0] + t_pred.shape[0])
    else:
        gp_model.data = data_input_to_tensor((t, y))
    tensor_t_pred = tf.convert_to_tensor(t_pred, dtype=gp_dtype)
    y_pred, _ = gp_model.predict_f(tensor_t_pred)
    error = rmse(y_pred, ft_pred)
    return error, gp_model

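# `rmse` is used above but not defined in this excerpt; the helper below is a plausible
# minimal implementation (an assumption, not necessarily the project's own definition).
import numpy as np
import tensorflow as tf

def rmse(y_pred, y_true):
    # root-mean-square error between a prediction tensor and reference values
    y_pred = tf.cast(tf.reshape(y_pred, [-1]), tf.float64)
    y_true = tf.cast(tf.reshape(np.asarray(y_true), [-1]), tf.float64)
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))
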
def __init__(
        self,
        data: RegressionData,
        kernel: mf.kernels.MOStationary,
        mean_function: Optional[mf.mean_functions.MOMeanFunction] = None,
        noise_variance: float = 1.0):
    """
    Args:
        data: Tuple[InputData, OutputData], which determines L, M and N.
            Both InputData and OutputData must be of rank 2.
        kernel: Must be well-formed, with an (L,L) variance and an (L,M) lengthscales matrix.
        mean_function: Defaults to Zero.
        noise_variance: Broadcast to (diagonal) (L,L) if necessary.
    """
    self._X, self._Y = self.data = data_input_to_tensor(data)
    if (rank := tf.rank(self._X)) != (required_rank := 2):
        raise IndexError(
            f'X should be of rank {required_rank} instead of {rank}.')
def __init__(
        self,
        value,
        name: str = 'Variance',
        cholesky_diagonal_lower_bound: float = CHOLESKY_DIAGONAL_LOWER_BOUND):
    """
    Construct a non-diagonal covariance matrix. Mutable only through its properties
    cholesky_diagonal and cholesky_lower_triangle.

    Args:
        value: A symmetric, positive definite matrix, expressed in tensorflow or numpy.
        name: The name of this Variance.
        cholesky_diagonal_lower_bound: Lower bound on the diagonal of the Cholesky decomposition.
    """
    super().__init__(name=name)
    value = data_input_to_tensor(value)
    self._shape = (value.shape[-1], value.shape[-1])
    self._broadcast_shape = (value.shape[-1], 1, value.shape[-1], 1)
    if value.shape != self._shape:
        raise ValueError('Variance must have shape (L,L).')

    cholesky = tf.linalg.cholesky(value)
    self._cholesky_diagonal = tf.linalg.diag_part(cholesky)
    if min(self._cholesky_diagonal) <= cholesky_diagonal_lower_bound:
        raise ValueError(
            f'The Cholesky diagonal of {name} must be strictly greater than {cholesky_diagonal_lower_bound}.')
    self._cholesky_diagonal = Parameter(
        self._cholesky_diagonal,
        transform=positive(lower=cholesky_diagonal_lower_bound),
        name=name + '.cholesky_diagonal')

    mask = sum([
        list(range(i * self._shape[0], i * (self._shape[0] + 1)))
        for i in range(1, self._shape[0])
    ], start=[])
    self._cholesky_lower_triangle = Parameter(
        tf.gather(tf.reshape(cholesky, [-1]), mask),
        name=name + '.cholesky_lower_triangle')
    self._row_lengths = tuple(range(self._shape[0]))

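# A quick standalone check (plain Python/NumPy, dummy L=3) of the `mask` computed above:
# it selects the strictly-lower-triangular entries of the row-major flattened Cholesky factor.
import numpy as np

L = 3
mask = sum([list(range(i * L, i * (L + 1))) for i in range(1, L)], start=[])
print(mask)  # [3, 6, 7] -> flat indices of entries (1,0), (2,0), (2,1)
rows, cols = np.unravel_index(mask, (L, L))
print(list(zip(rows, cols)))  # [(1, 0), (2, 0), (2, 1)]
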
    self._N, self._M = self._X.shape
    self._L = self._Y.shape[-1]
    if (shape := self._Y.shape) != (required_shape := (self._N, self._L)):
        raise IndexError(
            f'Y.shape should be {required_shape} instead of {shape}.')
    self._Y = tf.reshape(tf.transpose(self._Y), [-1, 1])  # self._Y is now concatenated into an (LN, 1) column vector

    if tuple(tf.shape(noise_variance).numpy()) != (self._L, self._L):
        noise_variance = tf.broadcast_to(
            data_input_to_tensor(noise_variance), (self._L, self._L))
        noise_variance = tf.linalg.band_part(noise_variance, 0, 0)
    likelihood = mf.likelihoods.MOGaussian(noise_variance)
    if mean_function is None:
        mean_function = mf.mean_functions.MOMeanFunction(self._L)

    super().__init__(kernel, likelihood, mean_function, num_latent_gps=1)
    self._mean = tf.reshape(self.mean_function(self._X), [-1, 1])
    self._K_unit_variance = None if self.kernel.lengthscales.trainable else self.kernel.K_unit_variance(self._X)

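# A minimal sketch (plain TensorFlow, dummy L=3) of the noise_variance handling above:
# a scalar is broadcast to (L, L) and then reduced to its diagonal band, giving an
# isotropic noise covariance.
import tensorflow as tf

L = 3
noise_variance = tf.broadcast_to(tf.constant(0.1, tf.float64), (L, L))
noise_variance = tf.linalg.band_part(noise_variance, 0, 0)
print(noise_variance)  # diag(0.1, 0.1, 0.1)
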
def __init__(
    self,
    data: OutputData,
    split_space: bool,
    Xp_mean: tf.Tensor,
    Xp_var: tf.Tensor,
    pi: tf.Tensor,
    kernel_K: List[Kernel],
    Zp: tf.Tensor,
    Xs_mean=None,
    Xs_var=None,
    kernel_s=None,
    Zs=None,
    Xs_prior_mean=None,
    Xs_prior_var=None,
    Xp_prior_mean=None,
    Xp_prior_var=None,
    pi_prior=None,
):
    """
    Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

    :param data: data matrix, size N (number of points) x D (dimensions)
    :param split_space: if True, use both a shared and a private space;
        if False, use only private spaces (note: to recover GPLVM, set split_space=False and let K=1)
    :param Xp_mean: mean latent positions in the private space [N, Qp] (Qp is the dimension of the private space)
    :param Xp_var: variance of the latent positions in the private space [N, Qp]
    :param pi: mixture responsibility of each category for each point [N, K] (K is the number of categories), i.e. q(c)
    :param kernel_K: private space kernels, one for each category
    :param Zp: inducing inputs of the private space [M, Qp] (M is the number of inducing points)
    :param Xs_mean: mean latent positions in the shared space [N, Qs] (Qs is the dimension of the shared space),
        i.e. mus in q(Xs) ~ N(Xs | mus, Ss)
    :param Xs_var: variance of the latent positions in the shared space [N, Qs], i.e. Ss, assumed diagonal
    :param kernel_s: shared space kernel
    :param Zs: inducing inputs of the shared space [M, Qs]
    :param Xs_prior_mean: prior mean used in the KL term of the bound, [N, Qs]. By default 0. Mean in p(Xs)
    :param Xs_prior_var: prior variance used in the KL term of the bound, [N, Qs]. By default 1. Variance in p(Xs)
    :param Xp_prior_mean: prior mean used in the KL term of the bound, [N, Qp]. By default 0. Mean in p(Xp)
    :param Xp_prior_var: prior variance used in the KL term of the bound, [N, Qp]. By default 1. Variance in p(Xp)
    :param pi_prior: prior mixture weights used in the KL term of the bound, [N, K]. By default uniform. p(c)
    """
    # If no shared space is wanted, set the shared space to None --> a mixture of GPLVMs.
    # If no private space is wanted, set the shared space to None, set K = 1 and include only
    # one kernel in `kernel_K` --> recovers the original GPLVM.
    # TODO: think about how to do this with minibatches. It is awkward: with minibatches the
    # model usually does not store the data internally, but a GPLVM needs to keep q(xn) for all
    # n, so it must know which ones to update for each minibatch. This could probably be solved,
    # but not prettily; using an inference network / back constraints would solve it, since we
    # would keep one global set of parameters rather than one set per q(xn).
    self.N, self.D = data.shape
    self.Qp = Xp_mean.shape[1]
    self.K = pi.shape[1]
    self.split_space = split_space

    assert Xp_var.ndim == 2
    assert len(kernel_K) == self.K
    assert np.all(Xp_mean.shape == Xp_var.shape)
    assert Xp_mean.shape[0] == self.N, "Xp_mean and Y must be of same size"
    assert pi.shape[0] == self.N, "pi and Y must be of the same size"

    super().__init__()
    self.likelihood = likelihoods.Gaussian()
    self.kernel_K = kernel_K
    self.data = data_input_to_tensor(data)

    # The covariance of q(X) is stored as an [N, Q] matrix; the assumption is that the Sn's are
    # diagonal, i.e. the latent dimensions are uncorrelated (otherwise an [N, Q, Q] tensor would
    # be required).
    self.Xp_mean = Parameter(Xp_mean)
    self.Xp_var = Parameter(Xp_var, transform=positive())
    self.pi = Parameter(pi, transform=tfp.bijectors.SoftmaxCentered())
    self.Zp = inducingpoint_wrapper(Zp)
    self.M = len(self.Zp)

    # Initialise the variational parameters for q(U), in the same way as in SVGP:
    # q_mu: List[K], mean of the inducing variables U [M, D], i.e. m in q(U) ~ N(U | m, S),
    #     initialised as zeros.
    # q_sqrt: List[K], Cholesky factor of the covariance of the inducing variables [D, M, M],
    #     initialised as identities. q_diag is False because natural gradients only work with a
    #     full covariance. We need K sets of q(Uk), each approximating fs + fk.
    self.q_mu = []
    self.q_sqrt = []
    for k in range(self.K):
        q_mu = np.zeros((self.M, self.D))
        q_mu = Parameter(q_mu, dtype=default_float())  # [M, D]
        self.q_mu.append(q_mu)

        q_sqrt = [
            np.eye(self.M, dtype=default_float()) for _ in range(self.D)
        ]
        q_sqrt = np.array(q_sqrt)
        q_sqrt = Parameter(q_sqrt, transform=triangular())  # [D, M, M]
        self.q_sqrt.append(q_sqrt)

    # Deal with the parameters of the prior.
    if Xp_prior_mean is None:
        Xp_prior_mean = tf.zeros((self.N, self.Qp), dtype=default_float())
    if Xp_prior_var is None:
        Xp_prior_var = tf.ones((self.N, self.Qp), dtype=default_float())
    if pi_prior is None:
        pi_prior = tf.ones((self.N, self.K), dtype=default_float()) * 1 / self.K
    self.Xp_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xp_prior_mean), dtype=default_float())
    self.Xp_prior_var = tf.convert_to_tensor(np.atleast_1d(Xp_prior_var), dtype=default_float())
    self.pi_prior = tf.convert_to_tensor(np.atleast_1d(pi_prior), dtype=default_float())

    # If we have both a shared space and private spaces, initialise the shared-space parameters.
    if split_space:
        assert Xs_mean is not None and Xs_var is not None and kernel_s is not None and Zs is not None, \
            'Xs_mean, Xs_var, kernel_s and Zs need to be initialised if `split_space=True`'
        assert Xs_var.ndim == 2
        assert np.all(Xs_mean.shape == Xs_var.shape)
        assert Xs_mean.shape[0] == self.N, "Xs_mean and Y must be of same size"

        self.Qs = Xs_mean.shape[1]
        self.kernel_s = kernel_s
        self.Xs_mean = Parameter(Xs_mean)
        self.Xs_var = Parameter(Xs_var, transform=positive())
        self.Zs = inducingpoint_wrapper(Zs)
        if len(Zs) != len(Zp):
            raise ValueError('`Zs` and `Zp` should have the same length')

        if Xs_prior_mean is None:
            Xs_prior_mean = tf.zeros((self.N, self.Qs), dtype=default_float())
        if Xs_prior_var is None:
            Xs_prior_var = tf.ones((self.N, self.Qs), dtype=default_float())
        self.Xs_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xs_prior_mean), dtype=default_float())
        self.Xs_prior_var = tf.convert_to_tensor(np.atleast_1d(Xs_prior_var), dtype=default_float())

    self.Fq = tf.zeros((self.N, self.K), dtype=default_float())

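# A small sketch (NumPy/GPflow only, dummy sizes) of the per-category variational parameters
# initialised above: K lists of q_mu with shape [M, D] and q_sqrt with shape [D, M, M], the
# latter constrained to be lower-triangular via GPflow's `triangular()` bijector.
import numpy as np
from gpflow import Parameter, default_float
from gpflow.utilities import triangular

K, M, D = 2, 4, 3
q_mu = [Parameter(np.zeros((M, D)), dtype=default_float()) for _ in range(K)]
q_sqrt = [Parameter(np.stack([np.eye(M) for _ in range(D)]),
                    transform=triangular()) for _ in range(K)]
print(q_mu[0].shape, q_sqrt[0].shape)  # (4, 3) (3, 4, 4)
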
def __init__(
    self,
    data: OutputData,
    kernel: Optional[Kernel] = None,
    latent_dimensions: Optional[int] = 2,
    num_inducing_variables: Optional[int] = None,
    inducing_variable=None,
    *,
    mean_function=None,
    q_diag: bool = False,
    q_mu=None,
    q_sqrt=None,
    whiten: bool = False,
):
    """
    - kernel, likelihood, inducing_variables and mean_function are appropriate GPflow objects.
    - latent_dimensions is the number of latent processes to use; it defaults to 2, since the
      dimensionality reduction targets 2 latent dimensions.
    - q_diag is a boolean. If True, the covariance is approximated by a diagonal matrix.
    - whiten is a boolean. If True, we use the whitened representation of the inducing points.
    - num_data is the total number of observations; it defaults to X.shape[0]
      (relevant when feeding in external minibatches).
    """
    self.latent_dimensions = latent_dimensions

    # grab data
    self.data = data_input_to_tensor(data)

    # define latent-space initialisation
    X_data_mean = pca_reduce(data, self.latent_dimensions)
    num_data, num_latent_gps = data.shape
    self.num_data = num_data
    X_data_var = tf.ones((self.num_data, self.latent_dimensions), dtype=default_float())
    assert X_data_var.ndim == 2

    # default kernel
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    # initialise latent Parameters
    self.X_data_mean = Parameter(X_data_mean)
    self.X_data_var = Parameter(X_data_var, transform=positive())

    # initialise inducing points
    if (inducing_variable is None) == (num_inducing_variables is None):
        raise ValueError(
            "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
        )
    if inducing_variable is None:
        # By default we initialise with a subset of the initial latent points.
        # Note that tf.random.shuffle returns a copy, it does not shuffle in-place.
        # Maybe use k-means clustering instead.
        Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
        inducing_variable = InducingPoints(Z)
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # loss placeholder for analysis purposes
    self.loss_placeholder = defaultdict(list, {k: [] for k in ("KL_x", "ELBO", "KL_u")})

    # deal with the parameters for the prior mean and variance of X
    X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions), dtype=default_float())
    X_prior_var = tf.ones((self.num_data, self.latent_dimensions), dtype=default_float())
    self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean), dtype=default_float())
    self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var), dtype=default_float())

    # sanity checks
    assert np.all(X_data_mean.shape == X_data_var.shape)
    assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
    assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
    assert X_data_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_mean.shape[0] == self.num_data
    assert self.X_prior_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_var.shape[0] == self.num_data
    assert self.X_prior_var.shape[1] == self.latent_dimensions

    # init the superclass, accept args
    super().__init__(kernel, likelihoods.Gaussian(variance=0.1), mean_function, num_latent_gps)
    self.q_diag = q_diag
    self.whiten = whiten
    # self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # init variational parameters
    num_inducing = self.inducing_variable.num_inducing
    self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)

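# A short sketch (GPflow utilities, dummy data) of the default inducing-point initialisation
# used above: PCA-reduce the observations to the latent space, then take a random subset of
# the latent means as inducing inputs.
import numpy as np
import tensorflow as tf
from gpflow.models.gplvm import pca_reduce
from gpflow.inducing_variables import InducingPoints

Y = np.random.randn(50, 6)
X_mean = pca_reduce(Y, 2)                  # (50, 2) latent means
Z = tf.random.shuffle(X_mean)[:10]         # 10 randomly chosen latent points
inducing_variable = InducingPoints(Z)
print(inducing_variable.num_inducing)      # 10
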
def __init__(
    self,
    data: OutputData,
    encoder,
    kernel: Optional[Kernel] = None,
    inducing_variable=None,
    X_prior_mean=None,
    X_prior_var=None,
):
    """
    Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

    :param data: data matrix, size N (number of points) x D (dimensions)
    :param encoder: encoder network that amortises the variational distribution over the latent positions
    :param kernel: kernel specification, by default Squared Exponential
    :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions)
    :param X_prior_mean: prior mean used in KL term of bound. By default 0.
    :param X_prior_var: prior variance used in KL term of bound. By default 1.
    """
    self.latent_dimensions = 2

    # grab data
    self.data = data_input_to_tensor(data)
    num_data, num_latent_gps = data.shape
    self.num_data = num_data

    # default kernel
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    # init GPModel
    super().__init__(kernel, likelihoods.Gaussian(variance=0.1), num_latent_gps=num_latent_gps)

    # initialise inducing points
    if inducing_variable is None:
        raise ValueError("BayesianGPLVM with an encoder requires `inducing_variable` to be provided")
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # loss placeholder for analysis purposes
    self.loss_placeholder = defaultdict(list, {k: [] for k in ("KL_x", "ELBO")})

    # deal with the parameters for the prior mean and variance of X
    if X_prior_mean is None:
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions), dtype=default_float())
    if X_prior_var is None:
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions), dtype=default_float())
    self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean), dtype=default_float())
    self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var), dtype=default_float())

    # encoder
    self.encoder = encoder

    # sanity checks
    # assert np.all(X_data_mean.shape == X_data_var.shape)
    # assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
    # assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
    # assert X_data_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_mean.shape[0] == self.num_data
    assert self.X_prior_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_var.shape[0] == self.num_data
    assert self.X_prior_var.shape[1] == self.latent_dimensions

def __init__(
    self,
    data: OutputData,
    X_data_mean: Optional[tf.Tensor] = None,
    X_data_var: Optional[tf.Tensor] = None,
    kernel: Optional[Kernel] = None,
    num_inducing_variables: Optional[int] = None,
    inducing_variable=None,
    X_prior_mean=None,
    X_prior_var=None,
):
    """
    Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

    :param data: data matrix, size N (number of points) x D (dimensions)
    :param X_data_mean: initial latent positions, size N (number of points) x Q (latent dimensions).
    :param X_data_var: variance of latent positions ([N, Q]), for the initialisation of the latent space.
    :param kernel: kernel specification, by default Squared Exponential
    :param num_inducing_variables: number of inducing points, M
    :param inducing_variable: matrix of inducing points, size M (inducing points) x Q (latent dimensions).
        By default a random permutation of X_data_mean.
    :param X_prior_mean: prior mean used in KL term of bound. By default 0. Same size as X_data_mean.
    :param X_prior_var: prior variance used in KL term of bound. By default 1.
    """
    self.latent_dimensions = 2

    # grab data
    self.data = data_input_to_tensor(data)

    # define latent-space initialisation
    if X_data_mean is None:
        X_data_mean = pca_reduce(data, self.latent_dimensions)
    num_data, num_latent_gps = X_data_mean.shape
    self.num_data = num_data
    if X_data_var is None:
        X_data_var = tf.ones((self.num_data, self.latent_dimensions), dtype=default_float())
    assert X_data_var.ndim == 2
    self.output_dim = self.data.shape[-1]  # num_latent, maybe

    # default kernel
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    # init GPModel
    super().__init__(kernel, likelihoods.Gaussian(variance=0.1), num_latent_gps=num_latent_gps)

    # initialise latent Parameters
    self.X_data_mean = Parameter(X_data_mean)
    self.X_data_var = Parameter(X_data_var, transform=positive())

    # initialise inducing points
    if (inducing_variable is None) == (num_inducing_variables is None):
        raise ValueError(
            "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
        )
    if inducing_variable is None:
        # By default we initialise with a subset of the initial latent points.
        # Note that tf.random.shuffle returns a copy, it does not shuffle in-place.
        # Maybe use k-means clustering instead.
        Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
        inducing_variable = InducingPoints(Z)
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # loss placeholder for analysis purposes
    self.loss_placeholder = defaultdict(list, {k: [] for k in ("KL_x", "ELBO")})

    # deal with the parameters for the prior mean and variance of X
    if X_prior_mean is None:
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions), dtype=default_float())
    if X_prior_var is None:
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions), dtype=default_float())
    self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean), dtype=default_float())
    self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var), dtype=default_float())

    # sanity checks
    assert np.all(X_data_mean.shape == X_data_var.shape)
    assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
    assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
    assert X_data_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_mean.shape[0] == self.num_data
    assert self.X_prior_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_var.shape[0] == self.num_data
    assert self.X_prior_var.shape[1] == self.latent_dimensions

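# A hedged usage sketch for the constructor above. The enclosing class name is not shown in
# this excerpt; the call below uses GPflow's own gpflow.models.BayesianGPLVM, whose signature
# this variant appears to follow closely (the variant fixes the latent dimensionality to 2
# and derives the remaining defaults internally).
import numpy as np
import tensorflow as tf
import gpflow
from gpflow.models.gplvm import pca_reduce

Y = np.random.randn(40, 5)
X_mean = pca_reduce(Y, 2)
X_var = tf.ones((40, 2), dtype=gpflow.default_float())
model = gpflow.models.BayesianGPLVM(
    data=Y,
    X_data_mean=X_mean,
    X_data_var=X_var,
    kernel=gpflow.kernels.SquaredExponential(),
    num_inducing_variables=8,
)
print(model.elbo())
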