def test_linear_coregionalization_shi(
    register_posterior_test,
    q_sqrt_factory,
    full_cov,
    full_output_cov,
    whiten,
    num_latent_gps,
    output_dims,
):
    """
    Linear coregionalization with shared independent inducing variables.
    """
    kernel = gpflow.kernels.LinearCoregionalization(
        [gpflow.kernels.SquaredExponential() for _ in range(num_latent_gps)],
        W=tf.random.normal((output_dims, num_latent_gps)),
    )
    inducing_variable = gpflow.inducing_variables.SharedIndependentInducingVariables(
        inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    )
    q_mu = np.random.randn(NUM_INDUCING_POINTS, num_latent_gps)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, num_latent_gps)

    conditional = create_conditional(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    register_posterior_test(posterior, LinearCoregionalizationPosterior)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )

def _init_layers(self, X, Y, Z, q_sqrt_initial, kernels,
                 mean_function=Zero(), Layer=SVGPLayer, white=False):
    """
    The first layer only models between the input and output_1.
    The second layer models between the input and output_2, and between
    output_1 and output_2.
    Should the inducing points for the input dimensions be shared across
    layers? Should the inducing points for the output dimensions be
    computed (from the previous layer's q_mu) rather than optimised?
    """
    layers = []
    self.inducing_inputs = inducingpoint_wrapper(Z[:, :self.m])
    inducing_inputs = self.inducing_inputs.Z
    for i in range(self.num_outputs):
        layer = Layer(kernels[i], inducing_inputs, Z[:, self.m + i],
                      q_sqrt_initial[:, i], mean_function, white=white)
        layers.append(layer)
        inducing_inputs = tf.concat([inducing_inputs, layer.q_mu], axis=1)
    return layers

def test_posterior_create_with_variables_update_cache_works(q_sqrt_factory, whiten):
    # setup posterior
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    q_mu = tf.Variable(np.random.randn(NUM_INDUCING_POINTS, 1))
    initial_q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)
    if initial_q_sqrt is not None:
        q_sqrt = tf.Variable(initial_q_sqrt)
    else:
        q_sqrt = initial_q_sqrt

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=PrecomputeCacheType.VARIABLE,
    )
    assert isinstance(posterior.alpha, tf.Variable)
    assert isinstance(posterior.Qinv, tf.Variable)

    alpha = posterior.alpha
    Qinv = posterior.Qinv
    posterior.update_cache()

    assert posterior.alpha is alpha
    assert posterior.Qinv is Qinv

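# Context for the cache tests in this file, as a hedged usage sketch of the public
# GPflow API (not code from this test module): `model.posterior()` precomputes
# alpha and Qinv once; storing them as tf.Variables lets `update_cache()` refresh
# the values in place, so a tf.function-compiled prediction does not need to be
# re-traced after the model parameters change.
import numpy as np
import tensorflow as tf
import gpflow
from gpflow.posteriors import PrecomputeCacheType

X = np.random.randn(50, 1)
model = gpflow.models.SVGP(
    gpflow.kernels.SquaredExponential(),
    gpflow.likelihoods.Gaussian(),
    inducing_variable=np.linspace(-2.0, 2.0, 10)[:, None],
)
posterior = model.posterior(precompute_cache=PrecomputeCacheType.VARIABLE)
predict = tf.function(posterior.predict_f)
mean, var = predict(X)       # traced once
# ... after an optimisation step that changes the model parameters ...
posterior.update_cache()     # refreshes alpha/Qinv in place; no re-trace needed
mean, var = predict(X)
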
def test_independent_single_output(register_posterior_test, q_sqrt_factory, whiten,
                                   full_cov, full_output_cov):
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    q_mu = np.random.randn(NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)

    conditional = create_conditional(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    register_posterior_test(posterior, IndependentPosteriorSingleOutput)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov)

def test_independent_multi_output_sek_shi(
    register_posterior_test,
    q_sqrt_factory,
    full_cov,
    full_output_cov,
    whiten,
    num_latent_gps,
    output_dims,
):
    """
    Independent multi-output posterior with separate independent kernels and shared
    inducing points.
    """
    kernel = gpflow.kernels.SeparateIndependent(
        [gpflow.kernels.SquaredExponential() for _ in range(num_latent_gps)]
    )
    inducing_variable = gpflow.inducing_variables.SharedIndependentInducingVariables(
        inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    )
    q_mu = np.random.randn(NUM_INDUCING_POINTS, num_latent_gps)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, num_latent_gps)

    conditional = create_conditional(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    register_posterior_test(posterior, IndependentPosteriorMultiOutput)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )

def test_fallback_independent_multi_output_shi(
    register_posterior_test,
    q_sqrt_factory,
    full_cov,
    full_output_cov,
    whiten,
    output_dims,
):
    """
    Fallback posterior with shared independent inducing variables.

    The FallbackIndependentLatentPosterior is a subclass of the
    FullyCorrelatedPosterior which requires a single latent GP function.
    """
    kernel = gpflow.kernels.LinearCoregionalization(
        [gpflow.kernels.SquaredExponential()], W=tf.random.normal((output_dims, 1))
    )
    inducing_variable = gpflow.inducing_variables.FallbackSharedIndependentInducingVariables(
        inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    )
    q_mu = np.random.randn(NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)

    conditional = create_conditional(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    register_posterior_test(posterior, FallbackIndependentLatentPosterior)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )

def test_fully_correlated_multi_output(
    register_posterior_test,
    q_sqrt_factory,
    full_cov,
    full_output_cov,
    whiten,
    output_dims,
):
    """
    The fully correlated posterior has one latent GP.
    """
    kernel = gpflow.kernels.SharedIndependent(
        gpflow.kernels.SquaredExponential(), output_dim=output_dims
    )
    inducing_variable = inducingpoint_wrapper(np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    q_mu = np.random.randn(output_dims * NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(output_dims * NUM_INDUCING_POINTS, 1)

    conditional = create_conditional(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    posterior = create_posterior(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
    )
    register_posterior_test(posterior, FullyCorrelatedPosterior)

    _assert_fused_predict_f_equals_precomputed_predict_f_and_conditional(
        posterior, conditional, full_cov, full_output_cov
    )

def __init__(
    self,
    kernel,
    likelihood,
    inducing_variable,
    *,
    mean_function=None,
    num_latent_gps: int = 1,
    q_diag: bool = False,
    q_mu=None,
    q_sqrt=None,
    whiten: bool = True,
    num_data=None,
):
    """
    - kernel, likelihood, inducing_variable, mean_function are appropriate
      GPflow objects
    - num_latent_gps is the number of latent processes to use, defaults to 1
    - q_diag is a boolean. If True, the covariance is approximated by a
      diagonal matrix.
    - whiten is a boolean. If True, we use the whitened representation of
      the inducing points.
    - num_data is the total number of observations, defaults to X.shape[0]
      (relevant when feeding in external minibatches)
    """
    # init the super class, accept args
    super().__init__(kernel, likelihood, mean_function, num_latent_gps)
    self.num_data = num_data
    self.q_diag = q_diag
    self.whiten = whiten
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # init variational parameters
    num_inducing = len(self.inducing_variable)
    self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)

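# A hedged usage sketch of this constructor, assuming the standard GPflow SVGP-style
# API: `num_data` is the full dataset size, so that when training on external
# minibatches the expected log-likelihood term of the ELBO is rescaled by
# num_data / batch_size.
import numpy as np
import tensorflow as tf
import gpflow

N, batch_size = 10_000, 256
X = np.random.randn(N, 1)
Y = np.sin(X) + 0.1 * np.random.randn(N, 1)

model = gpflow.models.SVGP(
    kernel=gpflow.kernels.SquaredExponential(),
    likelihood=gpflow.likelihoods.Gaussian(),
    inducing_variable=np.linspace(-2.0, 2.0, 50)[:, None],
    num_latent_gps=1,
    whiten=True,
    num_data=N,                       # total observations, not the minibatch size
)

dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat().shuffle(N).batch(batch_size)
training_loss = model.training_loss_closure(iter(dataset))  # closure over minibatches
optimizer = tf.keras.optimizers.Adam(0.01)
for _ in range(10):
    optimizer.minimize(training_loss, model.trainable_variables)
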
def __init__(self, kernel, inducing_variables, num_outputs, mean_function,
             input_prop_dim=None, white=False, **kwargs):
    super().__init__(input_prop_dim, **kwargs)
    self.num_inducing = inducing_variables.shape[0]
    self.mean_function = mean_function
    self.num_outputs = num_outputs
    self.white = white

    self.kernels = []
    for i in range(self.num_outputs):
        self.kernels.append(copy.deepcopy(kernel))

    # Initialise q_mu to all zeros
    q_mu = np.zeros((self.num_inducing, num_outputs))
    self.q_mu = Parameter(q_mu, dtype=default_float())

    # Initialise q_sqrt to identity function
    # q_sqrt = tf.tile(tf.expand_dims(tf.eye(self.num_inducing,
    #     dtype=default_float()), 0), (num_outputs, 1, 1))
    q_sqrt = [
        np.eye(self.num_inducing, dtype=default_float())
        for _ in range(num_outputs)
    ]
    q_sqrt = np.array(q_sqrt)
    # Store as lower triangular matrix L.
    self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    # Initialise to prior (Ku) + jitter.
    if not self.white:
        Kus = [
            self.kernels[i].K(inducing_variables)
            for i in range(self.num_outputs)
        ]
        Lus = [
            np.linalg.cholesky(Kus[i] + np.eye(self.num_inducing) * default_jitter())
            for i in range(self.num_outputs)
        ]
        q_sqrt = Lus
        q_sqrt = np.array(q_sqrt)
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    self.inducing_points = []
    for i in range(self.num_outputs):
        self.inducing_points.append(
            inducingpoint_wrapper(inducing_variables))

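# The branch above initialises q_sqrt to chol(Kuu + jitter*I) when white=False and keeps
# the identity when white=True. A small self-contained numerical check (illustration only,
# not part of the layer) of why both choices match the prior at initialisation: in the
# whitened parameterisation u = L v with q(v) = N(0, I), so cov(u) = L I L^T = Kuu.
import numpy as np

rng = np.random.default_rng(0)
Z = rng.standard_normal((5, 1))                       # 5 inducing locations, 1-D input
variance, lengthscale, jitter = 1.0, 1.0, 1e-6
sq_dist = (Z - Z.T) ** 2
Kuu = variance * np.exp(-0.5 * sq_dist / lengthscale ** 2) + jitter * np.eye(5)
L = np.linalg.cholesky(Kuu)                           # the non-whitened initial q_sqrt
# identity q_sqrt in the whitened space corresponds to chol(Kuu) in the unwhitened space
assert np.allclose(L @ np.eye(5) @ L.T, Kuu)
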
def __init__(
    self,
    encoder,
    kernel: Optional[Kernel] = None,
    inducing_variable=None,
    *,
    num_latent_gps: int = 1,  # Y.shape[-1]
    data_dim: tuple = None,
    mean_function=None,
    q_diag: bool = False,
    q_mu=None,
    q_sqrt=None,
    whiten: bool = False,
):
    """
    - kernel, likelihood, inducing_variables, mean_function are appropriate
      GPflow objects
    - num_latent_gps is the number of latent processes to use; defaults to 2,
      as the dimensionality reduction is to 2 dimensions
    - q_diag is a boolean. If True, the covariance is approximated by a
      diagonal matrix.
    - whiten is a boolean. If True, we use the whitened representation of
      the inducing points.
    - num_data is the total number of observations; defaults to X.shape[0]
      (relevant when feeding in external minibatches)

    The prior is by default a standard normal Gaussian.
    """
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    self.num_data = data_dim[0]
    self.num_latent_gps = data_dim[1]
    self.q_diag = q_diag
    self.whiten = whiten
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # init variational parameters
    num_inducing = self.inducing_variable.num_inducing
    self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)

    self.loss_placeholder = defaultdict(
        list, {k: [] for k in ("KL_x", "ELBO", "KL_u")})
    self.encoder = encoder

    # init the super class, accept args
    super().__init__(kernel, likelihoods.Gaussian(variance=0.1),
                     mean_function, num_latent_gps)

def test_posterior_update_cache_fails_without_argument(q_sqrt_factory, whiten):
    # setup posterior
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    q_mu = tf.Variable(np.random.randn(NUM_INDUCING_POINTS, 1))
    initial_q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)
    if initial_q_sqrt is not None:
        q_sqrt = tf.Variable(initial_q_sqrt)
    else:
        q_sqrt = initial_q_sqrt

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=None,
    )
    assert posterior.alpha is None
    assert posterior.Qinv is None

    with pytest.raises(ValueError):
        posterior.update_cache()

    posterior.update_cache(PrecomputeCacheType.TENSOR)
    assert isinstance(posterior.alpha, tf.Tensor)
    assert isinstance(posterior.Qinv, tf.Tensor)

    posterior.update_cache(PrecomputeCacheType.NOCACHE)
    assert posterior._precompute_cache == PrecomputeCacheType.NOCACHE
    assert posterior.alpha is None
    assert posterior.Qinv is None

    posterior.update_cache(
        PrecomputeCacheType.TENSOR)  # set posterior._precompute_cache
    assert posterior._precompute_cache == PrecomputeCacheType.TENSOR

    posterior.alpha = posterior.Qinv = None  # clear again
    posterior.update_cache()  # does not raise an exception
    assert isinstance(posterior.alpha, tf.Tensor)
    assert isinstance(posterior.Qinv, tf.Tensor)

def test_posterior_update_cache_with_variables_no_precompute(
        q_sqrt_factory, whiten, precompute_cache_type):
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    q_mu = np.random.randn(NUM_INDUCING_POINTS, 1)
    q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=precompute_cache_type,
    )
    posterior.update_cache(PrecomputeCacheType.VARIABLE)
    assert isinstance(posterior.alpha, tf.Variable)
    assert isinstance(posterior.Qinv, tf.Variable)

def test_posterior_update_cache_with_variables_update_value(q_sqrt_factory, whiten):
    # setup posterior
    kernel = gpflow.kernels.SquaredExponential()
    inducing_variable = inducingpoint_wrapper(
        np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS))
    q_mu = tf.Variable(np.random.randn(NUM_INDUCING_POINTS, 1))
    initial_q_sqrt = q_sqrt_factory(NUM_INDUCING_POINTS, 1)
    if initial_q_sqrt is not None:
        q_sqrt = tf.Variable(initial_q_sqrt)
    else:
        q_sqrt = initial_q_sqrt

    posterior = IndependentPosteriorSingleOutput(
        kernel=kernel,
        inducing_variable=inducing_variable,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
        whiten=whiten,
        precompute_cache=PrecomputeCacheType.TENSOR,
    )
    initial_alpha = posterior.alpha
    initial_Qinv = posterior.Qinv

    posterior.update_cache(PrecomputeCacheType.VARIABLE)

    # ensure the values of alpha and Qinv will change
    q_mu.assign_add(tf.ones_like(q_mu))
    if initial_q_sqrt is not None:
        q_sqrt.assign_add(tf.ones_like(q_sqrt))
    posterior.update_cache(PrecomputeCacheType.VARIABLE)

    # assert that the values have changed
    assert not np.allclose(initial_alpha, tf.convert_to_tensor(posterior.alpha))
    if initial_q_sqrt is not None:
        assert not np.allclose(initial_Qinv, tf.convert_to_tensor(posterior.Qinv))

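# The `q_sqrt_factory` fixture used throughout the tests above is not shown in this
# excerpt. A plausible sketch (an assumption about its shape contract, not the
# repository's actual fixture) covering the three cases the tests exercise:
# no q_sqrt, a diagonal [M, L] q_sqrt, and a full-Cholesky [L, M, M] q_sqrt.
import numpy as np
import pytest


@pytest.fixture(params=["none", "diagonal", "full"])
def q_sqrt_factory(request):
    def factory(num_inducing: int, num_latent_gps: int):
        if request.param == "none":
            return None
        if request.param == "diagonal":
            return np.ones((num_inducing, num_latent_gps))  # [M, L]
        # one lower-triangular factor per latent GP
        return np.stack([np.eye(num_inducing) for _ in range(num_latent_gps)])  # [L, M, M]

    return factory
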
def __init__(self, kernel, inducing_variables, q_mu_initial, q_sqrt_initial,
             mean_function, optimize_inducing_location=False, white=False, **kwargs):
    super().__init__(**kwargs)
    self.inducing_points = inducingpoint_wrapper(inducing_variables)
    gpflow.set_trainable(self.inducing_points, optimize_inducing_location)
    self.num_inducing = inducing_variables.shape[0]

    # Initialise q_mu to y^2_pi(i)
    q_mu = q_mu_initial[:, None]
    if optimize_inducing_location:
        q_mu = np.zeros((self.num_inducing, 1))
    self.q_mu = Parameter(q_mu, dtype=default_float())

    # Initialise q_sqrt to near deterministic. Store as lower triangular matrix L.
    q_sqrt = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
    # q_sqrt = np.diag(q_sqrt_initial)
    self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    self.kernel = kernel
    self.mean_function = mean_function
    self.white = white

def __init__(self, X, Y, M, mean_function=Zero(), white=False,
             Layer=SVGPLayer, **kwargs):
    self.temporal_layers = []
    for i in range(self.num_outputs):
        kerneli = self.temporal_kernel()
        inducing_inputs = inducingpoint_wrapper(
            kmeans2(X, M, minit='points')[0])
        layer = Layer(kerneli, inducing_inputs.Z, mean_function, white=white)
        self.temporal_layers.append(layer)
    super().__init__(**kwargs)

def __init__(
    self,
    data: OutputData,
    encoder,
    kernel: Optional[Kernel] = None,
    inducing_variable=None,
    X_prior_mean=None,
    X_prior_var=None,
):
    """
    Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

    :param data: data matrix, size N (number of points) x D (dimensions)
    :param X_data_mean: initial latent positions, size N (number of points) x
        Q (latent dimensions).
    :param X_data_var: variance of latent positions ([N, Q]), for the
        initialisation of the latent space.
    :param kernel: kernel specification, by default Squared Exponential
    :param num_inducing_variables: number of inducing points, M
    :param inducing_variable: matrix of inducing points, size M (inducing points) x
        Q (latent dimensions). By default random permutation of X_data_mean.
    :param X_prior_mean: prior mean used in KL term of bound. By default 0.
        Same size as X_data_mean.
    :param X_prior_var: prior variance used in KL term of bound. By default 1.
    """
    self.latent_dimensions = 2

    # grab data
    self.data = data_input_to_tensor(data)
    num_data, num_latent_gps = data.shape
    self.num_data = num_data

    # define kernel
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    # init GPModel
    super().__init__(kernel,
                     likelihoods.Gaussian(variance=0.1),
                     num_latent_gps=num_latent_gps)

    # init inducing-point parameter
    if inducing_variable is None:
        raise ValueError("This model requires `inducing_variable` to be specified")
    else:
        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # loss placeholder for analysis purposes
    self.loss_placeholder = defaultdict(list, {k: [] for k in ("KL_x", "ELBO")})

    # deal with parameters for the prior mean and variance of X
    if X_prior_mean is None:
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                dtype=default_float())
    if X_prior_var is None:
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                              dtype=default_float())
    self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                             dtype=default_float())
    self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                            dtype=default_float())

    # Encoder
    self.encoder = encoder

    # sanity checks
    # assert np.all(X_data_mean.shape == X_data_var.shape)
    # assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
    # assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
    # assert X_data_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_mean.shape[0] == self.num_data
    assert self.X_prior_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_var.shape[0] == self.num_data
    assert self.X_prior_var.shape[1] == self.latent_dimensions

def __init__(
    self,
    data: OutputData,
    split_space: bool,
    Xp_mean: tf.Tensor,
    Xp_var: tf.Tensor,
    pi: tf.Tensor,
    kernel_K: List[Kernel],
    Zp: tf.Tensor,
    Xs_mean=None,
    Xs_var=None,
    kernel_s=None,
    Zs=None,
    Xs_prior_mean=None,
    Xs_prior_var=None,
    Xp_prior_mean=None,
    Xp_prior_var=None,
    pi_prior=None,
):
    """
    Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

    :param data: data matrix, size N (number of points) x D (dimensions)
    :param split_space: if True, have both shared and private spaces; if False, only
        have private spaces (note: to recover GPLVM, set split_space=False and let K=1)
    :param Xp_mean: mean latent positions in the private space [N, Qp]
        (Qp is the dimension of the private space)
    :param Xp_var: variance of the latent positions in the private space [N, Qp]
    :param pi: mixture responsibility of each category for each point [N, K]
        (K is the number of categories), i.e. q(c)
    :param kernel_K: private space kernels, one for each category
    :param Zp: inducing inputs of the private space [M, Qp]
    :param Xs_mean: mean latent positions in the shared space [N, Qs]
        (Qs is the dimension of the shared space), i.e. mu_s in q(Xs) ~ N(Xs | mu_s, S_s)
    :param Xs_var: variance of latent positions in the shared space [N, Qs],
        i.e. S_s, assumed diagonal
    :param kernel_s: shared space kernel
    :param Zs: inducing inputs of the shared space [M, Qs]
        (M is the number of inducing points)
    :param Xs_prior_mean: prior mean used in KL term of bound, [N, Qs].
        By default 0. Mean of p(Xs).
    :param Xs_prior_var: prior variance used in KL term of bound, [N, Qs].
        By default 1. Variance of p(Xs).
    :param Xp_prior_mean: prior mean used in KL term of bound, [N, Qp].
        By default 0. Mean of p(Xp).
    :param Xp_prior_var: prior variance used in KL term of bound, [N, Qp].
        By default 1. Variance of p(Xp).
    :param pi_prior: prior mixture weights used in KL term of the bound, [N, K].
        By default uniform. p(c).
    """
    # If you don't want a shared space, set the shared space to None --> a mixture of GPLVMs.
    # If you don't want a private space, set the shared space to None, set K = 1 and include
    # only one kernel in `kernel_K` --> recover the original GPLVM.
    # TODO: think about how to do this with minibatches.
    # It is awkward since with minibatches the model usually doesn't store the data
    # internally, but for GPLVM you need to keep the q(xn) for all n, so you need to know
    # which ones to update for each minibatch; probably solvable but not pretty.
    # Using an inference network / back constraints would solve this, since we would keep a
    # global set of parameters rather than one set per q(xn).
    self.N, self.D = data.shape
    self.Qp = Xp_mean.shape[1]
    self.K = pi.shape[1]
    self.split_space = split_space

    assert Xp_var.ndim == 2
    assert len(kernel_K) == self.K
    assert np.all(Xp_mean.shape == Xp_var.shape)
    assert Xp_mean.shape[0] == self.N, "Xp_mean and Y must be of same size"
    assert pi.shape[0] == self.N, "pi and Y must be of the same size"

    super().__init__()
    self.likelihood = likelihoods.Gaussian()
    self.kernel_K = kernel_K
    self.data = data_input_to_tensor(data)

    # The covariance of q(X) as an [N, Q] matrix; the assumption is that the Sn's are
    # diagonal, i.e. the latent dimensions are uncorrelated. Otherwise we would require an
    # [N, Q, Q] matrix.
    self.Xp_mean = Parameter(Xp_mean)
    self.Xp_var = Parameter(Xp_var, transform=positive())
    self.pi = Parameter(pi, transform=tfp.bijectors.SoftmaxCentered())
    self.Zp = inducingpoint_wrapper(Zp)
    self.M = len(self.Zp)

    # Initialize the variational parameters for q(U), the same way as in SVGP:
    # q_mu: List[K], mean of the inducing variables U [M, D], i.e. m in q(U) ~ N(U | m, S),
    #     initialized as zeros
    # q_sqrt: List[K], Cholesky of the covariance matrix of the inducing variables [D, M, M],
    #     initialized as identities
    # q_diag is False because natural gradients only work with a full covariance.
    # We need K sets of q(Uk), each approximating fs + fk.
    self.q_mu = []
    self.q_sqrt = []
    for k in range(self.K):
        q_mu = np.zeros((self.M, self.D))
        q_mu = Parameter(q_mu, dtype=default_float())  # [M, D]
        self.q_mu.append(q_mu)

        q_sqrt = [
            np.eye(self.M, dtype=default_float()) for _ in range(self.D)
        ]
        q_sqrt = np.array(q_sqrt)
        q_sqrt = Parameter(q_sqrt, transform=triangular())  # [D, M, M]
        self.q_sqrt.append(q_sqrt)

    # deal with parameters for the prior
    if Xp_prior_mean is None:
        Xp_prior_mean = tf.zeros((self.N, self.Qp), dtype=default_float())
    if Xp_prior_var is None:
        Xp_prior_var = tf.ones((self.N, self.Qp))
    if pi_prior is None:
        pi_prior = tf.ones((self.N, self.K), dtype=default_float()) * 1 / self.K
    self.Xp_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xp_prior_mean),
                                              dtype=default_float())
    self.Xp_prior_var = tf.convert_to_tensor(np.atleast_1d(Xp_prior_var),
                                             dtype=default_float())
    self.pi_prior = tf.convert_to_tensor(np.atleast_1d(pi_prior),
                                         dtype=default_float())

    # If we have both a shared space and private spaces, we also need to initialize the
    # parameters of the shared space.
    if split_space:
        assert Xs_mean is not None and Xs_var is not None and kernel_s is not None and Zs is not None, \
            "Xs_mean, Xs_var, kernel_s, Zs need to be initialized if `split_space=True`"
        assert Xs_var.ndim == 2
        assert np.all(Xs_mean.shape == Xs_var.shape)
        assert Xs_mean.shape[0] == self.N, "Xs_mean and Y must be of same size"

        self.Qs = Xs_mean.shape[1]
        self.kernel_s = kernel_s
        self.Xs_mean = Parameter(Xs_mean)
        self.Xs_var = Parameter(Xs_var, transform=positive())
        self.Zs = inducingpoint_wrapper(Zs)
        if len(Zs) != len(Zp):
            raise ValueError("`Zs` and `Zp` should have the same length")

        if Xs_prior_mean is None:
            Xs_prior_mean = tf.zeros((self.N, self.Qs), dtype=default_float())
        if Xs_prior_var is None:
            Xs_prior_var = tf.ones((self.N, self.Qs))
        self.Xs_prior_mean = tf.convert_to_tensor(np.atleast_1d(Xs_prior_mean),
                                                  dtype=default_float())
        self.Xs_prior_var = tf.convert_to_tensor(np.atleast_1d(Xs_prior_var),
                                                 dtype=default_float())

    self.Fq = tf.zeros((self.N, self.K), dtype=default_float())

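# A shape-oriented sketch of the constructor arguments described in the docstring above
# (illustration only; the sizes and random initialisation below are assumptions, and the
# model class itself is not constructed here).
import numpy as np
import gpflow

N, D, K, Qp, M = 200, 6, 3, 2, 30
Y = np.random.randn(N, D)                                            # data: [N, D]
Xp_mean = np.random.randn(N, Qp)                                     # q(Xp) means: [N, Qp]
Xp_var = np.full((N, Qp), 0.1)                                       # q(Xp) variances: [N, Qp]
pi = np.full((N, K), 1.0 / K)                                        # q(c) responsibilities: [N, K]
kernel_K = [gpflow.kernels.SquaredExponential() for _ in range(K)]   # one private-space kernel per category
Zp = Xp_mean[np.random.permutation(N)[:M]]                           # private-space inducing inputs: [M, Qp]
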
def __init__(
    self,
    data: OutputData,
    X_data_mean: Optional[tf.Tensor] = None,
    X_data_var: Optional[tf.Tensor] = None,
    kernel: Optional[Kernel] = None,
    num_inducing_variables: Optional[int] = None,
    inducing_variable=None,
    X_prior_mean=None,
    X_prior_var=None,
):
    """
    Initialise Bayesian GPLVM object. This method only works with a Gaussian likelihood.

    :param data: data matrix, size N (number of points) x D (dimensions)
    :param X_data_mean: initial latent positions, size N (number of points) x
        Q (latent dimensions).
    :param X_data_var: variance of latent positions ([N, Q]), for the
        initialisation of the latent space.
    :param kernel: kernel specification, by default Squared Exponential
    :param num_inducing_variables: number of inducing points, M
    :param inducing_variable: matrix of inducing points, size M (inducing points) x
        Q (latent dimensions). By default random permutation of X_data_mean.
    :param X_prior_mean: prior mean used in KL term of bound. By default 0.
        Same size as X_data_mean.
    :param X_prior_var: prior variance used in KL term of bound. By default 1.
    """
    self.latent_dimensions = 2

    # grab data
    self.data = data_input_to_tensor(data)

    # define latent-space initialization
    if X_data_mean is None:
        X_data_mean = pca_reduce(data, self.latent_dimensions)

    num_data, num_latent_gps = X_data_mean.shape
    self.num_data = num_data

    if X_data_var is None:
        X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                             dtype=default_float())
    assert X_data_var.ndim == 2

    self.output_dim = self.data.shape[-1]  # num_latent maybe

    # define kernel
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    # init GPModel
    super().__init__(kernel,
                     likelihoods.Gaussian(variance=0.1),
                     num_latent_gps=num_latent_gps)

    # init latent Parameters
    self.X_data_mean = Parameter(X_data_mean)
    self.X_data_var = Parameter(X_data_var, transform=positive())

    # init inducing-point parameter
    if (inducing_variable is None) == (num_inducing_variables is None):
        raise ValueError(
            "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
        )
    if inducing_variable is None:
        # By default we initialize with a subset of the initial latent points.
        # Note that tf.random.shuffle returns a copy, it does not shuffle in-place.
        # Maybe use k-means clustering instead.
        Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
        inducing_variable = InducingPoints(Z)
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # loss placeholder for analysis purposes
    self.loss_placeholder = defaultdict(list, {k: [] for k in ("KL_x", "ELBO")})

    # deal with parameters for the prior mean and variance of X
    if X_prior_mean is None:
        X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                                dtype=default_float())
    if X_prior_var is None:
        X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                              dtype=default_float())
    self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                             dtype=default_float())
    self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                            dtype=default_float())

    # sanity checks
    assert np.all(X_data_mean.shape == X_data_var.shape)
    assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
    assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
    assert X_data_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_mean.shape[0] == self.num_data
    assert self.X_prior_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_var.shape[0] == self.num_data
    assert self.X_prior_var.shape[1] == self.latent_dimensions

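# This constructor closely mirrors GPflow's stock BayesianGPLVM, with the latent
# dimensionality fixed at 2 and a `loss_placeholder` added for analysis. A hedged
# usage sketch against the stock class (the data sizes below are arbitrary examples):
import numpy as np
import tensorflow as tf
import gpflow
from gpflow.config import default_float
from gpflow.utilities import ops

Y = np.random.randn(100, 12)                                  # N x D observations
X_mean = ops.pca_reduce(tf.convert_to_tensor(Y), 2)           # N x 2 latent means from PCA
X_var = tf.ones((100, 2), dtype=default_float())              # initial latent variances

model = gpflow.models.BayesianGPLVM(
    data=Y,
    X_data_mean=X_mean,
    X_data_var=X_var,
    kernel=gpflow.kernels.SquaredExponential(),
    num_inducing_variables=20,     # Z initialised as a random subset of X_data_mean
)
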
def __init__(
    self,
    data: OutputData,
    kernel: Optional[Kernel] = None,
    latent_dimensions: Optional[int] = 2,
    num_inducing_variables: Optional[int] = None,
    inducing_variable=None,
    *,
    mean_function=None,
    q_diag: bool = False,
    q_mu=None,
    q_sqrt=None,
    whiten: bool = False,
):
    """
    - kernel, likelihood, inducing_variables, mean_function are appropriate
      GPflow objects
    - num_latent_gps is the number of latent processes to use; defaults to 2,
      as the dimensionality reduction is to 2 dimensions
    - q_diag is a boolean. If True, the covariance is approximated by a
      diagonal matrix.
    - whiten is a boolean. If True, we use the whitened representation of
      the inducing points.
    - num_data is the total number of observations; defaults to X.shape[0]
      (relevant when feeding in external minibatches)
    """
    self.latent_dimensions = latent_dimensions

    # grab data
    self.data = data_input_to_tensor(data)

    # define latent-space initialization
    X_data_mean = pca_reduce(data, self.latent_dimensions)
    num_data, num_latent_gps = data.shape
    self.num_data = num_data
    X_data_var = tf.ones((self.num_data, self.latent_dimensions),
                         dtype=default_float())
    assert X_data_var.ndim == 2

    # define kernel
    if kernel is None:
        kernel = gpflow.kernels.SquaredExponential()

    # init latent Parameters
    self.X_data_mean = Parameter(X_data_mean)
    self.X_data_var = Parameter(X_data_var, transform=positive())

    # init inducing-point parameter
    if (inducing_variable is None) == (num_inducing_variables is None):
        raise ValueError(
            "BayesianGPLVM needs exactly one of `inducing_variable` and `num_inducing_variables`"
        )
    if inducing_variable is None:
        # By default we initialize with a subset of the initial latent points.
        # Note that tf.random.shuffle returns a copy, it does not shuffle in-place.
        # Maybe use k-means clustering instead.
        Z = tf.random.shuffle(X_data_mean)[:num_inducing_variables]
        inducing_variable = InducingPoints(Z)
    self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # loss placeholder for analysis purposes
    self.loss_placeholder = defaultdict(
        list, {k: [] for k in ("KL_x", "ELBO", "KL_u")})

    # deal with parameters for the prior mean and variance of X
    X_prior_mean = tf.zeros((self.num_data, self.latent_dimensions),
                            dtype=default_float())
    X_prior_var = tf.ones((self.num_data, self.latent_dimensions),
                          dtype=default_float())
    self.X_prior_mean = tf.convert_to_tensor(np.atleast_1d(X_prior_mean),
                                             dtype=default_float())
    self.X_prior_var = tf.convert_to_tensor(np.atleast_1d(X_prior_var),
                                            dtype=default_float())

    # sanity checks
    assert np.all(X_data_mean.shape == X_data_var.shape)
    assert X_data_mean.shape[0] == self.data.shape[0], "X mean and Y must be same size."
    assert X_data_var.shape[0] == self.data.shape[0], "X var and Y must be same size."
    assert X_data_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_mean.shape[0] == self.num_data
    assert self.X_prior_mean.shape[1] == self.latent_dimensions
    assert self.X_prior_var.shape[0] == self.num_data
    assert self.X_prior_var.shape[1] == self.latent_dimensions

    # init the super class, accept args
    super().__init__(kernel,
                     likelihoods.Gaussian(variance=0.1),
                     mean_function,
                     num_latent_gps)
    self.q_diag = q_diag
    self.whiten = whiten
    # self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    # init variational parameters
    num_inducing = self.inducing_variable.num_inducing
    self._init_variational_parameters(num_inducing, q_mu, q_sqrt, q_diag)
