import warnings


def model_mixture_adaptive(X, ls=1., n_mix=2, ridge_factor=1e-3):
  """Defines the Adaptive Mixture of Gaussian Process Model.

  Note: Currently this method is not tested and is likely to not work well,
    due to the explicit sampling of membership variables (i.e. mix_member).
    More work needs to be done to perform integrated sampling.

  Args:
    X: (np.ndarray of float32) input training features, with dimension (N, D).
    ls: (float32) length-scale parameter.
    n_mix: (int8) Number of mixture components.
    ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  # TODO(jereliu): find a way to integrate over the adaptive mixture.
  warnings.warn(
      "Currently this method is not tested and is likely to not "
      "work well, due to the explicit sampling of membership variables "
      "(i.e. mix_member). More work needs to be done to perform "
      "integrated sampling.")

  N = X.shape[0]
  K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

  gp_weight = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix)),
      reinterpreted_batch_ndims=1, name="gp_w")
  mix_member = ed.Multinomial(total_count=[1.],
                              logits=tf.transpose(gp_weight),
                              name="mix_prob")

  gp_comp = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix)),
      reinterpreted_batch_ndims=1, name="gp_f")

  # Explicitly sample the membership variables and mix the component GPs.
  gp_f = tf.reduce_sum(tf.transpose(gp_comp) * mix_member, axis=-1)

  sigma = ed.Normal(loc=-5., scale=1., name='sigma')

  y = ed.MultivariateNormalDiag(loc=gp_f,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")
  # Alternative using the Mixture family (see model_mixture_adaptive2):
  # y = ed.MixtureSameFamily(
  #     components_distribution=tfd.MultivariateNormalDiag(
  #         loc=gp_comp, scale_identity_multiplier=tf.exp(sigma)),
  #     mixture_distribution=tfd.Categorical(logits=gp_weight),
  #     name="y")

  return gp_weight, mix_member, gp_comp, sigma, y
def __call__(self, shape, dtype=None, partition_info=None):
  del partition_info  # unused arg
  if not self.built:
    self.build(shape, dtype)
  # Return a Normal random variable whose event shape is the full weight
  # shape, so the initializer yields a distribution rather than a point value.
  return ed.Independent(
      ed.Normal(loc=self.mean, scale=self.stddev).distribution,
      reinterpreted_batch_ndims=len(shape))
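# Usage sketch (an addition, not from the original module): a trainable
# initializer like the one above returns a weight *random variable* rather
# than a point value, so a Bayesian layer can treat its kernel as a
# distribution. `initializer` stands in for a built instance of the
# enclosing class, which this file does not show.
def _example_trainable_normal_initializer(initializer):
  kernel = initializer([3, 4])  # ed.RandomVariable with event shape [3, 4].
  # Sampling happens implicitly when the random variable is used as a Tensor.
  return tf.convert_to_tensor(kernel)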
def call(self, inputs):
  if self.conditional_inputs is None and self.conditional_outputs is None:
    covariance_matrix = self.covariance_fn(inputs, inputs)
    # Tile locations so output has shape [units, batch_size]. Covariance will
    # broadcast to [units, batch_size, batch_size], and we perform
    # shape manipulations to get a random variable over [batch_size, units].
    loc = self.mean_fn(inputs)
    loc = tf.tile(loc[tf.newaxis], [self.units] + [1] * len(loc.shape))
  else:
    knn = self.covariance_fn(inputs, inputs)
    knm = self.covariance_fn(inputs, self.conditional_inputs)
    kmm = self.covariance_fn(self.conditional_inputs, self.conditional_inputs)
    kmm = tf.matrix_set_diag(
        kmm, tf.matrix_diag_part(kmm) + tf.keras.backend.epsilon())
    kmm_tril = tf.linalg.cholesky(kmm)
    kmm_tril_operator = tf.linalg.LinearOperatorLowerTriangular(kmm_tril)
    knm_operator = tf.linalg.LinearOperatorFullMatrix(knm)

    # TODO(trandustin): Vectorize linear algebra for multiple outputs. For
    # now, we do each separately and stack to obtain a locations Tensor of
    # shape [units, batch_size].
    loc = []
    for conditional_outputs_unit in tf.unstack(self.conditional_outputs,
                                               axis=-1):
      center = conditional_outputs_unit - self.mean_fn(self.conditional_inputs)
      loc_unit = knm_operator.matvec(
          kmm_tril_operator.solvevec(kmm_tril_operator.solvevec(center),
                                     adjoint=True))
      loc.append(loc_unit)
    loc = tf.stack(loc) + self.mean_fn(inputs)[tf.newaxis]

    covariance_matrix = knn
    covariance_matrix -= knm_operator.matmul(
        kmm_tril_operator.solve(
            kmm_tril_operator.solve(knm, adjoint_arg=True), adjoint=True))

  covariance_matrix = tf.matrix_set_diag(
      covariance_matrix,
      tf.matrix_diag_part(covariance_matrix) + tf.keras.backend.epsilon())

  # Form a multivariate normal random variable with batch_shape units and
  # event_shape batch_size. Then make it be independent across the units
  # dimension. Then transpose its dimensions so it is [batch_size, units].
  random_variable = ed.MultivariateNormalFullCovariance(
      loc=loc, covariance_matrix=covariance_matrix)
  random_variable = ed.Independent(random_variable.distribution,
                                   reinterpreted_batch_ndims=1)
  bijector = tfp.bijectors.Inline(
      forward_fn=lambda x: tf.transpose(x, [1, 0]),
      inverse_fn=lambda y: tf.transpose(y, [1, 0]),
      forward_event_shape_fn=lambda input_shape: input_shape[::-1],
      forward_event_shape_tensor_fn=lambda input_shape: input_shape[::-1],
      inverse_log_det_jacobian_fn=lambda y: tf.cast(0, y.dtype),
      forward_min_event_ndims=2)
  random_variable = ed.TransformedDistribution(random_variable.distribution,
                                               bijector=bijector)
  return random_variable
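# Usage sketch (an addition): how the `call` method above is typically
# exercised. `GaussianProcess` is assumed to be the enclosing Keras-style
# layer class with a `units` constructor argument; that name and signature
# are assumptions, not confirmed by this file.
def _example_gaussian_process_layer():
  gp_layer = GaussianProcess(units=1)  # hypothetical enclosing layer class
  features = tf.random.normal([8, 3])  # batch of 8 inputs with 3 features
  outputs = gp_layer(features)  # ed.RandomVariable over [batch_size, units]
  # The returned random variable exposes its distribution, e.g. for log-probs.
  return outputs.distribution.log_prob(tf.zeros([8, 1]))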
def model_mixture_adaptive2(X, ls=1., n_mix=2, ridge_factor=1e-3):
  """Alternative representation of the adaptive mixture using the Mixture family.

  Args:
    X: (np.ndarray of float32) input training features, with dimension (N, D).
    ls: (float32) length-scale parameter.
    n_mix: (int8) Number of mixture components.
    ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  N = X.shape[0]
  K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

  gp_weight = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix)),
      reinterpreted_batch_ndims=1, name="gp_w")
  mix_member = ed.Multinomial(total_count=[1.],
                              logits=tf.transpose(gp_weight),
                              name="mix_prob")

  gp_comp = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix)),
      reinterpreted_batch_ndims=1, name="gp_f")

  sigma = ed.Normal(loc=tf.ones(n_mix) * -5.,
                    scale=tf.ones(n_mix) * 1., name='sigma')

  y = ed.MixtureSameFamily(
      components_distribution=tfd.MultivariateNormalDiag(
          loc=gp_comp, scale_identity_multiplier=tf.exp(sigma)),
      mixture_distribution=tfd.Categorical(logits=tf.transpose(gp_weight)),
      name="y")

  return gp_weight, mix_member, gp_comp, sigma, y
def __call__(self, x):
  """Computes regularization given an ed.Normal random variable as input."""
  if not isinstance(x, ed.RandomVariable):
    raise ValueError('Input must be an ed.RandomVariable.')
  random_variable = ed.Independent(
      ed.Normal(
          loc=tf.broadcast_to(self.mean, x.distribution.event_shape),
          scale=tf.broadcast_to(self.stddev, x.distribution.event_shape)
      ).distribution,
      reinterpreted_batch_ndims=len(x.distribution.event_shape))
  return random_variable.distribution.kl_divergence(x.distribution)
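# Usage sketch (an addition): the regularizer above computes the exact KL
# divergence from an ed.Normal-family posterior to the broadcast Normal
# prior. `regularizer` stands in for an instance of the enclosing class,
# whose name and constructor this file does not show.
def _example_normal_kl_regularizer(regularizer):
  posterior = ed.Independent(
      ed.Normal(loc=tf.zeros([5]), scale=tf.ones([5])).distribution,
      reinterpreted_batch_ndims=1)
  # Scalar Tensor: KL(posterior || Normal(regularizer.mean, regularizer.stddev)).
  return regularizer(posterior)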
def __call__(self, shape=None, dtype=None, partition_info=None):
  del shape, dtype, partition_info  # Unused in TrainableInitializers.
  # TODO(dusenberrymw): Restructure so that we can build as needed.
  if not self.built:
    raise ValueError('A TrainableInitializer must be built by a layer before '
                     'usage, and is currently only compatible with Bayesian '
                     'layers.')
  return ed.Independent(
      ed.Normal(loc=self.mean, scale=self.stddev).distribution,
      reinterpreted_batch_ndims=len(self.shape))
def __call__(self, x):
  """Computes regularization using an unbiased Monte Carlo estimate."""
  prior = ed.Independent(
      ed.HalfCauchy(
          loc=tf.broadcast_to(self.loc, x.distribution.event_shape),
          scale=tf.broadcast_to(self.scale, x.distribution.event_shape)
      ).distribution,
      reinterpreted_batch_ndims=len(x.distribution.event_shape))
  negative_entropy = x.distribution.log_prob(x)
  cross_entropy = -prior.distribution.log_prob(x)
  return negative_entropy + cross_entropy
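# Usage sketch (an addition): unlike the exact Normal KL above, the
# HalfCauchy prior admits no closed-form KL against typical posteriors, so
# the method returns a single-sample Monte Carlo estimate of
# E_q[log q(x) - log p(x)], evaluated at a fresh draw x ~ q. `regularizer`
# again stands in for an instance of the enclosing (unnamed) class.
def _example_half_cauchy_regularizer(regularizer):
  # A positive-valued posterior, matching the HalfCauchy prior's support.
  posterior = ed.Independent(
      ed.LogNormal(loc=tf.zeros([5]), scale=tf.ones([5])).distribution,
      reinterpreted_batch_ndims=1)
  # Each call draws a new posterior sample, so the penalty is stochastic.
  return regularizer(posterior)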
def model_mixture(X, ls=1., n_mix=2, ridge_factor=1e-3):
  """Defines the Mixture of Gaussian Process model.

  Args:
    X: (np.ndarray of float32) input training features, with dimension (N, D).
    ls: (float32) length-scale parameter.
    n_mix: (int8) Number of mixture components.
    ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  N = X.shape[0]
  K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

  mix_prob = ed.Dirichlet(
      concentration=tf.ones(n_mix, dtype=tf.float32) / n_mix,
      name='mix_prob')

  gp_f = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix)),
      reinterpreted_batch_ndims=1, name="gp_f")

  sigma = ed.Normal(loc=tf.ones(n_mix) * -5.,
                    scale=tf.ones(n_mix) * 1., name='sigma')

  y = ed.MixtureSameFamily(
      components_distribution=tfd.MultivariateNormalDiag(
          loc=gp_f, scale_identity_multiplier=tf.exp(sigma)),
      mixture_distribution=tfd.Categorical(probs=mix_prob),
      name="y")

  return mix_prob, gp_f, sigma, y
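# Usage sketch (an addition, assuming the TF1-style session API this module
# is written against, plus the `rbf` and `replicate_along_zero_axis` helpers
# defined elsewhere in this repo): draw one sample from the prior defined by
# model_mixture.
def _example_model_mixture():
  import numpy as np
  X_train = np.random.normal(size=(20, 1)).astype(np.float32)
  mix_prob, gp_f, sigma, y = model_mixture(X_train, ls=0.5, n_mix=2)
  with tf.Session() as sess:
    # ed.RandomVariables behave like Tensors; running `y` samples the prior
    # predictive, a mixture draw over all N=20 training locations.
    return sess.run(y)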