def _tril_spherical_uniform(dimension, batch_shape, dtype, seed):
  """Returns a `Tensor` of samples of lower triangular matrices.

  Each row of the lower triangular part follows a spherical uniform
  distribution.

  Args:
    dimension: Scalar `int` `Tensor`, representing the dimensionality of the
      output matrices.
    batch_shape: Vector-shaped, `int` `Tensor` representing batch shape of
      output. The output will have shape `batch_shape + [dimension,
      dimension]`.
    dtype: TF `dtype` representing `dtype` of output.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

  Returns:
    tril_spherical_uniform: `Tensor` with specified `batch_shape` and `dtype`
      consisting of real values drawn row-wise from a spherical uniform
      distribution.
  """
  # Essentially, we will draw lower triangular samples where each lower
  # triangular entry follows a normal distribution, then apply `x / norm(x)`
  # for each row of the samples.
  # To avoid possible NaNs, we will use spherical_uniform directly for
  # the first two rows.
  assert dimension > 0, '`dimension` needs to be positive.'
  num_seeds = min(dimension, 3)
  seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
  rows = []
  paddings_prepend = [[0, 0]] * len(batch_shape)
  for n in range(1, min(dimension, 2) + 1):
    rows.append(
        tf.pad(
            random_ops.spherical_uniform(
                shape=batch_shape, dimension=n, dtype=dtype,
                seed=seeds.pop()),
            paddings_prepend + [[0, dimension - n]],
            constant_values=0.))
  samples = tf.stack(rows, axis=-2)
  if dimension > 2:
    normal_shape = ps.concat(
        [batch_shape, [dimension * (dimension + 1) // 2 - 3]], axis=0)
    normal_samples = samplers.normal(
        shape=normal_shape, dtype=dtype, seed=seeds.pop())
    # We fill the first two rows of the triangular matrix with ones.
    # Note that fill_triangular fills elements in a clockwise spiral.
    normal_samples = tf.concat([
        normal_samples[..., :dimension],
        tf.ones(ps.concat([batch_shape, [1]], axis=0), dtype=dtype),
        normal_samples[..., dimension:(2 * dimension - 1)],
        tf.ones(ps.concat([batch_shape, [2]], axis=0), dtype=dtype),
        normal_samples[..., (2 * dimension - 1):],
    ], axis=-1)
    normal_samples = linalg.fill_triangular(
        normal_samples, upper=False)[..., 2:, :]
    remaining_rows = normal_samples / tf.norm(
        normal_samples, ord=2, axis=-1, keepdims=True)
    samples = tf.concat([samples, remaining_rows], axis=-2)
  return samples
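
# Hedged usage sketch for `_tril_spherical_uniform` (assumes the module-level
# imports used above, e.g. `tf` and `samplers`; the demo name is
# hypothetical). Row k of each output has k + 1 nonzero entries drawn
# uniformly from the unit k-sphere, so every row has unit norm.
def _demo_tril_spherical_uniform():
  tril = _tril_spherical_uniform(
      dimension=4, batch_shape=[2], dtype=tf.float32,
      seed=samplers.sanitize_seed(42))
  # tril has shape [2, 4, 4]; the zero padding leaves the row norms at 1.
  tf.debugging.assert_near(tf.norm(tril, axis=-1), tf.ones([2, 4]))
  return tril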
def _sample_direction_part(state_part, part_seed):
  state_part_shape = ps.shape(state_part)
  batch_shape = state_part_shape[:batch_rank]
  dimension = ps.reduce_prod(state_part_shape[batch_rank:])
  return ps.reshape(
      random_ops.spherical_uniform(
          shape=batch_shape,
          dimension=dimension,
          dtype=state_part.dtype,
          seed=part_seed),
      state_part_shape)
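
# Hedged context sketch (hypothetical, not library code): the helper above is
# written as a closure, so `batch_rank` must come from the enclosing scope;
# it counts how many leading dimensions of each state part are batch
# dimensions. The remaining (event) dimensions are flattened, a point is
# drawn uniformly from the unit hypersphere of that flattened size, and the
# result is reshaped back to the state part's shape.
batch_rank = 1  # hypothetical: one leading batch dimension.

def _demo_sample_direction_part():
  state_part = tf.zeros([3, 2, 2])  # batch shape [3], event shape [2, 2].
  direction = _sample_direction_part(state_part, samplers.sanitize_seed(0))
  # Same shape as `state_part`, unit norm over the flattened event dims.
  tf.debugging.assert_near(
      tf.norm(tf.reshape(direction, [3, -1]), axis=-1), tf.ones([3]))
  return direction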
def _sample_n(self, n, seed=None):
  mean_direction = tf.convert_to_tensor(self.mean_direction)
  concentration = tf.convert_to_tensor(self.concentration)
  event_size_int = self._event_shape_tensor(
      mean_direction=mean_direction)[0]
  event_size = tf.cast(event_size_int, dtype=self.dtype)

  beta_seed, uniform_seed = samplers.split_seed(
      seed, salt='power_spherical')

  broadcasted_concentration = tf.broadcast_to(
      concentration,
      self._batch_shape_tensor(
          mean_direction=mean_direction, concentration=concentration))
  beta = beta_lib.Beta(
      (event_size - 1.) / 2. + broadcasted_concentration,
      (event_size - 1.) / 2.)
  beta_samples = beta.sample(n, seed=beta_seed)

  u_shape = ps.concat([
      [n],
      self._batch_shape_tensor(
          mean_direction=mean_direction, concentration=concentration)
  ], axis=0)
  spherical_samples = random_ops.spherical_uniform(
      shape=u_shape,
      dimension=event_size_int - 1,
      dtype=self.dtype,
      seed=uniform_seed)

  t = 2. * beta_samples - 1.
  y = tf.concat([
      t[..., tf.newaxis],
      tf.math.sqrt(1. - tf.math.square(t))[..., tf.newaxis] *
      spherical_samples
  ], axis=-1)

  u = tf.concat(
      [(1. - mean_direction[..., 0])[..., tf.newaxis],
       -mean_direction[..., 1:]], axis=-1)
  # Much like `VonMisesFisher`, we use `l2_normalize`, which does nothing
  # if the zero vector is passed in, and thus the Householder reflection
  # will do nothing.
  # This is consistent with sampling with `mu = [1, 0, 0, ..., 0]`, since
  # samples will be of the form:
  #   [w, sqrt(1 - w**2) * u] = w * mu + sqrt(1 - w**2) * v,
  # where:
  #   * `u` is a unit vector sampled from the unit hypersphere.
  #   * `v` is `[0, u]`.
  # This form is the same as sampling from the tangent-normal decomposition.
  u = tf.math.l2_normalize(u, axis=-1)
  return tf.math.l2_normalize(
      y - 2. * tf.math.reduce_sum(y * u, axis=-1, keepdims=True) * u,
      axis=-1)
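
# The reflection on the last line above is just a Householder transform: for
# unit `u`, H(u) y = y - 2 <y, u> u. A hedged numerical sketch of that
# identity against `tf.linalg.LinearOperatorHouseholder` (the operator the
# variant below uses); the helper name is hypothetical, not library code.
def _check_householder_identity(y, u):
  u = tf.math.l2_normalize(u, axis=-1)
  manual = y - 2. * tf.math.reduce_sum(y * u, axis=-1, keepdims=True) * u
  # `LinearOperatorHouseholder` applies I - 2 u u^T; both paths must agree.
  tf.debugging.assert_near(
      manual, tf.linalg.LinearOperatorHouseholder(u).matvec(y))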
def _sample_n(self, n, seed=None):
  mean_direction = tf.convert_to_tensor(self.mean_direction)
  concentration = tf.convert_to_tensor(self.concentration)
  event_size_int = self._event_shape_tensor(
      mean_direction=mean_direction)[0]
  event_size = tf.cast(event_size_int, dtype=self.dtype)

  beta_seed, uniform_seed = samplers.split_seed(
      seed, salt='power_spherical')

  broadcasted_concentration = tf.broadcast_to(
      concentration,
      self._batch_shape_tensor(
          mean_direction=mean_direction, concentration=concentration))
  beta = beta_lib.Beta(
      (event_size - 1.) / 2. + broadcasted_concentration,
      (event_size - 1.) / 2.)
  beta_samples = beta.sample(n, seed=beta_seed)

  u_shape = ps.concat([
      [n],
      self._batch_shape_tensor(
          mean_direction=mean_direction, concentration=concentration)
  ], axis=0)
  spherical_samples = random_ops.spherical_uniform(
      shape=u_shape,
      dimension=event_size_int - 1,
      dtype=self.dtype,
      seed=uniform_seed)

  t = 2. * beta_samples - 1.
  y = tf.concat([
      t[..., tf.newaxis],
      tf.math.sqrt(1. - tf.math.square(t))[..., tf.newaxis] *
      spherical_samples
  ], axis=-1)

  modified_mean = tf.concat(
      [(1. - mean_direction[..., 0])[..., tf.newaxis],
       -mean_direction[..., 1:]], axis=-1)
  modified_mean = tf.math.l2_normalize(modified_mean, axis=-1)
  householder_transform = tf.linalg.LinearOperatorHouseholder(
      modified_mean)
  return householder_transform.matvec(y)
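
# Hedged usage sketch for the sampler above: `tfp.distributions.PowerSpherical`
# is the public distribution a `_sample_n` like this belongs to. Because a
# Householder reflection is an isometry, `matvec(y)` already lies on the unit
# sphere, so this variant needs no final re-normalization (unlike the manual
# variant above).
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

dist = tfd.PowerSpherical(
    mean_direction=tf.math.l2_normalize([1., 1., 0.], axis=-1),
    concentration=10.)
x = dist.sample(5, seed=42)  # shape [5, 3]; tf.norm(x, axis=-1) ~ 1.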
def _sample_n(self, n, seed=None):
  return random_ops.spherical_uniform(
      shape=ps.concat([[n], self.batch_shape], axis=0),
      dimension=self.dimension,
      dtype=self.dtype,
      seed=seed)
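
# Hedged usage sketch: the public counterpart of the `_sample_n` above is
# `tfp.distributions.SphericalUniform`; draws are uniform on the unit
# (dimension - 1)-sphere embedded in R^dimension.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

uniform_sphere = tfd.SphericalUniform(dimension=3, dtype=tf.float32)
samples = uniform_sphere.sample(4, seed=42)  # shape [4, 3], unit-norm rows.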
def sample_lkj(num_samples,
               dimension,
               concentration,
               cholesky_space=False,
               seed=None,
               name=None):
  """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[num_samples] + B + [D, D]`, where `B` is the shape of the
      `concentration` parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
  """
  if dimension < 0:
    raise ValueError(
        'Cannot sample negative-dimension correlation matrices.')
  # Notation below: B is the batch shape, i.e., tf.shape(concentration)

  # We need 1 seed for the beta draw of `corr12`, and 2 per loop iteration.
  num_seeds = 1 + 2 * max(0, dimension - 2)
  seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
  with tf.name_scope(name or 'sample_lkj'):
    concentration = tf.convert_to_tensor(concentration)
    if not dtype_util.is_floating(concentration.dtype):
      raise TypeError(
          'The concentration argument should have floating type, not '
          '{}'.format(dtype_util.name(concentration.dtype)))

    concentration = _replicate(num_samples, concentration)
    concentration_shape = ps.shape(concentration)
    if dimension <= 1:
      # For any dimension <= 1, there is only one possible correlation matrix.
      shape = ps.concat([concentration_shape, [dimension, dimension]], axis=0)
      return tf.ones(shape=shape, dtype=concentration.dtype)

    beta_conc = concentration + (dimension - 2.) / 2.
    beta_dist = beta.Beta(concentration1=beta_conc, concentration0=beta_conc)

    # Note that the sampler below deviates from [1], by doing the sampling in
    # cholesky space. This does not change the fundamental logic of the
    # sampler, but does speed up the sampling.

    # This is the correlation coefficient between the first two dimensions.
    # This is also `r` in reference [1].
    corr12 = 2. * beta_dist.sample(seed=seeds.pop()) - 1.

    # Below we construct the Cholesky of the initial 2x2 correlation matrix,
    # which is of the form:
    # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
    # first two dimensions.
    # This is the top-left corner of the cholesky of the final sample.
    first_row = tf.concat([
        tf.ones_like(corr12)[..., tf.newaxis],
        tf.zeros_like(corr12)[..., tf.newaxis]
    ], axis=-1)
    second_row = tf.concat(
        [corr12[..., tf.newaxis],
         tf.sqrt(1 - corr12**2)[..., tf.newaxis]], axis=-1)

    chol_result = tf.concat(
        [first_row[..., tf.newaxis, :],
         second_row[..., tf.newaxis, :]], axis=-2)

    for n in range(2, dimension):
      # Loop invariant: on entry, chol_result has shape B + [n, n].
      beta_conc = beta_conc - 0.5
      # norm is y in reference [1].
      norm = beta.Beta(
          concentration1=n / 2.,
          concentration0=beta_conc).sample(seed=seeds.pop())
      # distance shape: B + [1] for broadcast
      distance = tf.sqrt(norm)[..., tf.newaxis]
      # direction is u in reference [1].
      # direction shape: B + [n]
      direction = random_ops.spherical_uniform(
          shape=concentration_shape,
          dimension=n,
          dtype=concentration.dtype,
          seed=seeds.pop())
      # raw_correlation is w in reference [1].
      raw_correlation = distance * direction  # shape: B + [n]

      # This is the next row in the cholesky of the result,
      # which differs from the construction in reference [1].
      # In the reference, the new row `z` = chol_result @ raw_correlation^T
      # = C @ raw_correlation^T (where as shorthand we use C = chol_result).
      # We prove that the below equation is the right row to add to the
      # cholesky, by showing equality with reference [1].
      # Let S be the sample constructed so far, and let `z` be as in
      # reference [1]. Then at this iteration, the new sample S' will be
      #   [[S z^T]
      #    [z 1  ]]
      # In our case we have the cholesky decomposition factor C, so
      # we want our new row x (same size as z) to satisfy:
      #   [[S z^T]    [[C 0]  [[C^T x^T]    [[CC^T  Cx^T       ]
      #    [z 1  ]] =  [x k]]  [0   k  ]] =  [xC^T  xx^T + k**2]]
      # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
      # we have that x = raw_correlation. Also 1 = xx^T + k**2, so
      # k = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2)
      #   = sqrt(1 - distance**2) = sqrt(1 - norm).
      new_row = tf.concat(
          [raw_correlation, tf.sqrt(1. - norm[..., tf.newaxis])], axis=-1)

      # Finally add this new row, by growing the cholesky of the result.
      chol_result = tf.concat([
          chol_result,
          tf.zeros_like(chol_result[..., 0][..., tf.newaxis])
      ], axis=-1)
      chol_result = tf.concat(
          [chol_result, new_row[..., tf.newaxis, :]], axis=-2)

    assert not seeds, 'Did not use all seeds: ' + str(len(seeds))
    if cholesky_space:
      return chol_result

    result = tf.matmul(chol_result, chol_result, transpose_b=True)
    # The diagonal for a correlation matrix should always be ones. Due to
    # numerical instability the matmul might not achieve that, so manually
    # set these to ones.
    result = tf.linalg.set_diag(
        result, tf.ones(shape=ps.shape(result)[:-1], dtype=result.dtype))
    # This sampling algorithm can produce near-PSD matrices on which standard
    # algorithms such as `tf.linalg.cholesky` or
    # `tf.linalg.self_adjoint_eigvals` fail. Specifically, as documented in
    # b/116828694, around 2% of trials of 900,000 5x5 matrices (distributed
    # according to 9 different concentration parameter values) contained at
    # least one matrix on which the Cholesky decomposition failed.
    return result
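
# Hedged usage sketch for `sample_lkj` (assumes the module-level imports used
# above, e.g. `tf` and `samplers`; the demo name is hypothetical). Draws
# correlation matrices and checks the unit diagonal that `set_diag` enforces.
def _demo_sample_lkj():
  corr = sample_lkj(
      num_samples=3, dimension=5, concentration=2.,
      seed=samplers.sanitize_seed(7))
  # corr has shape [3, 5, 5]: 3 samples of 5x5 correlation matrices.
  tf.debugging.assert_near(tf.linalg.diag_part(corr), tf.ones([3, 5]))
  return corr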