Example #1
def _tril_spherical_uniform(dimension, batch_shape, dtype, seed):
    """Returns a `Tensor` of samples of lower triangular matrices.

  Each row of the lower triangular part follows a spherical uniform
  distribution.

  Args:
    dimension: Scalar `int` `Tensor`, representing the dimensionality of the
      output matrices.
    batch_shape: Vector-shaped, `int` `Tensor` representing batch shape of
      output. The output will have shape `batch_shape + [dimension, dimension]`.
    dtype: TF `dtype` representing `dtype` of output.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

  Returns:
    tril_spherical_uniform: `Tensor` with specified `batch_shape` and `dtype`
      consisting of real values drawn row-wise from a spherical uniform
      distribution.
  """
    # Essentially, we will draw lower triangular samples where each lower
    # triangular entry follows a normal distribution, then apply `x / norm(x)`
    # for each row of the samples.
    # To avoid possible NaNs, we will use spherical_uniform directly for
    # the first two rows.
    assert dimension > 0, '`dimension` needs to be positive.'
    num_seeds = min(dimension, 3)
    seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
    rows = []
    paddings_prepend = [[0, 0]] * len(batch_shape)
    for n in range(1, min(dimension, 2) + 1):
        rows.append(
            tf.pad(random_ops.spherical_uniform(shape=batch_shape,
                                                dimension=n,
                                                dtype=dtype,
                                                seed=seeds.pop()),
                   paddings_prepend + [[0, dimension - n]],
                   constant_values=0.))
    samples = tf.stack(rows, axis=-2)
    if dimension > 2:
        normal_shape = ps.concat(
            [batch_shape, [dimension * (dimension + 1) // 2 - 3]], axis=0)
        normal_samples = samplers.normal(shape=normal_shape,
                                         dtype=dtype,
                                         seed=seeds.pop())
        # We insert ones as placeholders for the first two rows of the
        # triangular matrix; those rows were already sampled directly above,
        # and the placeholders are sliced off after `fill_triangular`. Note
        # that `fill_triangular` fills elements in a clockwise spiral.
        normal_samples = tf.concat([
            normal_samples[..., :dimension],
            tf.ones(ps.concat([batch_shape, [1]], axis=0), dtype=dtype),
            normal_samples[..., dimension:(2 * dimension - 1)],
            tf.ones(ps.concat([batch_shape, [2]], axis=0), dtype=dtype),
            normal_samples[..., (2 * dimension - 1):],
        ],
                                   axis=-1)
        normal_samples = linalg.fill_triangular(normal_samples,
                                                upper=False)[..., 2:, :]
        remaining_rows = normal_samples / tf.norm(
            normal_samples, ord=2, axis=-1, keepdims=True)
        samples = tf.concat([samples, remaining_rows], axis=-2)
    return samples
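The helper above special-cases the first two rows to avoid NaNs and packs the rest through `fill_triangular`, but the underlying idea is simple: sample Gaussians, keep the lower triangle, and normalize each row. A minimal NumPy sketch of that idea (a plain re-implementation for intuition, without the first-two-row special casing; `tril_spherical_uniform_np` is a hypothetical name):

import numpy as np

def tril_spherical_uniform_np(dimension, batch_shape=(), rng=None):
    # A normalized vector of i.i.d. standard normals is uniform on the
    # unit sphere, so normalizing each lower-triangular row gives the
    # row-wise spherical uniform draws described in the docstring.
    rng = np.random.default_rng() if rng is None else rng
    x = rng.standard_normal(batch_shape + (dimension, dimension))
    tril = np.tril(x)
    return tril / np.linalg.norm(tril, axis=-1, keepdims=True)

samples = tril_spherical_uniform_np(4, batch_shape=(2,))
print(np.linalg.norm(samples, axis=-1))  # every row has unit norm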
Example #2
def _sample_direction_part(state_part, part_seed):
    # Note: `batch_rank` is captured from the enclosing scope; the leading
    # `batch_rank` axes of `state_part` are batch dimensions, and the
    # remaining axes are flattened into a single sphere dimension.
    state_part_shape = ps.shape(state_part)
    batch_shape = state_part_shape[:batch_rank]
    dimension = ps.reduce_prod(state_part_shape[batch_rank:])
    return ps.reshape(
        random_ops.spherical_uniform(shape=batch_shape,
                                     dimension=dimension,
                                     dtype=state_part.dtype,
                                     seed=part_seed), state_part_shape)
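This helper draws a fresh uniform direction for one state part: it flattens everything after the batch axes into a single sphere dimension, samples on that sphere, and reshapes back. A self-contained NumPy sketch of the same operation (`sample_direction_part_np` and the shapes are illustrative):

import numpy as np

def sample_direction_part_np(state_part, batch_rank, rng):
    # Flatten everything after the batch axes into one dimension, sample
    # a uniform unit vector of that dimension per batch member, and
    # reshape back to the original state-part shape.
    shape = state_part.shape
    batch_shape = shape[:batch_rank]
    dim = int(np.prod(shape[batch_rank:]))
    g = rng.standard_normal(batch_shape + (dim,))
    direction = g / np.linalg.norm(g, axis=-1, keepdims=True)
    return direction.reshape(shape)

rng = np.random.default_rng(0)
part = np.zeros((3, 4, 5))          # batch_rank = 1, event shape (4, 5)
d = sample_direction_part_np(part, batch_rank=1, rng=rng)
print(np.linalg.norm(d.reshape(3, -1), axis=-1))  # ~[1. 1. 1.]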
Example #3
    def _sample_n(self, n, seed=None):
        mean_direction = tf.convert_to_tensor(self.mean_direction)
        concentration = tf.convert_to_tensor(self.concentration)
        event_size_int = self._event_shape_tensor(
            mean_direction=mean_direction)[0]
        event_size = tf.cast(event_size_int, dtype=self.dtype)

        beta_seed, uniform_seed = samplers.split_seed(seed,
                                                      salt='power_spherical')

        broadcasted_concentration = tf.broadcast_to(
            concentration,
            self._batch_shape_tensor(mean_direction=mean_direction,
                                     concentration=concentration))
        beta = beta_lib.Beta(
            (event_size - 1.) / 2. + broadcasted_concentration,
            (event_size - 1.) / 2.)
        beta_samples = beta.sample(n, seed=beta_seed)

        u_shape = ps.concat(
            [[n],
             self._batch_shape_tensor(mean_direction=mean_direction,
                                      concentration=concentration)],
            axis=0)

        spherical_samples = random_ops.spherical_uniform(
            shape=u_shape,
            dimension=event_size_int - 1,
            dtype=self.dtype,
            seed=uniform_seed)

        t = 2. * beta_samples - 1.
        y = tf.concat([
            t[..., tf.newaxis],
            tf.math.sqrt(1. - tf.math.square(t))[..., tf.newaxis] *
            spherical_samples
        ],
                      axis=-1)

        u = tf.concat([(1. - mean_direction[..., 0])[..., tf.newaxis],
                       -mean_direction[..., 1:]],
                      axis=-1)
        # Much like `VonMisesFisher`, we use `l2_normalize`, which leaves a
        # zero vector unchanged, so in that case the Householder reflection
        # does nothing. This is consistent with sampling with
        # `mu = [1, 0, 0, ..., 0]`, since samples will be of the form:
        #   [w, sqrt(1 - w**2) * u] = w * mu + sqrt(1 - w**2) * v,
        # where:
        #   * `u` is a unit vector sampled from the unit hypersphere.
        #   * `v` is `[0, u]`.
        # This is the same form as sampling from the tangent-normal
        # decomposition.
        u = tf.math.l2_normalize(u, axis=-1)
        return tf.math.l2_normalize(
            y - 2. * tf.math.reduce_sum(y * u, axis=-1, keepdims=True) * u,
            axis=-1)
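The final reflection carries the north pole `e1 = [1, 0, ..., 0]` onto `mean_direction`, so samples built around `e1` end up concentrated around the mean. A small NumPy sketch of just that Householder step (a hypothetical standalone helper; it assumes `mu != e1`, whereas the code above relies on `l2_normalize` to make the reflection a no-op in that case):

import numpy as np

def householder_to_mean(y, mu):
    # u is the normalized direction e1 - mu; reflecting across the
    # hyperplane orthogonal to u maps e1 onto mu.
    u = -mu.copy()
    u[..., 0] += 1.0                     # u = e1 - mu
    u /= np.linalg.norm(u, axis=-1, keepdims=True)
    return y - 2.0 * np.sum(y * u, axis=-1, keepdims=True) * u

mu = np.array([0.0, 0.0, 1.0])
e1 = np.array([1.0, 0.0, 0.0])
print(householder_to_mean(e1, mu))       # -> [0. 0. 1.], i.e. mu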
Example #4
    def _sample_n(self, n, seed=None):
        mean_direction = tf.convert_to_tensor(self.mean_direction)
        concentration = tf.convert_to_tensor(self.concentration)
        event_size_int = self._event_shape_tensor(
            mean_direction=mean_direction)[0]
        event_size = tf.cast(event_size_int, dtype=self.dtype)

        beta_seed, uniform_seed = samplers.split_seed(seed,
                                                      salt='power_spherical')

        broadcasted_concentration = tf.broadcast_to(
            concentration,
            self._batch_shape_tensor(mean_direction=mean_direction,
                                     concentration=concentration))
        beta = beta_lib.Beta(
            (event_size - 1.) / 2. + broadcasted_concentration,
            (event_size - 1.) / 2.)
        beta_samples = beta.sample(n, seed=beta_seed)

        u_shape = ps.concat(
            [[n],
             self._batch_shape_tensor(mean_direction=mean_direction,
                                      concentration=concentration)],
            axis=0)

        spherical_samples = random_ops.spherical_uniform(
            shape=u_shape,
            dimension=event_size_int - 1,
            dtype=self.dtype,
            seed=uniform_seed)

        t = 2. * beta_samples - 1.
        y = tf.concat([
            t[..., tf.newaxis],
            tf.math.sqrt(1. - tf.math.square(t))[..., tf.newaxis] *
            spherical_samples
        ],
                      axis=-1)
        modified_mean = tf.concat(
            [(1. - mean_direction[..., 0])[..., tf.newaxis],
             -mean_direction[..., 1:]],
            axis=-1)
        modified_mean = tf.math.l2_normalize(modified_mean, axis=-1)
        householder_transform = tf.linalg.LinearOperatorHouseholder(
            modified_mean)
        return householder_transform.matvec(y)
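Example #4 is the same sampler as Example #3, with the explicit reflection `y - 2 * sum(y * u) * u` replaced by `tf.linalg.LinearOperatorHouseholder`. A quick standalone TensorFlow check that the two formulations agree (illustrative values):

import tensorflow as tf

u = tf.math.l2_normalize(tf.constant([1.0, -0.5, 2.0]))
y = tf.constant([0.3, 0.4, 0.5])

explicit = y - 2.0 * tf.reduce_sum(y * u, axis=-1, keepdims=True) * u
via_operator = tf.linalg.LinearOperatorHouseholder(u).matvec(y)
print(tf.reduce_max(tf.abs(explicit - via_operator)).numpy())  # ~0.0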
Example #5
    def _sample_n(self, n, seed=None):
        return random_ops.spherical_uniform(
            shape=ps.concat([[n], self.batch_shape], axis=0),
            dimension=self.dimension,
            dtype=self.dtype,
            seed=seed)
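Example #5 is the `_sample_n` of the spherical uniform distribution itself, a thin wrapper over the function every snippet on this page uses. Through the public API the same draw looks roughly like this (a usage sketch; the shapes and seed are illustrative, and it assumes the public `tfp.random.spherical_uniform` wrapper):

import tensorflow_probability as tfp

# A [2, 3] batch of directions drawn uniformly from the unit sphere in R^5.
samples = tfp.random.spherical_uniform(shape=[2, 3], dimension=5, seed=42)
print(samples.shape)  # (2, 3, 5)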
Example #6
def sample_lkj(num_samples,
               dimension,
               concentration,
               cholesky_space=False,
               seed=None,
               name=None):
    """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: Python integer seed for RNG
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[n] + B + [D, D]`, where `B` is the shape of the `concentration`
      parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
  """
    if dimension < 0:
        raise ValueError(
            'Cannot sample negative-dimension correlation matrices.')
    # Notation below: B is the batch shape, i.e., tf.shape(concentration)

    # We need 1 seed for beta corr12, and 2 per loop iter.
    num_seeds = 1 + 2 * max(0, dimension - 2)
    seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
    with tf.name_scope(name or 'sample_lkj'):
        concentration = tf.convert_to_tensor(concentration)
        if not dtype_util.is_floating(concentration.dtype):
            raise TypeError(
                'The concentration argument should have floating type, not '
                '{}'.format(dtype_util.name(concentration.dtype)))

        concentration = _replicate(num_samples, concentration)
        concentration_shape = ps.shape(concentration)
        if dimension <= 1:
            # For any dimension <= 1, there is only one possible correlation matrix.
            shape = ps.concat([concentration_shape, [dimension, dimension]],
                              axis=0)
            return tf.ones(shape=shape, dtype=concentration.dtype)
        beta_conc = concentration + (dimension - 2.) / 2.
        beta_dist = beta.Beta(concentration1=beta_conc,
                              concentration0=beta_conc)

        # Note that the sampler below deviates from [1] by doing the sampling
        # in Cholesky space. This does not change the fundamental logic of the
        # sampler, but does speed up the sampling.

        # This is the correlation coefficient between the first two dimensions.
        # This is also `r` in reference [1].
        corr12 = 2. * beta_dist.sample(seed=seeds.pop()) - 1.

        # Below we construct the Cholesky of the initial 2x2 correlation matrix,
        # which is of the form:
        # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
        # first two dimensions.
        # This is the top-left corner of the cholesky of the final sample.
        first_row = tf.concat([
            tf.ones_like(corr12)[..., tf.newaxis],
            tf.zeros_like(corr12)[..., tf.newaxis]
        ],
                              axis=-1)
        second_row = tf.concat(
            [corr12[..., tf.newaxis],
             tf.sqrt(1 - corr12**2)[..., tf.newaxis]],
            axis=-1)

        chol_result = tf.concat(
            [first_row[..., tf.newaxis, :], second_row[..., tf.newaxis, :]],
            axis=-2)

        for n in range(2, dimension):
            # Loop invariant: on entry, result has shape B + [n, n]
            beta_conc = beta_conc - 0.5
            # norm is y in reference [1].
            norm = beta.Beta(concentration1=n / 2.,
                             concentration0=beta_conc).sample(seed=seeds.pop())
            # distance shape: B + [1] for broadcast
            distance = tf.sqrt(norm)[..., tf.newaxis]
            # direction is u in reference [1].
            # direction shape: B + [n]
            direction = random_ops.spherical_uniform(shape=concentration_shape,
                                                     dimension=n,
                                                     dtype=concentration.dtype,
                                                     seed=seeds.pop())
            # raw_correlation is w in reference [1].
            raw_correlation = distance * direction  # shape: B + [n]

            # This is the next row in the cholesky of the result,
            # which differs from the construction in reference [1].
            # In the reference, the new row `z` = chol_result @ raw_correlation^T
            # = C @ raw_correlation^T (where as short hand we use C = chol_result).
            # We prove that the below equation is the right row to add to the
            # cholesky, by showing equality with reference [1].
            # Let S be the sample constructed so far, and let `z` be as in
            # reference [1]. Then at this iteration, the new sample S' will be
            # [[S z^T]
            #  [z 1]]
            # In our case we have the cholesky decomposition factor C, so
            # we want our new row x (same size as z) to satisfy:
            #  [[S z^T]    [[C 0]   [[C^T x^T]    [[C C^T   C x^T       ]
            #   [z 1 ]]  =  [x k]] @ [0    k ]] =  [x C^T   x x^T + k**2]]
            # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
            # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
            # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
            # distance**2).
            new_row = tf.concat(
                [raw_correlation,
                 tf.sqrt(1. - norm[..., tf.newaxis])],
                axis=-1)

            # Finally add this new row, by growing the cholesky of the result.
            chol_result = tf.concat([
                chol_result,
                tf.zeros_like(chol_result[..., 0][..., tf.newaxis])
            ],
                                    axis=-1)

            chol_result = tf.concat([chol_result, new_row[..., tf.newaxis, :]],
                                    axis=-2)

        assert not seeds, 'Did not use all seeds: {}'.format(len(seeds))
        if cholesky_space:
            return chol_result

        result = tf.matmul(chol_result, chol_result, transpose_b=True)
        # The diagonal for a correlation matrix should always be ones. Due to
        # numerical instability the matmul might not achieve that, so manually set
        # these to ones.
        result = tf.linalg.set_diag(
            result, tf.ones(shape=ps.shape(result)[:-1], dtype=result.dtype))
        # This sampling algorithm can produce near-PSD matrices on which standard
        # algorithms such as `tf.linalg.cholesky` or
        # `tf.linalg.self_adjoint_eigvals` fail. Specifically, as documented in
        # b/116828694, around 2% of trials of 900,000 5x5 matrices (distributed
        # according to 9 different concentration parameter values) contained at
        # least one matrix on which the Cholesky decomposition failed.
        return result
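To make the Cholesky-growing step in the loop concrete: appending the row `[raw_correlation, sqrt(1 - norm)]` keeps `chol_result @ chol_result^T` a correlation matrix with a unit diagonal. A NumPy sketch of one such "onion" step, with fixed illustrative values standing in for the Beta and sphere draws:

import numpy as np

# 2x2 Cholesky factor built from corr12 as above, with r = 0.3.
r = 0.3
chol = np.array([[1.0, 0.0],
                 [r, np.sqrt(1.0 - r**2)]])

# One step of the loop: w = sqrt(norm) * direction with |direction| = 1,
# so |w|**2 = norm and the new diagonal entry is k = sqrt(1 - norm).
norm = 0.4                        # stands in for the Beta sample y in [1]
direction = np.array([0.6, 0.8])  # stands in for the spherical uniform u
new_row = np.concatenate([np.sqrt(norm) * direction, [np.sqrt(1.0 - norm)]])

grown = np.vstack([np.hstack([chol, np.zeros((2, 1))]), new_row])
corr = grown @ grown.T
print(np.diag(corr))              # [1. 1. 1.]: unit diagonal preserved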