Example #1
 def testLogGammaDifference(self):
     y = tfp.distributions.HalfCauchy(loc=8., scale=10.).sample(
         10000, test_util.test_seed())
     y_64 = tf.cast(y, tf.float64)
     # Not testing x near zero because the naive method is too inaccurate.
     # We will get implicit coverage in testLogBeta, where a good reference
     # implementation is available (scipy_special.betaln).
     x = tfp.distributions.Uniform(low=4.,
                                   high=12.).sample(10000,
                                                    test_util.test_seed())
     x_64 = tf.cast(x, tf.float64)
     naive_64_ = tf.math.lgamma(y_64) - tf.math.lgamma(x_64 + y_64)
     naive_64, sophisticated, sophisticated_64 = self.evaluate([
         naive_64_,
         tfp_math.log_gamma_difference(x, y),
         tfp_math.log_gamma_difference(x_64, y_64)
     ])
     # Check that we're in the ballpark of the definition (which has to be
     # computed in double precision because it's so inaccurate).
     self.assertAllClose(naive_64, sophisticated_64, atol=1e-6, rtol=4e-4)
     # Check that we don't lose accuracy in single precision, at least relative
     # to ourselves.
     atol = 1e-8
     rtol = 1e-6
     self.assertAllClose(sophisticated,
                         sophisticated_64,
                         atol=atol,
                         rtol=rtol)
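The test above exercises the defining identity log_gamma_difference(x, y) = lgamma(y) - lgamma(x + y), computed so that the subtraction does not lose precision for large `y`. A minimal standalone sketch of the same check through the public `tfp.math` alias, assuming TensorFlow Probability and SciPy are installed (variable names here are illustrative, not part of the original snippet):

import numpy as np
import tensorflow_probability as tfp
from scipy import special as scipy_special

x = np.linspace(4., 12., 5)   # float64, kept away from zero as in the test above
y = np.linspace(8., 40., 5)
# Naive reference; acceptable in float64, where cancellation is less harmful.
reference = scipy_special.gammaln(y) - scipy_special.gammaln(x + y)
result = tfp.math.log_gamma_difference(x, y)
np.testing.assert_allclose(result, reference, rtol=1e-6)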
Example #2
  def _variance(self):
    df = tf.convert_to_tensor(self.df)
    scale = tf.convert_to_tensor(self.scale)
    # We need to put the tf.where inside the outer tf.where to ensure we never
    # hit a NaN in the gradient.
    first_denom = tf.where(df > 2., df - 2., 1.)
    second_denom = tf.where(df > 1., df - 1., 1.)
    var = (
        tf.ones(self._batch_shape_tensor(df=df, scale=scale),
                dtype=self.dtype) * tf.square(scale) * df / first_denom -
        tf.math.exp(2. * tf.math.log(scale) + np.log(4.) + tf.math.log(df) -
                    np.log(np.pi) - 2. * tf.math.log(second_denom) -
                    2. * tfp_math.log_gamma_difference(0.5, 0.5 * df)))
    # When 1 < df <= 2, variance is infinite.
    result_where_defined = tf.where(
        df > 2., var,
        dtype_util.as_numpy_dtype(self.dtype)(np.inf))

    if self.allow_nan_stats:
      return tf.where(df > 1., result_where_defined,
                      dtype_util.as_numpy_dtype(self.dtype)(np.nan))
    else:
      return distribution_util.with_dependencies([
          assert_util.assert_less(
              tf.ones([], dtype=self.dtype),
              df,
              message='variance not defined for components of df <= 1'),
      ], result_where_defined)
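The comment about nesting tf.where refers to the usual safe-denominator trick: the branch tf.where discards still receives a zero upstream gradient, and zero times an infinite branch gradient produces NaN. A small illustration of the trick, separate from the snippet above and only a sketch:

import tensorflow as tf

df = tf.constant(2.)  # boundary value where the naive denominator hits zero
with tf.GradientTape(persistent=True) as tape:
    tape.watch(df)
    naive = tf.where(df > 2., 1. / (df - 2.), float('inf'))
    safe_denom = tf.where(df > 2., df - 2., 1.)  # the inner tf.where
    safe = tf.where(df > 2., 1. / safe_denom, float('inf'))
print(tape.gradient(naive, df))  # nan: 0 * (gradient of 1 / (df - 2.) at df == 2.)
print(tape.gradient(safe, df))   # 0.0: the discarded branch stays finite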
Example #3
    def _log_normalization(self, concentration=None, name='log_normalization'):
        """Returns the log normalization of a CholeskyLKJ distribution.

        Args:
          concentration: `float` or `double` `Tensor`. The positive concentration
            parameter of the CholeskyLKJ distributions.
          name: Python `str` name prefixed to Ops created by this function.

        Returns:
          log_z: A Tensor of the same shape and dtype as `concentration`, containing
            the corresponding log normalizers.
        """
        # The formula is from D. Lewandowski et al [1], p. 1999, from the
        # proof that eqs 16 and 17 are equivalent.
        # Instead of using a for loop for k from 1 to (dimension - 1), we will
        # vectorize the computation by performing operations on the vector
        # `dimension_range = np.arange(1, dimension)`.
        with tf.name_scope(name or 'log_normalization_lkj'):
            if concentration is None:
                concentration = tf.convert_to_tensor(self.concentration)
            logpi = float(np.log(np.pi))
            dimension_range = np.arange(1.,
                                        self.dimension,
                                        dtype=dtype_util.as_numpy_dtype(
                                            concentration.dtype))
            effective_concentration = (
                concentration[..., tf.newaxis] +
                (self.dimension - 1 - dimension_range) / 2.)
            ans = tf.reduce_sum(tfp_math.log_gamma_difference(
                dimension_range / 2., effective_concentration),
                                axis=-1)
            # Then we add to `ans` the sum of `logpi / 2 * k` for `k` run from 1 to
            # `dimension - 1`.
            ans = ans + logpi * (self.dimension * (self.dimension - 1) / 4.)
            return ans
Example #4
    def _log_normalization(self, concentration=None, name='log_normalization'):
        """Returns the log normalization of an LKJ distribution.

        Args:
          concentration: `float` or `double` `Tensor`. The positive concentration
            parameter of the LKJ distributions.
          name: Python `str` name prefixed to Ops created by this function.

        Returns:
          log_z: A Tensor of the same shape and dtype as `concentration`, containing
            the corresponding log normalizers.
        """
        # The formula is from D. Lewandowski et al [1], p. 1999, from the
        # proof that eqs 16 and 17 are equivalent.
        with tf.name_scope(name or 'log_normalization_lkj'):
            concentration = tf.convert_to_tensor(
                self.concentration if concentration is None else concentration)
            logpi = float(np.log(np.pi))
            ans = tf.zeros_like(concentration)
            for k in range(1, self.dimension):
                ans = ans + logpi * (k / 2.)
                effective_concentration = concentration + (self.dimension - 1 -
                                                           k) / 2.
                ans = ans + tfp_math.log_gamma_difference(
                    k / 2., effective_concentration)
            return ans
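This loop and the vectorized sum in Example #3 compute the same normalizer. A hedged NumPy/SciPy transcription of the loop (the function name is illustrative), with log_gamma_difference expanded via the identity from Example #1:

import numpy as np
from scipy import special as scipy_special

def lkj_log_normalization(concentration, dimension):
    # Same recurrence as above, with log_gamma_difference(x, y) written out as
    # gammaln(y) - gammaln(x + y).
    ans = 0.
    for k in range(1, dimension):
        effective_concentration = concentration + (dimension - 1 - k) / 2.
        ans += np.log(np.pi) * (k / 2.)
        ans += (scipy_special.gammaln(effective_concentration) -
                scipy_special.gammaln(k / 2. + effective_concentration))
    return ans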
Example #5
    def _entropy(self):
        df = tf.broadcast_to(self.df, self.batch_shape_tensor())
        num_dims = tf.cast(self.event_shape_tensor()[0], self.dtype)

        shape_factor = self._scale.log_abs_determinant()
        beta_factor = (num_dims / 2. * (tf.math.log(df) + np.log(np.pi)) +
                       tfp_math.log_gamma_difference(num_dims / 2., df / 2.))
        digamma_factor = (num_dims + df) / 2. * (tf.math.digamma(
            (num_dims + df) / 2.) - tf.math.digamma(df / 2.))
        return shape_factor + beta_factor + digamma_factor
Example #6
 def testLogGammaDifference(self):
   y = tfp.distributions.HalfCauchy(loc=8., scale=10.).sample(
       10000, test_util.test_seed())
   x = tfp.distributions.Uniform(low=0., high=8.).sample(
       10000, test_util.test_seed())
   naive_ = tf.math.lgamma(y) - tf.math.lgamma(x + y)
   naive, sophisticated = self.evaluate(
       [naive_, tfp_math.log_gamma_difference(x, y)])
   # Loose tolerance because naive method is inaccurate
   self.assertAllClose(naive, sophisticated, rtol=1e-3)
Example #7
  def _log_normalization(self, concentration=None, mean_direction=None):
    """Computes the log-normalizer of the distribution."""
    if concentration is None:
      concentration = tf.convert_to_tensor(self.concentration)
    event_size = tf.cast(self._event_shape_tensor(
        mean_direction=mean_direction)[-1], self.dtype)

    concentration1 = concentration + (event_size - 1.) / 2.
    concentration0 = (event_size - 1.) / 2.

    return ((concentration1 + concentration0) * np.log(2.) +
            concentration0 * np.log(np.pi) +
            tfp_math.log_gamma_difference(concentration0, concentration1))
Example #8
 def _mean(self):
     df = tf.convert_to_tensor(self.df)
     loc = tf.convert_to_tensor(self.loc)
     scale = tf.convert_to_tensor(self.scale)
     log_correction = (tf.math.log(scale) + np.log(2.) + 0.5 *
                       (tf.math.log(df) - np.log(np.pi)) -
                       tfp_math.log_gamma_difference(0.5, 0.5 * df) -
                       tf.math.log(df - 1))
     mean = tf.math.exp(log_correction) + loc
     if self.allow_nan_stats:
         return tf.where(df > 1., mean,
                         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
     else:
         return distribution_util.with_dependencies([
             assert_util.assert_less(
                 tf.ones([], dtype=self.dtype),
                 df,
                 message='mean not defined for components of df <= 1'),
         ], mean)
Example #9
def log_prob(x, df, loc, scale):
    """Compute log probability of Student T distribution.

    Note that scale can be negative.

    Args:
      x: Floating-point `Tensor`. Where to compute the log probabilities.
      df: Floating-point `Tensor`. The degrees of freedom of the
        distribution(s). `df` must contain only positive values.
      loc: Floating-point `Tensor`; the location(s) of the distribution(s).
      scale: Floating-point `Tensor`; the scale(s) of the distribution(s).

    Returns:
      A `Tensor` with shape broadcast according to the arguments.
    """
    # Writing `y` this way reduces XLA mem copies.
    y = (x - loc) * (tf.math.rsqrt(df) / scale)
    log_unnormalized_prob = -0.5 * (df + 1.) * log1psquare(y)
    log_normalization = (tf.math.log(tf.abs(scale)) + 0.5 * tf.math.log(df) +
                         0.5 * np.log(np.pi) +
                         tfp_math.log_gamma_difference(0.5, 0.5 * df))
    return log_unnormalized_prob - log_normalization
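A hedged usage sketch, assuming the helper above and its dependencies (tf, np, tfp_math, log1psquare) are already in scope and TensorFlow is running eagerly; scipy.stats.t serves as an independent reference for a positive scale:

import numpy as np
from scipy import stats

x, df, loc, scale = 1.3, 5., 0.5, 2.
tfp_value = log_prob(x, df, loc, scale)
scipy_value = stats.t.logpdf(x, df, loc=loc, scale=scale)
np.testing.assert_allclose(tfp_value, scipy_value, rtol=1e-5)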
Example #10
 def _mean(self, df=None):
   df = tf.convert_to_tensor(self.df if df is None else df)
   return np.sqrt(2.) * tf.exp(
       -tfp_math.log_gamma_difference(0.5, 0.5 * df))
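Since log_gamma_difference(0.5, 0.5 * df) = lgamma(0.5 * df) - lgamma(0.5 * df + 0.5), this mean reduces to sqrt(2) * gamma((df + 1) / 2) / gamma(df / 2). A hedged SciPy check of that closed form:

import numpy as np
from scipy import special as scipy_special

df = 5.
closed_form = np.sqrt(2.) * np.exp(
    scipy_special.gammaln((df + 1.) / 2.) - scipy_special.gammaln(df / 2.))
print(closed_form)  # roughly 2.128 for df = 5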
Example #11
 def _log_normalization(self):
     df = tf.convert_to_tensor(self.df)
     num_dims = tf.cast(self.event_shape_tensor()[0], self.dtype)
     return (tfp_math.log_gamma_difference(num_dims / 2., df / 2.) +
             num_dims / 2. * (tf.math.log(df) + np.log(np.pi)) +
             self.scale.log_abs_determinant())
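Expanding log_gamma_difference(num_dims / 2., df / 2.) = lgamma(df / 2.) - lgamma((df + num_dims) / 2.) gives the same normalizer in closed form. A hedged SciPy transcription with illustrative names:

import numpy as np
from scipy import special as scipy_special

def mvt_log_normalization(df, num_dims, log_abs_det_scale):
    # The snippet above with log_gamma_difference written out explicitly.
    return (scipy_special.gammaln(df / 2.) -
            scipy_special.gammaln((df + num_dims) / 2.) +
            num_dims / 2. * (np.log(df) + np.log(np.pi)) +
            log_abs_det_scale)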