def testLogGammaDifference(self):
  y = tfp.distributions.HalfCauchy(loc=8., scale=10.).sample(
      10000, test_util.test_seed())
  y_64 = tf.cast(y, tf.float64)
  # Not testing x near zero because the naive method is too inaccurate.
  # We will get implicit coverage in testLogBeta, where a good reference
  # implementation is available (scipy_special.betaln).
  x = tfp.distributions.Uniform(low=4., high=12.).sample(
      10000, test_util.test_seed())
  x_64 = tf.cast(x, tf.float64)
  naive_64_ = tf.math.lgamma(y_64) - tf.math.lgamma(x_64 + y_64)
  naive_64, sophisticated, sophisticated_64 = self.evaluate([
      naive_64_,
      tfp_math.log_gamma_difference(x, y),
      tfp_math.log_gamma_difference(x_64, y_64)
  ])
  # Check that we're in the ballpark of the definition (which has to be
  # computed in double precision because it's so inaccurate).
  self.assertAllClose(naive_64, sophisticated_64, atol=1e-6, rtol=4e-4)
  # Check that we don't lose accuracy in single precision, at least relative
  # to ourselves.
  atol = 1e-8
  rtol = 1e-6
  self.assertAllClose(sophisticated, sophisticated_64, atol=atol, rtol=rtol)
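# A minimal sketch (illustrative only, not part of the test suite) of the
# identity the test above exercises: `log_gamma_difference(x, y)` evaluates
# lgamma(y) - lgamma(x + y), but the naive subtraction cancels catastrophically
# once y is large, which is why the test compares against a float64 reference.
# `tfp.math` below is assumed to be the same module as the `tfp_math` alias
# used in these snippets; the input values are arbitrary.
import tensorflow as tf
import tensorflow_probability as tfp

x = tf.constant([1., 4., 8.], dtype=tf.float64)
y = tf.constant([10., 1e3, 1e6], dtype=tf.float64)
naive = tf.math.lgamma(y) - tf.math.lgamma(x + y)
stable = tfp.math.log_gamma_difference(x, y)
# In float64 the two agree closely; rerunning `naive` in float32 loses several
# digits for the large-y entries while `stable` does not.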
def _variance(self):
  df = tf.convert_to_tensor(self.df)
  scale = tf.convert_to_tensor(self.scale)
  # Clamp the denominators with their own tf.where so that the branch the
  # outer tf.where below discards stays finite; otherwise its NaN would leak
  # into the gradient.
  first_denom = tf.where(df > 2., df - 2., 1.)
  second_denom = tf.where(df > 1., df - 1., 1.)
  var = (
      tf.ones(self._batch_shape_tensor(df=df, scale=scale), dtype=self.dtype)
      * tf.square(scale) * df / first_denom
      - tf.math.exp(2. * tf.math.log(scale) + np.log(4.)
                    + tf.math.log(df) - np.log(np.pi)
                    - 2. * tf.math.log(second_denom)
                    - 2. * tfp_math.log_gamma_difference(0.5, 0.5 * df)))
  # When 1 < df <= 2, variance is infinite.
  result_where_defined = tf.where(
      df > 2., var, dtype_util.as_numpy_dtype(self.dtype)(np.inf))
  if self.allow_nan_stats:
    return tf.where(df > 1., result_where_defined,
                    dtype_util.as_numpy_dtype(self.dtype)(np.nan))
  else:
    return distribution_util.with_dependencies([
        assert_util.assert_less(
            tf.ones([], dtype=self.dtype), df,
            message='variance not defined for components of df <= 1'),
    ], result_where_defined)
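# For reference, the expression above appears to assemble
# Var = E[X**2] - (E[X])**2 with E[X**2] = scale**2 * df / (df - 2); the
# exp(...) term is the squared mean, whose closed form is spelled out with the
# `_mean` snippet further down.  Below is a standalone sketch (illustrative
# values only) of why the denominators get their own tf.where: tf.where's
# gradient routes a zeroed upstream gradient into the untaken branch, and that
# zero is combined with the inf/NaN produced there (0 * inf -> NaN), so a
# division by zero in the discarded branch still poisons the overall gradient.
import tensorflow as tf

df = tf.constant([1.5, 2.0, 5.0])

with tf.GradientTape() as tape:
  tape.watch(df)
  unsafe = tf.where(df > 2., df / (df - 2.), float('inf'))
print(tape.gradient(unsafe, df))  # NaN at df == 2.0

with tf.GradientTape() as tape:
  tape.watch(df)
  safe_denom = tf.where(df > 2., df - 2., 1.)
  safe = tf.where(df > 2., df / safe_denom, float('inf'))
print(tape.gradient(safe, df))  # finite everywhere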
def _log_normalization(self, concentration=None, name='log_normalization'): """Returns the log normalization of a CholeskyLKJ distribution. Args: concentration: `float` or `double` `Tensor`. The positive concentration parameter of the CholeskyLKJ distributions. name: Python `str` name prefixed to Ops created by this function. Returns: log_z: A Tensor of the same shape and dtype as `concentration`, containing the corresponding log normalizers. """ # The formula is from D. Lewandowski et al [1], p. 1999, from the # proof that eqs 16 and 17 are equivalent. # Instead of using a for loop for k from 1 to (dimension - 1), we will # vectorize the computation by performing operations on the vector # `dimension_range = np.arange(1, dimension)`. with tf.name_scope(name or 'log_normalization_lkj'): if concentration is None: concentration = tf.convert_to_tensor(self.concentration) logpi = float(np.log(np.pi)) dimension_range = np.arange(1., self.dimension, dtype=dtype_util.as_numpy_dtype( concentration.dtype)) effective_concentration = ( concentration[..., tf.newaxis] + (self.dimension - 1 - dimension_range) / 2.) ans = tf.reduce_sum(tfp_math.log_gamma_difference( dimension_range / 2., effective_concentration), axis=-1) # Then we add to `ans` the sum of `logpi / 2 * k` for `k` run from 1 to # `dimension - 1`. ans = ans + logpi * (self.dimension * (self.dimension - 1) / 4.) return ans
def _log_normalization(self, concentration=None, name='log_normalization'): """Returns the log normalization of an LKJ distribution. Args: concentration: `float` or `double` `Tensor`. The positive concentration parameter of the LKJ distributions. name: Python `str` name prefixed to Ops created by this function. Returns: log_z: A Tensor of the same shape and dtype as `concentration`, containing the corresponding log normalizers. """ # The formula is from D. Lewandowski et al [1], p. 1999, from the # proof that eqs 16 and 17 are equivalent. with tf.name_scope(name or 'log_normalization_lkj'): concentration = (tf.convert_to_tensor( self.concentration if concentration is None else concentration) ) logpi = float(np.log(np.pi)) ans = tf.zeros_like(concentration) for k in range(1, self.dimension): ans = ans + logpi * (k / 2.) effective_concentration = concentration + (self.dimension - 1 - k) / 2. ans = ans + tfp_math.log_gamma_difference( k / 2., effective_concentration) return ans
def _entropy(self):
  df = tf.broadcast_to(self.df, self.batch_shape_tensor())
  num_dims = tf.cast(self.event_shape_tensor()[0], self.dtype)
  shape_factor = self._scale.log_abs_determinant()
  beta_factor = (num_dims / 2. * (tf.math.log(df) + np.log(np.pi))
                 + tfp_math.log_gamma_difference(num_dims / 2., df / 2.))
  digamma_factor = ((num_dims + df) / 2.
                    * (tf.math.digamma((num_dims + df) / 2.)
                       - tf.math.digamma(df / 2.)))
  return shape_factor + beta_factor + digamma_factor
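# Assuming this is the multivariate Student's t entropy (as the df / scale /
# event-shape usage suggests), the three factors above add up to
#
#   H = log|det(scale)| + (d/2) * log(df * pi)
#       + lgamma(df/2) - lgamma((df + d)/2)
#       + ((df + d)/2) * (digamma((df + d)/2) - digamma(df/2)),
#
# with log_gamma_difference(d/2, df/2) supplying the lgamma difference.
# Illustrative d = 1 spot check against scipy (local, hypothetical names):
import numpy as np
from scipy import special, stats

nu, sigma = 5., 2.
h = (np.log(sigma) + 0.5 * np.log(nu * np.pi)
     + special.gammaln(nu / 2.) - special.gammaln((nu + 1.) / 2.)
     + (nu + 1.) / 2. * (special.digamma((nu + 1.) / 2.)
                         - special.digamma(nu / 2.)))
np.testing.assert_allclose(h, stats.t(df=nu, scale=sigma).entropy(), rtol=1e-6)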
def testLogGammaDifference(self):
  y = tfp.distributions.HalfCauchy(loc=8., scale=10.).sample(
      10000, test_util.test_seed())
  x = tfp.distributions.Uniform(low=0., high=8.).sample(
      10000, test_util.test_seed())
  naive_ = tf.math.lgamma(y) - tf.math.lgamma(x + y)
  naive, sophisticated = self.evaluate(
      [naive_, tfp_math.log_gamma_difference(x, y)])
  # Loose tolerance because the naive method is inaccurate.
  self.assertAllClose(naive, sophisticated, rtol=1e-3)
def _log_normalization(self, concentration=None, mean_direction=None):
  """Computes the log-normalizer of the distribution."""
  if concentration is None:
    concentration = tf.convert_to_tensor(self.concentration)
  event_size = tf.cast(
      self._event_shape_tensor(mean_direction=mean_direction)[-1], self.dtype)
  concentration1 = concentration + (event_size - 1.) / 2.
  concentration0 = (event_size - 1.) / 2.
  return ((concentration1 + concentration0) * np.log(2.)
          + concentration0 * np.log(np.pi)
          + tfp_math.log_gamma_difference(concentration0, concentration1))
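# In closed form the return value above is (kappa = concentration, d = event
# size, alpha1 = kappa + (d-1)/2, alpha0 = (d-1)/2):
#
#   log Z = (alpha1 + alpha0) * log(2) + alpha0 * log(pi)
#           + lgamma(alpha1) - lgamma(alpha0 + alpha1),
#
# i.e. Z = 2**(alpha0 + alpha1) * pi**alpha0 * Gamma(alpha1) / Gamma(alpha0 + alpha1),
# which matches the Power Spherical normalizing constant (an inference from the
# parameterization, not stated in the snippet).  The log_gamma_difference call
# supplies the lgamma difference stably for large concentration.  Illustrative
# evaluation with hypothetical values:
import numpy as np
from scipy import special

kappa, dim = 10., 3.
alpha1, alpha0 = kappa + (dim - 1.) / 2., (dim - 1.) / 2.
log_z = ((alpha1 + alpha0) * np.log(2.) + alpha0 * np.log(np.pi)
         + special.gammaln(alpha1) - special.gammaln(alpha0 + alpha1))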
def _mean(self):
  df = tf.convert_to_tensor(self.df)
  loc = tf.convert_to_tensor(self.loc)
  scale = tf.convert_to_tensor(self.scale)
  log_correction = (tf.math.log(scale) + np.log(2.)
                    + 0.5 * (tf.math.log(df) - np.log(np.pi))
                    - tfp_math.log_gamma_difference(0.5, 0.5 * df)
                    - tf.math.log(df - 1))
  mean = tf.math.exp(log_correction) + loc
  if self.allow_nan_stats:
    return tf.where(df > 1., mean,
                    dtype_util.as_numpy_dtype(self.dtype)(np.nan))
  else:
    return distribution_util.with_dependencies([
        assert_util.assert_less(
            tf.ones([], dtype=self.dtype), df,
            message='mean not defined for components of df <= 1'),
    ], mean)
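# Exponentiating `log_correction` above gives the closed form (assuming a
# half-Student-t with degrees of freedom df, scale sigma, and offset loc)
#
#   mean = loc + 2 * sigma * sqrt(df / pi) * Gamma((df + 1)/2)
#                / ((df - 1) * Gamma(df / 2)),        defined for df > 1,
#
# where -log_gamma_difference(0.5, df/2) contributes
# lgamma((df + 1)/2) - lgamma(df/2).  Illustrative evaluation (local names):
import numpy as np
from scipy import special

df, sigma, loc = 5., 2., 1.
mean = loc + 2. * sigma * np.sqrt(df / np.pi) * np.exp(
    special.gammaln((df + 1.) / 2.) - special.gammaln(df / 2.)) / (df - 1.)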
def log_prob(x, df, loc, scale): """Compute log probability of Student T distribution. Note that scale can be negative. Args: x: Floating-point `Tensor`. Where to compute the log probabilities. df: Floating-point `Tensor`. The degrees of freedom of the distribution(s). `df` must contain only positive values. loc: Floating-point `Tensor`; the location(s) of the distribution(s). scale: Floating-point `Tensor`; the scale(s) of the distribution(s). Returns: A `Tensor` with shape broadcast according to the arguments. """ # Writing `y` this way reduces XLA mem copies. y = (x - loc) * (tf.math.rsqrt(df) / scale) log_unnormalized_prob = -0.5 * (df + 1.) * log1psquare(y) log_normalization = (tf.math.log(tf.abs(scale)) + 0.5 * tf.math.log(df) + 0.5 * np.log(np.pi) + tfp_math.log_gamma_difference(0.5, 0.5 * df)) return log_unnormalized_prob - log_normalization
def _mean(self, df=None):
  df = tf.convert_to_tensor(self.df if df is None else df)
  return np.sqrt(2.) * tf.exp(
      -tfp_math.log_gamma_difference(0.5, 0.5 * df))
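# Written out, the value above is sqrt(2) * Gamma((df + 1)/2) / Gamma(df/2),
# which is the mean of a Chi distribution with df degrees of freedom; the
# negated log_gamma_difference(0.5, df/2) is lgamma((df + 1)/2) - lgamma(df/2).
# Illustrative spot check (local names only):
import numpy as np
from scipy import special, stats

df = 5.
chi_mean = np.sqrt(2.) * np.exp(
    special.gammaln((df + 1.) / 2.) - special.gammaln(df / 2.))
np.testing.assert_allclose(chi_mean, stats.chi.mean(df), rtol=1e-12)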
def _log_normalization(self):
  df = tf.convert_to_tensor(self.df)
  num_dims = tf.cast(self.event_shape_tensor()[0], self.dtype)
  return (tfp_math.log_gamma_difference(num_dims / 2., df / 2.)
          + num_dims / 2. * (tf.math.log(df) + np.log(np.pi))
          + self.scale.log_abs_determinant())
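# Written out (d = num_dims, nu = df), the return value above is
#
#   log Z = lgamma(nu/2) - lgamma((nu + d)/2) + (d/2) * log(nu * pi)
#           + log|det(scale)|,
#
# the log normalizer of a multivariate Student's t; for d = 1 it reduces to
# the `log_normalization` term in the scalar `log_prob` above and equals
# -logpdf at the location.  Illustrative d = 1 spot check (local names only):
import numpy as np
from scipy import special, stats

nu, sigma = 5., 2.
log_z = (special.gammaln(nu / 2.) - special.gammaln((nu + 1.) / 2.)
         + 0.5 * np.log(nu * np.pi) + np.log(sigma))
np.testing.assert_allclose(log_z, -stats.t.logpdf(0., nu, scale=sigma),
                           rtol=1e-10)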