def _log_prob(self, counts):
  with tf.control_dependencies(self._maybe_assert_valid_sample(counts)):
    log_p = (tf.math.log(self._probs) if self._logits is None
             else tf.math.log_softmax(self._logits))
    k = tf.convert_to_tensor(self.total_count)
    return (tf.reduce_sum(counts * log_p, axis=-1) +  # log_unnorm_prob
            tfp_math.log_combinations(k, counts))  # -log_normalization
def ndtri(p, name="ndtri"): """The inverse of the CDF of the Normal distribution function. Returns x such that the area under the pdf from minus infinity to x is equal to p. A piece-wise rational approximation is done for the function. This is a port of the implementation in netlib. Args: p: `Tensor` of type `float32`, `float64`. name: Python string. A name for the operation (default="ndtri"). Returns: x: `Tensor` with `dtype=p.dtype`. Raises: TypeError: if `p` is not floating-type. """ with tf.name_scope(name): p = tf.convert_to_tensor(p, name="p") if dtype_util.as_numpy_dtype(p.dtype) not in [np.float32, np.float64]: raise TypeError( "p.dtype=%s is not handled, see docstring for supported types." % p.dtype) return _ndtri(p)
def _log_survival_function(self, x):
  scale = tf.convert_to_tensor(self.scale)
  return self._extend_support(
      x, scale, lambda x: self.concentration * tf.math.log(scale / x),
      alt=0.)
def _log_normalization(self, concentration=None, name='log_normalization'):
  """Returns the log normalization of an LKJ distribution.

  Args:
    concentration: `float` or `double` `Tensor`. The positive concentration
      parameter of the LKJ distributions.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_z: A Tensor of the same shape and dtype as `concentration`, containing
      the corresponding log normalizers.
  """
  # The formula is from D. Lewandowski et al [1], p. 1999, from the
  # proof that eqs 16 and 17 are equivalent.
  with tf.name_scope(name or 'log_normalization_lkj'):
    concentration = tf.convert_to_tensor(
        self.concentration if concentration is None else concentration)
    logpi = np.log(np.pi)
    ans = tf.zeros_like(concentration)
    for k in range(1, self.dimension):
      ans = ans + logpi * (k / 2.)
      ans = ans + tf.math.lgamma(
          concentration + (self.dimension - 1 - k) / 2.)
      ans = ans - tf.math.lgamma(concentration + (self.dimension - 1) / 2.)
    return ans
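
# For reference (ours, not from the source), the closed form the loop above
# accumulates: with concentration `eta` and dimension `n`,
#
#   log_z = sum_{k=1}^{n-1} [ (k/2) log(pi)
#                             + lgamma(eta + (n - 1 - k)/2)
#                             - lgamma(eta + (n - 1)/2) ]
#
# A pure-NumPy mirror for scalar inputs, assuming scipy is available:
def _lkj_log_normalization_numpy(eta, n):
  from scipy.special import gammaln
  ans = 0.
  for k in range(1, n):
    ans += np.log(np.pi) * (k / 2.)
    ans += gammaln(eta + (n - 1 - k) / 2.)
    ans -= gammaln(eta + (n - 1) / 2.)
  return ans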
def __init__(self, distribution, sample_shape=(), validate_args=False,
             name=None):
  """Construct the `Sample` distribution.

  Args:
    distribution: The base distribution instance to transform. Typically an
      instance of `Distribution`.
    sample_shape: `int` scalar or vector `Tensor` representing the shape of a
      single sample.
    validate_args: Python `bool`. Whether to validate input with asserts.
      If `validate_args` is `False`, and the inputs are invalid, correct
      behavior is not guaranteed.
    name: The name for ops managed by the distribution.
      Default value: `None` (i.e., `'Sample' + distribution.name`).
  """
  parameters = dict(locals())
  name = name or 'Sample' + distribution.name
  self._distribution = distribution
  with tf.name_scope(name) as name:
    sample_shape = distribution_util.expand_to_vector(
        tf.convert_to_tensor(
            sample_shape, dtype_hint=tf.int32, name='sample_shape'))
    self._sample_shape = sample_shape
    super(Sample, self).__init__(
        dtype=self._distribution.dtype,
        reparameterization_type=self._distribution.reparameterization_type,
        validate_args=validate_args,
        allow_nan_stats=self._distribution.allow_nan_stats,
        parameters=parameters,
        name=name)
def _extend_support(self, x, scale, f, alt):
  """Returns `f(x)` if x is in the support, and `alt` otherwise.

  Given `f` which is defined on the support of this distribution
  (e.g. x > scale), extend the function definition to the real line
  by defining `f(x) = alt` for `x < scale`.

  Args:
    x: Floating-point Tensor to evaluate `f` at.
    scale: Floating-point Tensor by which to verify `x` validity.
    f: Lambda that takes in a tensor and returns a tensor. This represents
      the function whose domain of definition we want to extend.
    alt: Python or numpy literal representing the value to use for extending
      the domain.

  Returns:
    Tensor representing an extension of `f(x)`.
  """
  if self.validate_args:
    return f(x)
  scale = tf.convert_to_tensor(self.scale) if scale is None else scale
  is_invalid = x < scale
  # We need to do this to ensure gradients are sound.
  y = f(tf.where(is_invalid, scale, x))
  if alt == 0.:
    alt = tf.zeros([], dtype=y.dtype)
  elif alt == 1.:
    alt = tf.ones([], dtype=y.dtype)
  else:
    alt = dtype_util.as_numpy_dtype(self.dtype)(alt)
  return tf.where(is_invalid, alt, y)
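
# The `tf.where` masking above is the standard "double-where" trick: even
# when a branch's value is discarded, evaluating `f` at out-of-support points
# can poison gradients with NaN/inf. A standalone sketch of the same idea
# (our own example, assuming TF 2.x eager execution):
def _demo_double_where():
  x = tf.constant([-1., 4.])
  with tf.GradientTape() as tape:
    tape.watch(x)
    safe_x = tf.where(x > 0., x, tf.ones_like(x))  # mask invalid inputs
    y = tf.where(x > 0., tf.math.log(safe_x), tf.zeros_like(x))
  return tape.gradient(y, x)  # [0., 0.25]; without masking: [nan, 0.25]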
def _log_prob(self, counts):
  counts = self._maybe_assert_valid_sample(counts)
  logits = self._logits_parameter_no_checks()
  total_count = tf.convert_to_tensor(self.total_count)
  unnorm = _log_unnormalized_prob(logits, counts, total_count)
  norm = _log_normalization(counts, total_count)
  return unnorm - norm
def __init__(self,
             skewness=None,
             tailweight=None,
             validate_args=False,
             name="sinh_arcsinh"):
  """Instantiates the `SinhArcsinh` bijector.

  Args:
    skewness: Skewness parameter. Float-type `Tensor`. Default is `0` of type
      `float32`.
    tailweight: Tailweight parameter. Positive `Tensor` of same `dtype` as
      `skewness` and broadcastable `shape`. Default is `1` of type `float32`.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    name: Python `str` name given to ops managed by this object.
  """
  with tf.name_scope(name) as name:
    tailweight = 1. if tailweight is None else tailweight
    skewness = 0. if skewness is None else skewness
    dtype = dtype_util.common_dtype([tailweight, skewness],
                                    dtype_hint=tf.float32)
    self._skewness = tensor_util.convert_nonref_to_tensor(
        skewness, dtype=dtype, name="skewness")
    self._tailweight = tensor_util.convert_nonref_to_tensor(
        tailweight, dtype=dtype, name="tailweight")
    self._scale_number = tf.convert_to_tensor(2., dtype=dtype)
    super(SinhArcsinh, self).__init__(
        forward_min_event_ndims=0,
        validate_args=validate_args,
        name=name)
def _inverse(self, y):
  map_values = tf.convert_to_tensor(self.map_values)
  flat_y = tf.reshape(y, shape=[-1])
  # Search for the indices of map_values that are closest to flat_y.
  # Since map_values is strictly increasing, the closest is either the
  # first one that is strictly greater than flat_y, or the one before it.
  upper_candidates = tf.minimum(
      tf.size(map_values) - 1,
      tf.searchsorted(map_values, values=flat_y, side='right'))
  lower_candidates = tf.maximum(0, upper_candidates - 1)
  candidates = tf.stack([lower_candidates, upper_candidates], axis=-1)
  lower_cand_diff = tf.abs(flat_y - self._forward(lower_candidates))
  upper_cand_diff = tf.abs(flat_y - self._forward(upper_candidates))
  if self.validate_args:
    with tf.control_dependencies([
        assert_util.assert_near(
            tf.minimum(lower_cand_diff, upper_cand_diff),
            0,
            message='inverse value not found')
    ]):
      candidates = tf.identity(candidates)
  candidate_selector = tf.stack([
      tf.range(tf.size(flat_y), dtype=tf.int32),
      tf.argmin([lower_cand_diff, upper_cand_diff], output_type=tf.int32)
  ], axis=-1)
  return tf.reshape(
      tf.gather_nd(candidates, candidate_selector), shape=y.shape)
def _mean(self):
  concentration = tf.convert_to_tensor(self.concentration)
  scale = tf.convert_to_tensor(self.scale)
  mean = scale / (concentration - 1.)
  if self.allow_nan_stats:
    assertions = []
  else:
    assertions = [
        assert_util.assert_less(
            tf.ones([], self.dtype), concentration,
            message='mean undefined when any concentration <= 1')
    ]
  with tf.control_dependencies(assertions):
    return tf.where(
        concentration > 1., mean,
        dtype_util.as_numpy_dtype(self.dtype)(np.nan))
def _mode(self, samples=None):
  # Samples count can vary by batch member. Use map_fn to compute mode for
  # each batch separately.
  def _get_mode(samples):
    # TODO(b/123985779): Switch to tf.unique_with_counts_v2 when exposed
    count = gen_array_ops.unique_with_counts_v2(samples, axis=[0]).count
    return tf.argmax(count)

  if samples is None:
    samples = tf.convert_to_tensor(self._samples)
  num_samples = self._compute_num_samples(samples)

  # Flatten samples for each batch.
  if self._event_ndims == 0:
    flattened_samples = tf.reshape(samples, [-1, num_samples])
    mode_shape = self._batch_shape_tensor(samples)
  else:
    event_size = tf.reduce_prod(self._event_shape_tensor(samples))
    mode_shape = tf.concat(
        [self._batch_shape_tensor(samples),
         self._event_shape_tensor(samples)],
        axis=0)
    flattened_samples = tf.reshape(samples, [-1, num_samples, event_size])

  indices = tf.map_fn(_get_mode, flattened_samples, dtype=tf.int64)
  full_indices = tf.stack(
      [tf.range(tf.shape(indices)[0]),
       tf.cast(indices, tf.int32)],
      axis=1)

  mode = tf.gather_nd(flattened_samples, full_indices)
  return tf.reshape(mode, mode_shape)
def maybe_assert_bernoulli_param_correctness(
    is_init, validate_args, probs, probits):
  """Return assertions for `ProbitBernoulli`-type distributions."""
  if is_init:
    x, name = (probs, 'probs') if probits is None else (probits, 'probits')
    if not dtype_util.is_floating(x.dtype):
      raise TypeError(
          'Argument `{}` must have floating type.'.format(name))

  if not validate_args:
    return []

  assertions = []

  if probs is not None:
    if is_init != tensor_util.is_ref(probs):
      probs = tf.convert_to_tensor(probs)
      one = tf.constant(1., probs.dtype)
      assertions += [
          assert_util.assert_non_negative(
              probs, message='probs has components less than 0.'),
          assert_util.assert_less_equal(
              probs, one, message='probs has components greater than 1.')
      ]

  return assertions
def log1psquare(x, name=None):
  """Numerically stable calculation of `log(1 + x**2)` for small or large `|x|`.

  For sufficiently large `x` we use the following observation:

  ```none
  log(1 + x**2) = 2 log(|x|) + log(1 + 1 / x**2)
                --> 2 log(|x|)  as x --> inf
  ```

  Numerically, `log(1 + 1 / x**2)` is `0` when `1 / x**2` is small relative to
  machine epsilon.

  Args:
    x: Float `Tensor` input.
    name: Python string indicating the name of the TensorFlow operation.
      Default value: `'log1psquare'`.

  Returns:
    log1psq: Float `Tensor` representing `log(1. + x**2.)`.
  """
  with tf.name_scope(name or 'log1psquare'):
    x = tf.convert_to_tensor(x, dtype_hint=tf.float32, name='x')
    dtype = dtype_util.as_numpy_dtype(x.dtype)
    eps = np.finfo(dtype).eps.astype(np.float64)
    is_large = tf.abs(x) > (eps**-0.5).astype(dtype)
    # Mask out small x's so the gradient correctly propagates.
    abs_large_x = tf.where(is_large, tf.abs(x), tf.ones([], x.dtype))
    return tf.where(is_large, 2. * tf.math.log(abs_large_x),
                    tf.math.log1p(tf.square(x)))
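
# Usage sketch (ours, not from the source): in float32 the naive form
# overflows once `x**2` exceeds the dtype's max, while `log1psquare` switches
# to the 2*log(|x|) branch and stays finite.
def _demo_log1psquare():
  x = tf.constant(1e20, dtype=tf.float32)
  naive = tf.math.log1p(tf.square(x))  # inf: 1e40 overflows float32
  stable = log1psquare(x)              # ~92.1 == 2 * log(1e20)
  return naive, stable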
def ndtr(x, name="ndtr"): """Normal distribution function. Returns the area under the Gaussian probability density function, integrated from minus infinity to x: ``` 1 / x ndtr(x) = ---------- | exp(-0.5 t**2) dt sqrt(2 pi) /-inf = 0.5 (1 + erf(x / sqrt(2))) = 0.5 erfc(x / sqrt(2)) ``` Args: x: `Tensor` of type `float32`, `float64`. name: Python string. A name for the operation (default="ndtr"). Returns: ndtr: `Tensor` with `dtype=x.dtype`. Raises: TypeError: if `x` is not floating-type. """ with tf.name_scope(name): x = tf.convert_to_tensor(x, name="x") if dtype_util.as_numpy_dtype(x.dtype) not in [np.float32, np.float64]: raise TypeError( "x.dtype=%s is not handled, see docstring for supported types." % x.dtype) return _ndtr(x)
def _log_cdf(self, x):
  scale = tf.convert_to_tensor(self.scale)
  return self._extend_support(
      x, scale,
      lambda x: tf.math.log1p(-(scale / x)**self.concentration),
      alt=-np.inf)
def _maybe_validate_shape_override(self, override_shape, base_is_scalar,
                                   validate_args, name):
  """Helper to __init__ which ensures override batch/event_shape are valid."""
  if override_shape is None:
    override_shape = []

  override_shape = tf.convert_to_tensor(
      override_shape, dtype=tf.int32, name=name)

  if not dtype_util.is_integer(override_shape.dtype):
    raise TypeError("shape override must be an integer")

  override_is_scalar = _is_scalar_from_shape_tensor(override_shape)
  if tf.get_static_value(override_is_scalar):
    return self._empty

  dynamic_assertions = []

  if tensorshape_util.rank(override_shape.shape) is not None:
    if tensorshape_util.rank(override_shape.shape) != 1:
      raise ValueError("shape override must be a vector")
  elif validate_args:
    dynamic_assertions += [
        assert_util.assert_rank(
            override_shape, 1, message="shape override must be a vector")
    ]

  if tf.get_static_value(override_shape) is not None:
    if any(s < 0 for s in tf.get_static_value(override_shape)):
      raise ValueError("shape override must have non-negative elements")
  elif validate_args:
    dynamic_assertions += [
        assert_util.assert_non_negative(
            override_shape,
            message="shape override must have non-negative elements")
    ]

  is_both_nonscalar = prefer_static.logical_and(
      prefer_static.logical_not(base_is_scalar),
      prefer_static.logical_not(override_is_scalar))
  if tf.get_static_value(is_both_nonscalar) is not None:
    if tf.get_static_value(is_both_nonscalar):
      raise ValueError("base distribution not scalar")
  elif validate_args:
    dynamic_assertions += [
        assert_util.assert_equal(
            is_both_nonscalar, False,
            message="base distribution not scalar")
    ]

  if not dynamic_assertions:
    return override_shape
  return distribution_util.with_dependencies(
      dynamic_assertions, override_shape)
def matrix_diag_transform(matrix, transform=None, name=None):
  """Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.

  Create a trainable covariance defined by a Cholesky factor:

  ```python
  # Transform network layer into 2 x 2 array.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))

  # Make the diagonal positive. If the upper triangle was zero, this would be a
  # valid Cholesky factor.
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # LinearOperatorLowerTriangular ignores the upper triangle.
  operator = LinearOperatorLowerTriangular(chol)
  ```

  Example of heteroskedastic 2-D linear regression.

  ```python
  tfd = tfp.distributions

  # Get a trainable Cholesky factor.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # Get a trainable mean.
  mu = tf.contrib.layers.fully_connected(activations, 2)

  # This is a fully trainable multivariate normal!
  dist = tfd.MultivariateNormalTriL(mu, chol)

  # Standard log loss. Minimizing this will 'train' mu and chol, and then dist
  # will be a distribution predicting labels as multivariate Gaussians.
  loss = -1 * tf.reduce_mean(dist.log_prob(labels))
  ```

  Args:
    matrix: Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
      equal.
    transform: Element-wise function mapping `Tensors` to `Tensors`. To be
      applied to the diagonal of `matrix`. If `None`, `matrix` is returned
      unchanged. Defaults to `None`.
    name: A name to give created ops. Defaults to 'matrix_diag_transform'.

  Returns:
    A `Tensor` with same shape and `dtype` as `matrix`.
  """
  with tf.name_scope(name or 'matrix_diag_transform'):
    matrix = tf.convert_to_tensor(matrix, name='matrix')
    if transform is None:
      return matrix
    # Replace the diag with transformed diag.
    diag = tf.linalg.diag_part(matrix)
    transformed_diag = transform(diag)
    transformed_mat = tf.linalg.set_diag(matrix, transformed_diag)

  return transformed_mat
def _entropy(self):
  concentration = tf.convert_to_tensor(self.concentration)
  k = tf.cast(tf.shape(concentration)[-1], self.dtype)
  total_concentration = tf.reduce_sum(concentration, axis=-1)
  return (tf.math.lbeta(concentration) +
          ((total_concentration - k) *
           tf.math.digamma(total_concentration)) -
          tf.reduce_sum((concentration - 1.) *
                        tf.math.digamma(concentration), axis=-1))
def _cdf(self, x):
  df = tf.convert_to_tensor(self.df)
  # Take Abs(scale) to make subsequent where work correctly.
  y = (x - self.loc) / tf.abs(self.scale)
  x_t = df / (y**2. + df)
  neg_cdf = 0.5 * tf.math.betainc(
      0.5 * tf.broadcast_to(df, prefer_static.shape(x_t)), 0.5, x_t)
  return tf.where(y < 0., neg_cdf, 1. - neg_cdf)
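
# Note (ours): the code above uses the standard identity for the Student-t
# CDF in terms of the regularized incomplete beta function I_z(a, b): with
# y = (x - loc) / |scale| and z = df / (df + y**2),
#
#   P(Y <= y) = 0.5 * I_z(df/2, 1/2)        for y < 0
#   P(Y <= y) = 1 - 0.5 * I_z(df/2, 1/2)    for y >= 0
#
# `tf.math.betainc` computes the regularized incomplete beta function.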
def _mean(self):
  df = tf.convert_to_tensor(self.df)
  loc = tf.convert_to_tensor(self.loc)
  mean = loc * tf.ones(self._batch_shape_tensor(loc=loc), dtype=self.dtype)
  if self.allow_nan_stats:
    return tf.where(
        df > 1., mean,
        dtype_util.as_numpy_dtype(self.dtype)(np.nan))
  else:
    return distribution_util.with_dependencies([
        assert_util.assert_less(
            tf.ones([], dtype=self.dtype), df,
            message='mean not defined for components of df <= 1'),
    ], mean)
def log_add_exp(x, y, name=None):
  """Computes `log(exp(x) + exp(y))` in a numerically stable way.

  Args:
    x: `float` `Tensor` broadcastable with `y`.
    y: `float` `Tensor` broadcastable with `x`.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., `'log_add_exp'`).

  Returns:
    log_add_exp: `log(exp(x) + exp(y))` computed in a numerically stable way.
  """
  with tf.name_scope(name or 'log_add_exp'):
    dtype = dtype_util.common_dtype([x, y], dtype_hint=tf.float32)
    x = tf.convert_to_tensor(x, dtype=dtype, name='x')
    y = tf.convert_to_tensor(y, dtype=dtype, name='y')
    return tf.maximum(x, y) + tf.math.softplus(-abs(x - y))
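
# Usage sketch (ours, not from the source): the max + softplus rewrite avoids
# the overflow that the direct form hits for large arguments.
def _demo_log_add_exp():
  x = tf.constant(1000., dtype=tf.float32)
  naive = tf.math.log(tf.exp(x) + tf.exp(x))  # inf: exp(1000) overflows
  stable = log_add_exp(x, x)                  # 1000 + log(2) ~= 1000.693
  return naive, stable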
def _log_prob(self, counts):
  counts = self._maybe_assert_valid_sample(counts)
  concentration = tf.convert_to_tensor(self.concentration)
  ordered_prob = (tf.math.lbeta(concentration + counts) -
                  tf.math.lbeta(concentration))
  return ordered_prob + tfp_math.log_combinations(self.total_count, counts)
def _log_prob(self, x):
  concentration = tf.convert_to_tensor(self.concentration)
  mixing_concentration = tf.convert_to_tensor(self.mixing_concentration)
  mixing_rate = tf.convert_to_tensor(self.mixing_rate)
  log_normalization = (
      tf.math.lgamma(concentration) +
      tf.math.lgamma(mixing_concentration) -
      tf.math.lgamma(concentration + mixing_concentration) -
      mixing_concentration * tf.math.log(mixing_rate))
  x = self._maybe_assert_valid_sample(x)
  log_unnormalized_prob = (
      tf.math.xlogy(concentration - 1., x) -
      (concentration + mixing_concentration) *
      tf.math.log(x + mixing_rate))
  return log_unnormalized_prob - log_normalization
def _sample_n(self, n, seed=None):
  concentration = tf.convert_to_tensor(self.concentration)
  mixing_concentration = tf.convert_to_tensor(self.mixing_concentration)
  mixing_rate = tf.convert_to_tensor(self.mixing_rate)

  seed = SeedStream(seed, 'gamma_gamma')
  rate = tf.random.gamma(
      shape=[n],
      # Be sure to draw enough rates for the fully-broadcasted gamma-gamma.
      alpha=mixing_concentration + tf.zeros_like(concentration),
      beta=mixing_rate,
      dtype=self.dtype,
      seed=seed())
  return tf.random.gamma(
      shape=[],
      alpha=concentration,
      beta=rate,
      dtype=self.dtype,
      seed=seed())
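
# Note (ours): this samples the Gamma-Gamma compound hierarchically:
#
#   rate ~ Gamma(mixing_concentration, mixing_rate)
#   X | rate ~ Gamma(concentration, rate)
#
# which is why the rates are first drawn at the full broadcast shape before
# being passed as `beta` to the second draw.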
def _cdf(self, x):
  scale = tf.convert_to_tensor(self.scale)
  return self._extend_support(
      x, scale,
      lambda x: -tf.math.expm1(self.concentration * tf.math.log(scale / x)),
      alt=0.)
def _size(input, out_type=tf.int32, name=None):  # pylint: disable=redefined-builtin
  if not hasattr(input, 'shape'):
    x = np.array(input)
    input = tf.convert_to_tensor(input) if x.dtype == np.object_ else x
  n = tensorshape_util.num_elements(tf.TensorShape(input.shape))
  if n is None:
    return tf.size(input, out_type=out_type, name=name)
  return np.array(n).astype(_numpy_dtype(out_type))
def _covariance(self):
  p = self._probs_parameter_no_checks()
  k = tf.convert_to_tensor(self.total_count)
  return tf.linalg.set_diag(
      -k[..., tf.newaxis, tf.newaxis] *
      (p[..., :, tf.newaxis] * p[..., tf.newaxis, :]),  # Outer product.
      k[..., tf.newaxis] * p * (1. - p))
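
# Note (ours): this implements the multinomial covariance
#   Cov[X] = n * (diag(p) - p p^T),
# i.e. off-diagonal entries -n * p_i * p_j from the outer product, with the
# diagonal overwritten by the binomial variances n * p_i * (1 - p_i).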
def _sample_n(self, n, seed=None): seed = SeedStream(seed, "beta") concentration1 = tf.convert_to_tensor(self.concentration1) concentration0 = tf.convert_to_tensor(self.concentration0) shape = self._batch_shape_tensor(concentration1, concentration0) expanded_concentration1 = tf.broadcast_to(concentration1, shape) expanded_concentration0 = tf.broadcast_to(concentration0, shape) gamma1_sample = tf.random.gamma(shape=[n], alpha=expanded_concentration1, dtype=self.dtype, seed=seed()) gamma2_sample = tf.random.gamma(shape=[n], alpha=expanded_concentration0, dtype=self.dtype, seed=seed()) beta_sample = gamma1_sample / (gamma1_sample + gamma2_sample) return beta_sample
def _mode(self):
  concentration1 = tf.convert_to_tensor(self.concentration1)
  concentration0 = tf.convert_to_tensor(self.concentration0)
  mode = (concentration1 - 1.) / (concentration1 + concentration0 - 2.)
  with tf.control_dependencies([] if self.allow_nan_stats else [  # pylint: disable=g-long-ternary
      assert_util.assert_less(
          tf.ones([], dtype=self.dtype), concentration1,
          message="Mode undefined for concentration1 <= 1."),
      assert_util.assert_less(
          tf.ones([], dtype=self.dtype), concentration0,
          message="Mode undefined for concentration0 <= 1.")
  ]):
    return tf.where(
        (concentration1 > 1.) & (concentration0 > 1.),
        mode,
        dtype_util.as_numpy_dtype(self.dtype)(np.nan))
def __init__(self, bijectors, block_sizes=None, validate_args=False,
             name=None):
  """Creates the bijector.

  Args:
    bijectors: A non-empty list of bijectors.
    block_sizes: A 1-D integer `Tensor` with each element signifying the
      length of the block of the input vector to pass to the corresponding
      bijector. The length of `block_sizes` must be equal to the length of
      `bijectors`. If left as None, a vector of 1's is used.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    name: Python `str`, name given to ops managed by this object. Default:
      E.g., `Blockwise([Exp(), Softplus()]).name ==
      'blockwise_of_exp_and_softplus'`.

  Raises:
    NotImplementedError: If a bijector with `event_ndims` > 1 or one that
      reshapes events is passed.
    ValueError: If the `bijectors` list is empty.
    ValueError: If the size of `block_sizes` does not equal the length of
      `bijectors`, or is not a vector.
  """
  if not name:
    name = 'blockwise_of_' + '_and_'.join([b.name for b in bijectors])
    name = name.replace('/', '')
  with tf.name_scope(name) as name:
    super(Blockwise, self).__init__(
        forward_min_event_ndims=1, validate_args=validate_args, name=name)

    if not bijectors:
      raise ValueError('`bijectors` must not be empty.')

    for bijector in bijectors:
      if (bijector.forward_min_event_ndims > 1 or
          (bijector.inverse_min_event_ndims !=
           bijector.forward_min_event_ndims)):
        # TODO(siege): In the future, it can be reasonable to support N-D
        # bijectors by concatenating along some specific axis, broadcasting
        # low-D bijectors appropriately.
        raise NotImplementedError('Only scalar and vector event-shape '
                                  'bijectors that do not alter the '
                                  'shape are supported at this time.')

    self._bijectors = bijectors

    if block_sizes is None:
      block_sizes = tf.ones(len(bijectors), dtype=tf.int32)
    self._block_sizes = tf.convert_to_tensor(
        block_sizes, name='block_sizes', dtype_hint=tf.int32)
    self._block_sizes = _validate_block_sizes(
        self._block_sizes, bijectors, validate_args)