Example 1
    def _cdf(self, x):
        low = tf.convert_to_tensor(self.low)
        high = tf.convert_to_tensor(self.high)
        peak = tf.convert_to_tensor(self.peak)

        interval_length = high - low
        # Because the PDF is not smooth at the peak, we have to treat each side
        # somewhat differently. The PDF is two line segments, so the CDF is
        # piecewise quadratic.
        result_inside_interval = tf.where(
            (x >= low) & (x <= peak),
            # (x - low) ** 2 / ((high - low) * (peak - low))
            tf.math.squared_difference(x, low) / (interval_length *
                                                  (peak - low)),
            # 1 - (high - x) ** 2 / ((high - low) * (high - peak))
            1. - tf.math.squared_difference(high, x) / (interval_length *
                                                        (high - peak)))

        # Now handle the tails: the CDF is 0 to the left of low and 1 to the
        # right of high.
        result_if_not_big = tf.where(x < low, tf.zeros_like(x),
                                     result_inside_interval)

        return tf.where(x >= high, tf.ones_like(x), result_if_not_big)
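
The `_cdf` above is the Triangular distribution's CDF. A minimal numeric sketch of the piecewise-quadratic formula, assuming the public `tfp.distributions.Triangular` API (values hand-checked against the quadratics in the comments):

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

# Left branch: (0.5 - 0)**2 / (4 * 1) = 0.0625; at the peak: (1 - 0)**2 / (4 * 1) = 0.25;
# right branch: 1 - (4 - 3)**2 / (4 * 3) ~= 0.9167.
dist = tfd.Triangular(low=0., high=4., peak=1.)
dist.cdf([-1., 0.5, 1., 3., 5.])
# ==> approximately [0., 0.0625, 0.25, 0.9167, 1.]
```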
Example 2
 def _mode(self):
   concentration = tf.convert_to_tensor(self.concentration)
   rate = tf.convert_to_tensor(self.rate)
   mode = (concentration - 1.) / rate
   if self.allow_nan_stats:
     assertions = []
   else:
     assertions = [assert_util.assert_less(
         tf.ones([], self.dtype), concentration,
         message='Mode not defined when any concentration <= 1.')]
   with tf.control_dependencies(assertions):
     return tf.where(
         concentration > 1.,
         mode,
         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
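
A hedged sketch of the `allow_nan_stats` contract this `_mode` implements, assuming the public `tfd.Gamma` API:

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

# mode = (concentration - 1) / rate, defined only for concentration > 1.
tfd.Gamma(concentration=[0.5, 3.], rate=2.).mode()
# ==> [nan, 1.]  (allow_nan_stats defaults to True)

# With allow_nan_stats=False, the assertion above fires instead:
# tfd.Gamma(concentration=0.5, rate=2., allow_nan_stats=False).mode()  # raises
```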
Example 3
    def _prob(self, x):
        low = tf.convert_to_tensor(self.low)
        high = tf.convert_to_tensor(self.high)
        peak = tf.convert_to_tensor(self.peak)

        if self.validate_args:
            with tf.control_dependencies([
                    assert_util.assert_greater_equal(x, low),
                    assert_util.assert_less_equal(x, high)
            ]):
                x = tf.identity(x)

        interval_length = high - low
        # This is the PDF when low <= x <= high. The PDF looks like a
        # triangle, so we treat each line segment separately.
        result_inside_interval = tf.where(
            (x >= low) & (x <= peak),
            # Line segment from (low, 0) to (peak, 2 / (high - low)).
            2. * (x - low) / (interval_length * (peak - low)),
            # Line segment from (peak, 2 / (high - low)) to (high, 0).
            2. * (high - x) / (interval_length * (high - peak)))

        return tf.where((x < low) | (x > high), tf.zeros_like(x),
                        result_inside_interval)
Example 4
 def _mode(self):
     df = tf.convert_to_tensor(self.df)
     mode = df - 2.
     if self.allow_nan_stats:
         assertions = []
     else:
         assertions = [
             assert_util.assert_less(
                 2. * tf.ones([], self.dtype),
                 df,
                 message='Mode not defined when df <= 2.')
         ]
     with tf.control_dependencies(assertions):
         return tf.where(df > 2., mode,
                         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
Example 5
 def _cdf(self, x):
     x = tf.convert_to_tensor(x, name='x')
     flat_x = tf.reshape(x, shape=[-1])
     upper_bound = tf.searchsorted(self.outcomes,
                                   values=flat_x,
                                   side='right')
     values_at_ub = tf.gather(
         self.outcomes,
         indices=tf.minimum(
             upper_bound,
             dist_util.prefer_static_shape(self.outcomes)[-1] - 1))
     should_use_upper_bound = self._is_equal_or_close(flat_x, values_at_ub)
     indices = tf.where(should_use_upper_bound, upper_bound,
                        upper_bound - 1)
     return self._categorical.cdf(
         tf.reshape(indices, shape=dist_util.prefer_static_shape(x)))
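
The `searchsorted` trick maps each `x` to the index of the largest outcome `<= x`; the `_is_equal_or_close` correction exists so that an `x` a hair below an atom (floating-point fuzz) still counts the atom. A standalone sketch of the index arithmetic:

```python
import tensorflow as tf

outcomes = tf.constant([1., 3., 5.])
flat_x = tf.constant([0., 1., 4., 6.])

upper_bound = tf.searchsorted(outcomes, values=flat_x, side='right')
# ==> [0, 1, 2, 3]
# Outcomes at the clipped upper bounds ==> [1., 3., 5., 5.]; none is equal or
# close to its x here, so every index steps down by one: [-1, 0, 1, 2].
# The categorical CDF is 0 at index -1 and 1 at the last index, so
# CDF(0) = 0, CDF(1) = P(1), CDF(4) = P(1) + P(3), CDF(6) = 1.
```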
Example 6
 def _mean(self):
     concentration = tf.convert_to_tensor(self.concentration)
     scale = tf.convert_to_tensor(self.scale)
     mean = scale / (concentration - 1.)
     if self.allow_nan_stats:
         assertions = []
     else:
         assertions = [
             assert_util.assert_less(
                 tf.ones([], self.dtype),
                 concentration,
                 message='mean undefined when any concentration <= 1')
         ]
     with tf.control_dependencies(assertions):
         return tf.where(concentration > 1., mean,
                         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
Example 7
    def _cdf(self, x):
        # CDF(x) at positive integer x is the probability that the Zipf variable
        # is less than or equal to x, given by the formula:
        #     CDF(x) = 1 - (zeta(power, x + 1) / Z)
        # For fractional x, the CDF is equal to the CDF at n = floor(x).
        # For x < 1, the CDF is zero.

        # If interpolate_nondiscrete is True, we return a continuous relaxation
        # which agrees with the CDF at integer points.
        power = tf.convert_to_tensor(self.power)
        x = tf.cast(x, power.dtype)
        safe_x = tf.maximum(x if self.interpolate_nondiscrete else tf.floor(x),
                            0.)

        cdf = 1. - (tf.math.zeta(power, safe_x + 1.) / tf.math.zeta(power, 1.))
        return tf.where(x < 1., tf.zeros_like(cdf), cdf)
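
Here `Z = zeta(power, 1)` is the Riemann zeta normalizer, with `tf.math.zeta(x, q)` the Hurwitz zeta function. A small check of the formula at `x = 1`, where the CDF must equal the pmf of the first outcome:

```python
import tensorflow as tf

power = tf.constant(2.)
# CDF(1) = 1 - zeta(power, 2) / zeta(power, 1) and pmf(1) = 1 / zeta(power, 1)
# agree because zeta(power, 2) = zeta(power, 1) - 1.
cdf_at_1 = 1. - tf.math.zeta(power, 2.) / tf.math.zeta(power, 1.)
pmf_at_1 = 1. / tf.math.zeta(power, 1.)
# Both ==> 6 / pi**2 ~= 0.6079 for power = 2.
```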
Example 8
 def _mean(self):
   df = tf.convert_to_tensor(self.df)
   loc = tf.convert_to_tensor(self.loc)
   mean = loc * tf.ones(self._batch_shape_tensor(loc=loc),
                        dtype=self.dtype)
   if self.allow_nan_stats:
     return tf.where(
         df > 1.,
         mean,
         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
   else:
     return distribution_util.with_dependencies([
         assert_util.assert_less(
             tf.ones([], dtype=self.dtype),
             df,
             message='mean not defined for components of df <= 1'),
     ], mean)
Example 9
 def _mode(self):
     concentration1 = tf.convert_to_tensor(self.concentration1)
     concentration0 = tf.convert_to_tensor(self.concentration0)
     mode = (concentration1 - 1.) / (concentration1 + concentration0 - 2.)
      with tf.control_dependencies([] if self.allow_nan_stats else [  # pylint: disable=g-long-ternary
          assert_util.assert_less(
              tf.ones([], dtype=self.dtype), concentration1,
              message="Mode undefined for concentration1 <= 1."),
          assert_util.assert_less(
              tf.ones([], dtype=self.dtype), concentration0,
              message="Mode undefined for concentration0 <= 1.")
      ]):
         return tf.where((concentration1 > 1.) & (concentration0 > 1.),
                         mode,
                         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
Example 10
 def body_fn(w, should_continue):
     z = beta.sample(sample_shape=sample_batch_shape, seed=seed())
     # set_shape needed here because of b/139013403
     z.set_shape(w.shape)
     w = tf.where(should_continue,
                  (1 - (1 + b) * z) / (1 - (1 - b) * z), w)
     w = tf.debugging.check_numerics(w, 'w')
     unif = tf.random.uniform(sample_batch_shape,
                              seed=seed(),
                              dtype=self.dtype)
     # set_shape needed here because of b/139013403
     unif.set_shape(w.shape)
     should_continue = tf.logical_and(
         should_continue,
         self.concentration * w + dim * tf.math.log1p(-x * w) - c <
         tf.math.log(unif))
     return w, should_continue
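
`body_fn` is the inner step of a `tf.while_loop` rejection sampler (here from the von Mises-Fisher sampler): lanes that have already accepted hold their value, and only still-running lanes take the fresh proposal via the masked `tf.where`. A self-contained sketch of the same pattern, rejection-sampling a standard normal truncated to `[0, inf)`:

```python
import tensorflow as tf

def truncated_normal_rejection(shape):
  """Sketch: rejection-sample Normal(0, 1) truncated to [0, inf)."""
  def body_fn(w, should_continue):
    z = tf.random.normal(shape)
    # Only lanes that are still running take the fresh proposal.
    w = tf.where(should_continue, z, w)
    # A lane stops as soon as its value lands in the support.
    return w, should_continue & (w < 0.)

  w, _ = tf.while_loop(
      cond=lambda w, should_continue: tf.reduce_any(should_continue),
      body=body_fn,
      loop_vars=(tf.zeros(shape), tf.ones(shape, dtype=tf.bool)))
  return w

samples = truncated_normal_rejection([1000])  # every sample is >= 0
```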
Example 11
def log_cdf_laplace(x, name="log_cdf_laplace"):
  """Log Laplace distribution function.

  This function calculates `Log[L(x)]`, where `L(x)` is the cumulative
  distribution function of the Laplace distribution, i.e.

  ```L(x) := 0.5 * int_{-infty}^x e^{-|t|} dt```

  For numerical accuracy, `L(x)` is computed in different ways depending on `x`:

  ```
  x <= 0:
    Log[L(x)] = Log[0.5] + x, which is exact

  0 < x:
    Log[L(x)] = Log[1 - 0.5 * e^{-x}], which is exact
  ```

  Args:
    x: `Tensor` of type `float32`, `float64`.
    name: Python string. A name for the operation (default="log_cdf_laplace").

  Returns:
    `Tensor` with `dtype=x.dtype`.

  Raises:
    TypeError: if `x.dtype` is not handled.
  """

  with tf.name_scope(name):
    x = tf.convert_to_tensor(x, name="x")

    # For x < 0, L(x) = 0.5 * exp{x} exactly, so Log[L(x)] = log(0.5) + x.
    lower_solution = -np.log(2.) + x

    # safe_exp_neg_x = exp{-x} for x > 0, but is
    # bounded above by 1, which avoids
    #   log[1 - 1] = -inf for x = log(1/2), AND
    #   exp{-x} --> inf, for x << -1
    safe_exp_neg_x = tf.exp(-tf.abs(x))

    # log1p(z) = log(1 + z) approx z for |z| << 1. This approximation is used
    # internally by log1p, rather than being done explicitly here.
    upper_solution = tf.math.log1p(-0.5 * safe_exp_neg_x)

    return tf.where(x < 0., lower_solution, upper_solution)
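
A quick numeric sketch of why the `x <= 0` branch matters; in float32 the naive `log(0.5 * exp(x))` underflows to `-inf` long before the closed form does:

```python
import numpy as np
import tensorflow as tf

x = tf.constant(-200., dtype=tf.float32)
naive = tf.math.log(0.5 * tf.exp(x))   # exp(-200) underflows ==> -inf
stable = -np.log(2.) + x               # lower branch ==> -200.693...
```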
Example 12
    def _mode(self):
        a = tf.convert_to_tensor(self.concentration1)
        b = tf.convert_to_tensor(self.concentration0)
        mode = ((a - 1) / (a * b - 1))**(1. / a)
        if self.allow_nan_stats:
            return tf.where((a > 1.) & (b > 1.), mode,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))

        return distribution_util.with_dependencies([
            assert_util.assert_less(
                tf.ones([], dtype=a.dtype),
                a,
                message="Mode undefined for concentration1 <= 1."),
            assert_util.assert_less(
                tf.ones([], dtype=b.dtype),
                b,
                message="Mode undefined for concentration0 <= 1.")
        ], mode)
Example 13
    def _mean(self):
        concentration = tf.convert_to_tensor(self.concentration)
        mixing_concentration = tf.convert_to_tensor(self.mixing_concentration)
        mixing_rate = tf.convert_to_tensor(self.mixing_rate)

        mean = concentration * mixing_rate / (mixing_concentration - 1.)
        if self.allow_nan_stats:
            return tf.where(mixing_concentration > 1., mean,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))
        else:
            with tf.control_dependencies([
                    assert_util.assert_less(
                        tf.ones([], self.dtype),
                        mixing_concentration,
                        message=
                        'mean undefined when `mixing_concentration` <= 1'),
            ]):
                return tf.identity(mean)
Example 14
 def _mode(self):
   concentration = tf.convert_to_tensor(self.concentration)
   k = tf.cast(tf.shape(concentration)[-1], self.dtype)
   total_concentration = tf.reduce_sum(concentration, axis=-1)
   mode = (concentration - 1.) / (total_concentration[..., tf.newaxis] - k)
   if self.allow_nan_stats:
     return tf.where(
         tf.reduce_all(concentration > 1., axis=-1, keepdims=True),
         mode,
         dtype_util.as_numpy_dtype(self.dtype)(np.nan))
   assertions = [
       assert_util.assert_less(
           tf.ones([], self.dtype),
           concentration,
           message='Mode undefined when any concentration <= 1')
   ]
   with tf.control_dependencies(assertions):
     return tf.identity(mode)
Example 15
    def _variance(self):
        concentration = tf.convert_to_tensor(self.concentration)
        scale = tf.convert_to_tensor(self.scale)
        var = (tf.square(scale) / tf.square(concentration - 1.) /
               (concentration - 2.))
        if self.allow_nan_stats:
            assertions = []
        else:
            assertions = [
                assert_util.assert_less(
                    tf.constant(2., dtype=self.dtype),
                    concentration,
                    message='variance undefined when any concentration <= 2')
            ]

        with tf.control_dependencies(assertions):
            return tf.where(concentration > 2., var,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))
Example 16
def non_negative_axis(axis, rank, name=None):  # pylint:disable=redefined-outer-name
  """Make (possibly negatively indexed) `axis` argument non-negative."""
  with tf.name_scope(name or 'non_negative_axis'):
    if axis is None:
      return None
    if rank is None:
      raise ValueError('Argument `rank` cannot be `None`.')
    dtype = dtype_util.as_numpy_dtype(
        dtype_util.common_dtype([axis, rank], dtype_hint=tf.int32))
    rank_ = tf.get_static_value(rank)
    axis_ = tf.get_static_value(axis)
    if rank_ is None or axis_ is None:
      axis = tf.convert_to_tensor(axis, dtype=dtype, name='axis')
      rank = tf.convert_to_tensor(rank, dtype=dtype, name='rank')
      return tf.where(axis < 0, rank + axis, axis)
    axis_ = np.array(axis_, dtype=dtype)
    rank_ = np.array(rank_, dtype=dtype)
    return np.where(axis_ < 0, axis_ + rank_, axis_)
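
`non_negative_axis` resolves the axis statically when both values are known (returning a NumPy value usable in shape computations at trace time) and falls back to `tf.where` otherwise. A usage sketch of the function defined above:

```python
# Static case: plain Python ints in, NumPy integer out.
non_negative_axis(axis=-1, rank=3)   # ==> 2
non_negative_axis(axis=-3, rank=3)   # ==> 0
non_negative_axis(axis=1, rank=3)    # ==> 1
# If either argument is a Tensor with no static value (e.g. a shape computed
# inside a tf.function traced with unknown rank), the tf.where branch returns
# a Tensor instead.
```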
Example 17
    def _variance(self):
        concentration = tf.convert_to_tensor(self.concentration)
        mixing_concentration = tf.convert_to_tensor(self.mixing_concentration)
        mixing_rate = tf.convert_to_tensor(self.mixing_rate)

        variance = (tf.square(concentration * mixing_rate /
                              (mixing_concentration - 1.)) /
                    (mixing_concentration - 2.))
        if self.allow_nan_stats:
            return tf.where(mixing_concentration > 2., variance,
                            dtype_util.as_numpy_dtype(self.dtype)(np.nan))
        else:
            with tf.control_dependencies([
                    assert_util.assert_less(
                        tf.ones([], self.dtype) * 2.,
                        mixing_concentration,
                        message=
                        'variance undefined when `mixing_concentration` <= 2')
            ]):
                return tf.identity(variance)
Example 18
def _sqrtx2p1(x):
    """Implementation of `sqrt(1 + x**2)` which is stable despite large `x`."""
    sqrt_eps = np.sqrt(np.finfo(dtype_util.as_numpy_dtype(x.dtype)).eps)
    return tf.where(
        tf.abs(x) * sqrt_eps <= 1.,
        tf.sqrt(x**2. + 1.),
        # For large x, calculating x**2 can overflow. This can be alleviated by
        # considering:
        # sqrt(1 + x**2)
        # = exp(0.5 log(1 + x**2))
        # = exp(0.5 log(x**2 * (1 + x**-2)))
        # = exp(log(x) + 0.5 * log(1 + x**-2))
        # = |x| * exp(0.5 log(1 + x**-2))
        # = |x| * sqrt(1 + x**-2)
        # We omit the second factor in this approximation.
        # When |x| > 1 / sqrt(machine epsilon), the second factor equals 1,
        # since sqrt(1 + x**-2) rounds to 1. The same holds for the gradient
        # and higher-order gradients, since the first derivative of
        # sqrt(1 + x**-2) is -x**-3 / sqrt(1 + x**-2), which is O(x**-3),
        # and all nth-order derivatives are O(x**-(n + 2)). This makes any
        # gradient terms that contain derivatives of sqrt(1 + x**-2) vanish.
        tf.abs(x))
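
A sketch of the overflow the second branch avoids: squaring a large float32 produces `inf`, while returning `|x|` is exact to machine precision there:

```python
import tensorflow as tf

x = tf.constant(1e30, dtype=tf.float32)
tf.sqrt(x**2. + 1.)   # x**2 overflows float32 ==> inf
_sqrtx2p1(x)          # ==> 1e30, since sqrt(1 + x**-2) rounds to 1
```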
Example 19
def pinv(a, rcond=None, validate_args=False, name=None):
    """Compute the Moore-Penrose pseudo-inverse of a matrix.

  Calculate the [generalized inverse of a matrix](
  https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse) using its
  singular-value decomposition (SVD) and including all large singular values.

  The pseudo-inverse of a matrix `A` is defined as 'the matrix that solves
  [the least-squares problem] `A @ x = b`,' i.e., if `x_hat` is a solution, then
  `A_pinv` is the matrix such that `x_hat = A_pinv @ b`. It can be shown that if
  `U @ Sigma @ V.T = A` is the singular value decomposition of `A`, then
  `A_pinv = V @ inv(Sigma) @ U.T`. [(Strang, 1980)][1]

  This function is analogous to [`numpy.linalg.pinv`](
  https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.pinv.html).
  It differs only in default value of `rcond`. In `numpy.linalg.pinv`, the
  default `rcond` is `1e-15`. Here the default is
  `10. * max(num_rows, num_cols) * np.finfo(dtype).eps`.

  Args:
    a: (Batch of) `float`-like matrix-shaped `Tensor`(s) which are to be
      pseudo-inverted.
    rcond: `Tensor` of small singular value cutoffs.  Singular values smaller
      (in modulus) than `rcond` * largest_singular_value (again, in modulus) are
      set to zero. Must broadcast against `tf.shape(a)[:-2]`.
      Default value: `10. * max(num_rows, num_cols) * np.finfo(a.dtype).eps`.
    validate_args: When `True`, additional assertions might be embedded in the
      graph.
      Default value: `False` (i.e., no graph assertions are added).
    name: Python `str` prefixed to ops created by this function.
      Default value: 'pinv'.

  Returns:
    a_pinv: The pseudo-inverse of input `a`. Has same shape as `a` except
      rightmost two dimensions are transposed.

  Raises:
    TypeError: if input `a` does not have `float`-like `dtype`.
    ValueError: if input `a` has fewer than 2 dimensions.

  #### Examples

  ```python
  from tensorflow_probability.python.internal.backend import numpy as tf
  import tensorflow_probability as tfp; tfp = tfp.experimental.substrates.numpy

  a = tf.constant([[1.,  0.4,  0.5],
                   [0.4, 0.2,  0.25],
                   [0.5, 0.25, 0.35]])
  tf.matmul(tfp.math.pinv(a), a)
  # ==> array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]], dtype=float32)

  a = tf.constant([[1.,  0.4,  0.5,  1.],
                   [0.4, 0.2,  0.25, 2.],
                   [0.5, 0.25, 0.35, 3.]])
  tf.matmul(tfp.math.pinv(a), a)
  # ==> array([[ 0.76,  0.37,  0.21, -0.02],
               [ 0.37,  0.43, -0.33,  0.02],
               [ 0.21, -0.33,  0.81,  0.01],
               [-0.02,  0.02,  0.01,  1.  ]], dtype=float32)
  ```

  #### References

  [1]: G. Strang. 'Linear Algebra and Its Applications, 2nd Ed.' Academic Press,
       Inc., 1980, pp. 139-142.
  """
    with tf.name_scope(name or 'pinv'):
        a = tf.convert_to_tensor(a, name='a')

        assertions = _maybe_validate_matrix(a, validate_args)
        if assertions:
            with tf.control_dependencies(assertions):
                a = tf.identity(a)

        dtype = dtype_util.as_numpy_dtype(a.dtype)

        if rcond is None:

            def get_dim_size(dim):
                if tf.compat.dimension_value(a.shape[dim]) is not None:
                    return tf.compat.dimension_value(a.shape[dim])
                return tf.shape(a)[dim]

            num_rows = get_dim_size(-2)
            num_cols = get_dim_size(-1)
            if isinstance(num_rows, int) and isinstance(num_cols, int):
                max_rows_cols = float(max(num_rows, num_cols))
            else:
                max_rows_cols = tf.cast(tf.maximum(num_rows, num_cols), dtype)
            rcond = 10. * max_rows_cols * np.finfo(dtype).eps

        rcond = tf.convert_to_tensor(rcond, dtype=dtype, name='rcond')

        # Calculate pseudo inverse via SVD.
        # Note: if a is symmetric then u == v. (We might observe additional
        # performance by explicitly setting `v = u` in such cases.)
        [
            singular_values,  # Sigma
            left_singular_vectors,  # U
            right_singular_vectors,  # V
        ] = tf.linalg.svd(a, full_matrices=False, compute_uv=True)

        # Saturate small singular values to inf. This has the effect of making
        # `1. / s = 0.` while not resulting in `NaN` gradients.
        cutoff = rcond * tf.reduce_max(singular_values, axis=-1)
        singular_values = tf.where(singular_values > cutoff[..., tf.newaxis],
                                   singular_values, np.array(np.inf, dtype))

        # Although `a == tf.matmul(u, s * v, transpose_b=True)` we swap
        # `u` and `v` here so that `tf.matmul(pinv(A), A) = tf.eye()`, i.e.,
        # a matrix inverse has 'transposed' semantics.
        a_pinv = tf.matmul(right_singular_vectors /
                           singular_values[..., tf.newaxis, :],
                           left_singular_vectors,
                           adjoint_b=True)

        if tensorshape_util.rank(a.shape) is not None:
            a_pinv.set_shape(a.shape[:-2].concatenate(
                [a.shape[-1], a.shape[-2]]))

        return a_pinv
Example 20
def _slice_single_param(param, param_event_ndims, slices, dist_batch_shape):
    """Slices a single parameter of a distribution.

  Args:
    param: A `Tensor`, the original parameter to slice.
    param_event_ndims: `int` event parameterization rank for this parameter.
    slices: A `tuple` of normalized slices.
    dist_batch_shape: The distribution's batch shape `Tensor`.

  Returns:
    new_param: A `Tensor`, batch-sliced according to slices.
  """
    # Extend param shape with ones on the left to match dist_batch_shape.
    param_shape = tf.shape(input=param)
    insert_ones = tf.ones(
        [tf.size(input=dist_batch_shape) + param_event_ndims - tf.rank(param)],
        dtype=param_shape.dtype)
    new_param_shape = tf.concat([insert_ones, param_shape], axis=0)
    full_batch_param = tf.reshape(param, new_param_shape)
    param_slices = []
    # We separately track the batch axis from the parameter axis because we want
    # them to align for positive indexing, and be offset by param_event_ndims for
    # negative indexing.
    param_dim_idx = 0
    batch_dim_idx = 0
    for slc in slices:
        if slc is tf.newaxis:
            param_slices.append(slc)
            continue
        if slc is Ellipsis:
            if batch_dim_idx < 0:
                raise ValueError(
                    'Found multiple `...` in slices {}'.format(slices))
            param_slices.append(slc)
            # Switch over to negative indexing for the broadcast check.
            num_remaining_non_newaxis_slices = sum([
                s is not tf.newaxis
                for s in slices[slices.index(Ellipsis) + 1:]
            ])
            batch_dim_idx = -num_remaining_non_newaxis_slices
            param_dim_idx = batch_dim_idx - param_event_ndims
            continue
        # Find the batch dimension sizes for both parameter and distribution.
        param_dim_size = new_param_shape[param_dim_idx]
        batch_dim_size = dist_batch_shape[batch_dim_idx]
        is_broadcast = batch_dim_size > param_dim_size
        # Slices are denoted by start:stop:step.
        if isinstance(slc, slice):
            start, stop, step = slc.start, slc.stop, slc.step
            if start is not None:
                start = tf.where(is_broadcast, 0, start)
            if stop is not None:
                stop = tf.where(is_broadcast, 1, stop)
            if step is not None:
                step = tf.where(is_broadcast, 1, step)
            param_slices.append(slice(start, stop, step))
        else:  # int, or int Tensor, e.g. d[d.batch_shape_tensor()[0] // 2]
            param_slices.append(tf.where(is_broadcast, 0, slc))
        param_dim_idx += 1
        batch_dim_idx += 1
    param_slices.extend([ALL_SLICE] * param_event_ndims)
    return full_batch_param.__getitem__(param_slices)
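
This helper is the machinery behind slicing a distribution's batch dimensions like a `Tensor`: where a parameter was broadcast against the batch shape, its slice degenerates to `0:1:1` so broadcasting still works afterwards. A hedged sketch of the user-facing behavior, assuming TFP's distribution `__getitem__`:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# loc carries the full [5, 3] batch shape; scale broadcasts from shape [3].
dist = tfd.Normal(loc=tf.zeros([5, 3]), scale=tf.ones([3]))
dist.batch_shape            # ==> [5, 3]
dist[1:3, 1:].batch_shape   # ==> [2, 2]
# Internally each parameter goes through _slice_single_param; the broadcast
# scale keeps size 1 on the batch axis it never had, rather than being
# indexed out of range.
```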
Example 21
 def _mode(self):
     s = self.df - self.dimension - 1.
     s = tf.where(s < 0., dtype_util.as_numpy_dtype(s.dtype)(np.nan), s)
     if self.input_output_cholesky:
         return tf.sqrt(s) * self.scale_operator.to_dense()
     return s * self._square_scale_operator()
Example 22
    def _sample_n(self, n, seed=None):
        power = tf.convert_to_tensor(self.power)
        shape = tf.concat([[n], tf.shape(power)], axis=0)

        has_seed = seed is not None
        seed = SeedStream(seed, salt='zipf')

        minval_u = self._hat_integral(0.5, power=power) + 1.
        maxval_u = self._hat_integral(tf.int64.max - 0.5, power=power)

        def loop_body(should_continue, k):
            """Resample the non-accepted points."""
            # The range of U is chosen so that the resulting sample K lies in
            # [0, tf.int64.max). The final sample, if accepted, is K + 1.
            u = tf.random.uniform(shape,
                                  minval=minval_u,
                                  maxval=maxval_u,
                                  dtype=power.dtype,
                                  seed=seed())

            # Sample the point X from the continuous density h(x) \propto x^(-power).
            x = self._hat_integral_inverse(u, power=power)

            # Rejection-inversion requires a `hat` function, h(x) such that
            # \int_{k - .5}^{k + .5} h(x) dx >= pmf(k + 1) for points k in the
            # support. A natural hat function for us is h(x) = x^(-power).
            #
            # After sampling X from h(x), suppose it lies in the interval
            # (K - .5, K + .5) for integer K. Then the corresponding K is
            # accepted if X lies to the left of x_K, where x_K is defined by:
            #   \int_{x_k}^{K + .5} h(x) dx = H(x_K) - H(K + .5) = pmf(K + 1),
            # where H(x) = \int_x^inf h(x) dx.

            # Solving for x_K, we find that x_K = H_inverse(H(K + .5) + pmf(K + 1)).
            # Or, the acceptance condition is X <= H_inverse(H(K + .5) + pmf(K + 1)).
            # Since X = H_inverse(U), this simplifies to U <= H(K + .5) + pmf(K + 1).

            # Update the non-accepted points.
            # Since X \in (K - .5, K + .5), the sample K is chosen as floor(X + 0.5).
            k = tf.where(should_continue, tf.floor(x + 0.5), k)
            accept = (u <= self._hat_integral(k + .5, power=power) +
                      tf.exp(self._log_prob(k + 1, power=power)))

            return [should_continue & (~accept), k]

        should_continue, samples = tf.while_loop(
            cond=lambda should_continue, *ignore: tf.reduce_any(should_continue),
            body=loop_body,
            loop_vars=[
                tf.ones(shape, dtype=tf.bool),  # should_continue
                tf.zeros(shape, dtype=power.dtype),  # k
            ],
            parallel_iterations=1 if has_seed else 10,
            maximum_iterations=self.sample_maximum_iterations,
        )
        samples = samples + 1.

        if self.validate_args and dtype_util.is_integer(self.dtype):
            samples = distribution_util.embed_check_integer_casting_closed(
                samples, target_dtype=self.dtype, assert_positive=True)

        samples = tf.cast(samples, self.dtype)

        if self.validate_args:
            npdt = dtype_util.as_numpy_dtype(self.dtype)
            v = npdt(
                dtype_util.min(npdt) if dtype_util.is_integer(npdt) else np.nan
            )
            samples = tf.where(should_continue, v, samples)

        return samples
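
Taken together, this rejection-inversion routine backs sampling for the Zipf distribution; a hedged usage sketch, assuming the public `tfd.Zipf`:

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

# pmf(k) is proportional to k**(-power); power must exceed 1 to normalize.
dist = tfd.Zipf(power=2.5)
dist.sample(5, seed=42)   # ==> e.g. [1, 4, 1, 1, 2]
```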
Example 23
def reduce_weighted_logsumexp(logx,
                              w=None,
                              axis=None,
                              keep_dims=False,
                              return_sign=False,
                              name=None):
  """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  If all weights `w` are known to be positive, it is more efficient to directly
  use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.math.log(w))` is
  more efficient than `du.reduce_weighted_logsumexp(logx, w)`.

  Reduces `input_tensor` along the dimensions given in `axis`.
  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
  entry in `axis`. If `keep_dims` is true, the reduced dimensions
  are retained with length 1.

  If `axis` has no entries, all dimensions are reduced, and a
  tensor with a single element is returned.

  This function is more numerically stable than log(sum(w * exp(input))). It
  avoids overflows caused by taking the exp of large inputs and underflows
  caused by taking the log of small inputs.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`.
    axis: The dimensions to reduce. If `None` (the default), reduces all
      dimensions. Must be in the range `[-rank(input_tensor),
      rank(input_tensor))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`.
  """
  with tf.name_scope(name or 'reduce_weighted_logsumexp'):
    logx = tf.convert_to_tensor(logx, name='logx')
    if w is None:
      lswe = tf.reduce_logsumexp(logx, axis=axis, keepdims=keep_dims)
      if return_sign:
        sgn = tf.ones_like(lswe)
        return lswe, sgn
      return lswe
    w = tf.convert_to_tensor(w, dtype=logx.dtype, name='w')
    log_absw_x = logx + tf.math.log(tf.abs(w))
    max_log_absw_x = tf.reduce_max(log_absw_x, axis=axis, keepdims=True)
    # If the largest element is `-inf` or `inf` then we don't bother subtracting
    # off the max. We do this because otherwise we'd get `inf - inf = NaN`. That
    # this is ok follows from the fact that we're actually free to subtract any
    # value we like, so long as we add it back after taking the `log(sum(...))`.
    max_log_absw_x = tf.where(
        tf.math.is_inf(max_log_absw_x),
        tf.zeros([], max_log_absw_x.dtype),
        max_log_absw_x)
    wx_over_max_absw_x = (tf.sign(w) * tf.exp(log_absw_x - max_log_absw_x))
    sum_wx_over_max_absw_x = tf.reduce_sum(
        wx_over_max_absw_x, axis=axis, keepdims=keep_dims)
    if not keep_dims:
      max_log_absw_x = tf.squeeze(max_log_absw_x, axis)
    sgn = tf.sign(sum_wx_over_max_absw_x)
    lswe = max_log_absw_x + tf.math.log(sgn * sum_wx_over_max_absw_x)
    if return_sign:
      return lswe, sgn
    return lswe
Example 24
 def _mode(self):
     total_count = tf.convert_to_tensor(self.total_count)
     adjusted_count = tf.where(1. < total_count, total_count - 1.,
                               tf.zeros_like(total_count))
     return tf.floor(adjusted_count *
                     tf.exp(self._logits_parameter_no_checks()))
Example 25
 def _variance(self):
     concentration = tf.convert_to_tensor(self.concentration)
     valid_variance = (self.scale**2 * concentration /
                       ((concentration - 1.)**2 * (concentration - 2.)))
     return tf.where(concentration > 2., valid_variance,
                     dtype_util.as_numpy_dtype(self.dtype)(np.inf))
Example 26
 def _mean(self):
     concentration = tf.convert_to_tensor(self.concentration)
     return tf.where(concentration > 1.,
                     concentration * self.scale / (concentration - 1),
                     dtype_util.as_numpy_dtype(self.dtype)(np.inf))
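
The two Pareto methods above encode the usual heavy-tail thresholds; a sketch assuming the public `tfd.Pareto`:

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

# concentration <= 1: infinite mean; concentration <= 2: infinite variance.
dist = tfd.Pareto(concentration=[0.5, 1.5, 3.], scale=1.)
dist.mean()       # ==> [inf, 3., 1.5]
dist.variance()   # ==> [inf, inf, 0.75]
```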
Example 27
def _ndtri(p):
  """Implements ndtri core logic."""

  # Constants used in piece-wise rational approximations. Taken from the cephes
  # library:
  # https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
  p0 = list(reversed([-5.99633501014107895267E1,
                      9.80010754185999661536E1,
                      -5.66762857469070293439E1,
                      1.39312609387279679503E1,
                      -1.23916583867381258016E0]))
  q0 = list(reversed([1.0,
                      1.95448858338141759834E0,
                      4.67627912898881538453E0,
                      8.63602421390890590575E1,
                      -2.25462687854119370527E2,
                      2.00260212380060660359E2,
                      -8.20372256168333339912E1,
                      1.59056225126211695515E1,
                      -1.18331621121330003142E0]))
  p1 = list(reversed([4.05544892305962419923E0,
                      3.15251094599893866154E1,
                      5.71628192246421288162E1,
                      4.40805073893200834700E1,
                      1.46849561928858024014E1,
                      2.18663306850790267539E0,
                      -1.40256079171354495875E-1,
                      -3.50424626827848203418E-2,
                      -8.57456785154685413611E-4]))
  q1 = list(reversed([1.0,
                      1.57799883256466749731E1,
                      4.53907635128879210584E1,
                      4.13172038254672030440E1,
                      1.50425385692907503408E1,
                      2.50464946208309415979E0,
                      -1.42182922854787788574E-1,
                      -3.80806407691578277194E-2,
                      -9.33259480895457427372E-4]))
  p2 = list(reversed([3.23774891776946035970E0,
                      6.91522889068984211695E0,
                      3.93881025292474443415E0,
                      1.33303460815807542389E0,
                      2.01485389549179081538E-1,
                      1.23716634817820021358E-2,
                      3.01581553508235416007E-4,
                      2.65806974686737550832E-6,
                      6.23974539184983293730E-9]))
  q2 = list(reversed([1.0,
                      6.02427039364742014255E0,
                      3.67983563856160859403E0,
                      1.37702099489081330271E0,
                      2.16236993594496635890E-1,
                      1.34204006088543189037E-2,
                      3.28014464682127739104E-4,
                      2.89247864745380683936E-6,
                      6.79019408009981274425E-9]))

  def _create_polynomial(var, coeffs):
    """Compute n_th order polynomial via Horner's method."""
    coeffs = np.array(coeffs, dtype_util.as_numpy_dtype(var.dtype))
    if not coeffs.size:
      return tf.zeros_like(var)
    return coeffs[0] + _create_polynomial(var, coeffs[1:]) * var

  maybe_complement_p = tf.where(p > -np.expm1(-2.), 1. - p, p)
  # Write in an arbitrary value in place of 0 for p since 0 will cause NaNs
  # later on. The result from the computation when p == 0 is not used so any
  # number that doesn't result in NaNs is fine.
  sanitized_mcp = tf.where(
      maybe_complement_p <= 0.,
      dtype_util.as_numpy_dtype(p.dtype)(0.5), maybe_complement_p)

  # Compute x for p > exp(-2): x/sqrt(2pi) = w + w**3 P0(w**2)/Q0(w**2).
  w = sanitized_mcp - 0.5
  ww = w ** 2
  x_for_big_p = w + w * ww * (_create_polynomial(ww, p0)
                              / _create_polynomial(ww, q0))
  x_for_big_p *= -np.sqrt(2. * np.pi)

  # Compute x for p <= exp(-2): x = z - log(z)/z - (1/z) P(1/z) / Q(1/z),
  # where z = sqrt(-2. * log(p)), and P/Q are chosen between two different
  # arrays based on whether p < exp(-32).
  z = tf.sqrt(-2. * tf.math.log(sanitized_mcp))
  first_term = z - tf.math.log(z) / z
  second_term_small_p = (
      _create_polynomial(1. / z, p2) /
      _create_polynomial(1. / z, q2) / z)
  second_term_otherwise = (
      _create_polynomial(1. / z, p1) /
      _create_polynomial(1. / z, q1) / z)
  x_for_small_p = first_term - second_term_small_p
  x_otherwise = first_term - second_term_otherwise

  x = tf.where(sanitized_mcp > np.exp(-2.), x_for_big_p,
               tf.where(z >= 8.0, x_for_small_p, x_otherwise))

  x = tf.where(p > 1. - np.exp(-2.), x, -x)
  infinity_scalar = tf.constant(np.inf, dtype=p.dtype)
  x_nan_replaced = tf.where(p <= 0.0, -infinity_scalar,
                            tf.where(p >= 1.0, infinity_scalar, x))
  return x_nan_replaced
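
TensorFlow also exposes an inverse-CDF op, `tf.math.ndtri`, which can be used to spot-check this rational approximation at known quantiles:

```python
import tensorflow as tf

# Inverse of the standard normal CDF.
tf.math.ndtri([0.5, 0.975, 0., 1.])
# ==> [0., 1.95996..., -inf, inf]
```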
Example 28
def log_ndtr(x, series_order=3, name="log_ndtr"):
  """Log Normal distribution function.

  For details of the Normal distribution function see `ndtr`.

  This function calculates `(log o ndtr)(x)` by either calling `log(ndtr(x))` or
  using an asymptotic series. Specifically:
  - For `x > upper_segment`, use the approximation `-ndtr(-x)` based on
    `log(1-x) ~= -x, x << 1`.
  - For `lower_segment < x <= upper_segment`, use the existing `ndtr` technique
    and take a log.
  - For `x <= lower_segment`, we use the series approximation of erf to compute
    the log CDF directly.

  The `lower_segment` is set based on the precision of the input:

  ```
  lower_segment = { -20,  x.dtype=float64
                  { -10,  x.dtype=float32
  upper_segment = {   8,  x.dtype=float64
                  {   5,  x.dtype=float32
  ```

  When `x < lower_segment`, the `ndtr` asymptotic series approximation is:

  ```
     ndtr(x) = scale * (1 + sum) + R_N
     scale   = exp(-0.5 x**2) / (-x sqrt(2 pi))
     sum     = Sum{(-1)^n (2n-1)!! / (x**2)^n, n=1:N}
     R_N     = O(exp(-0.5 x**2) (2N+1)!! / |x|^{2N+3})
  ```

  where `(2n-1)!! = (2n-1) (2n-3) (2n-5) ...  (3) (1)` is a
  [double-factorial](https://en.wikipedia.org/wiki/Double_factorial).


  Args:
    x: `Tensor` of type `float32`, `float64`.
    series_order: Positive Python `integer`. Maximum depth to
      evaluate the asymptotic expansion. This is the `N` above.
    name: Python string. A name for the operation (default="log_ndtr").

  Returns:
    log_ndtr: `Tensor` with `dtype=x.dtype`.

  Raises:
    TypeError: if `x.dtype` is not handled.
    TypeError: if `series_order` is not a Python `integer`.
    ValueError:  if `series_order` is not in `[0, 30]`.
  """
  if not isinstance(series_order, int):
    raise TypeError("series_order must be a Python integer.")
  if series_order < 0:
    raise ValueError("series_order must be non-negative.")
  if series_order > 30:
    raise ValueError("series_order must be <= 30.")

  with tf.name_scope(name):
    x = tf.convert_to_tensor(x, name="x")

    if dtype_util.base_equal(x.dtype, tf.float64):
      lower_segment = LOGNDTR_FLOAT64_LOWER
      upper_segment = LOGNDTR_FLOAT64_UPPER
    elif dtype_util.base_equal(x.dtype, tf.float32):
      lower_segment = LOGNDTR_FLOAT32_LOWER
      upper_segment = LOGNDTR_FLOAT32_UPPER
    else:
      raise TypeError("x.dtype=%s is not supported." % x.dtype)

    # The basic idea here was ported from:
    #   https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
    # We copy the main idea, with a few changes
    # * For x >> 1, and X ~ Normal(0, 1),
    #     Log[P[X < x]] = Log[1 - P[X < -x]] approx -P[X < -x],
    #     which extends the range of validity of this function.
    # * We use one fixed series_order for all of 'x', rather than adaptive.
    # * Our docstring properly reflects that this is an asymptotic series, not a
    #   Taylor series. We also provided a correct bound on the remainder.
    # * We need to use the max/min in the _log_ndtr_lower arg to avoid nan when
    #   x=0. This happens even though the branch is unchosen because when x=0
    #   the gradient of a select involves the calculation 1*dy+0*(-inf)=nan
    #   regardless of whether dy is finite. Note that the minimum is a NOP if
    #   the branch is chosen.
    return tf.where(
        x > upper_segment,
        -_ndtr(-x),  # log(1-x) ~= -x, x << 1
        tf.where(
            x > lower_segment,
            tf.math.log(_ndtr(tf.maximum(x, lower_segment))),
            _log_ndtr_lower(tf.minimum(x, lower_segment), series_order)))
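
A sketch of why the lower branch matters, using `tfd.Normal.log_cdf` (which TFP computes via `log_ndtr`): the naive `log(cdf(x))` hits `-inf` once the CDF underflows, while the series branch stays finite:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

normal = tfd.Normal(loc=0., scale=1.)
x = tf.constant(-20., dtype=tf.float32)
tf.math.log(normal.cdf(x))   # cdf(x) underflows to 0 ==> -inf
normal.log_cdf(x)            # asymptotic-series branch ==> ~ -203.9
```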
Example 29
 def _get_safe_input(self, x, loc, scale):
     safe_value = 0.5 * scale + loc
     return tf.where(x < loc, safe_value, x)
Example 30
    def _observation_log_probs(self, observations, mask):
        """Compute and shape tensor of log probs associated with observations."""

        # Let E be the underlying event shape
        #     M the number of steps in the HMM
        #     N the number of states of the HMM
        #
        # Then the incoming observations have shape
        #
        # observations : batch_o [M] E
        #
        # and the mask (if present) has shape
        #
        # mask : batch_m [M]
        #
        # Let this HMM distribution have batch shape batch_d
        # We need to broadcast all three of these batch shapes together
        # into the shape batch.
        #
        # We need to move the step dimension to the first dimension to make
        # them suitable for folding or scanning over.
        #
        # When we call `log_prob` for our observations we need to
        # do this for each state the observation could correspond to.
        # We do this by expanding the dimensions by 1 so we end up with:
        #
        # observations : [M] batch [1] [E]
        #
        # After calling `log_prob` we get
        #
        # observation_log_probs : [M] batch [N]
        #
        # We wish to use `mask` to select from this so we also
        # reshape and broadcast it up to shape
        #
        # mask : [M] batch [N]

        observation_tensor_shape = tf.shape(observations)
        observation_batch_shape = observation_tensor_shape[
            :-1 - self._underlying_event_rank]
        observation_event_shape = observation_tensor_shape[
            -1 - self._underlying_event_rank:]

        if mask is not None:
            mask_tensor_shape = tf.shape(mask)
            mask_batch_shape = mask_tensor_shape[:-1]

        batch_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                                 self.batch_shape_tensor())

        if mask is not None:
            batch_shape = tf.broadcast_dynamic_shape(batch_shape,
                                                     mask_batch_shape)
        observations = tf.broadcast_to(
            observations,
            tf.concat([batch_shape, observation_event_shape], axis=0))
        observation_rank = tf.rank(observations)
        underlying_event_rank = self._underlying_event_rank
        observations = distribution_util.move_dimension(
            observations, observation_rank - underlying_event_rank - 1, 0)
        observations = tf.expand_dims(observations,
                                      observation_rank - underlying_event_rank)
        observation_log_probs = self._observation_distribution.log_prob(
            observations)

        if mask is not None:
            mask = tf.broadcast_to(
                mask, tf.concat([batch_shape, [self._num_steps]], axis=0))
            mask = distribution_util.move_dimension(mask, -1, 0)
            observation_log_probs = tf.where(
                mask[..., tf.newaxis], tf.zeros_like(observation_log_probs),
                observation_log_probs)

        return observation_log_probs
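
The masked `tf.where` at the end zeroes the per-step log-probabilities of missing observations so they drop out of the forward recursion. A hedged end-to-end sketch, assuming `tfd.HiddenMarkovModel` and the `mask` argument of `posterior_marginals`:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.8, 0.2]),
    transition_distribution=tfd.Categorical(probs=[[0.7, 0.3],
                                                   [0.2, 0.8]]),
    observation_distribution=tfd.Normal(loc=[0., 3.], scale=[0.5, 0.5]),
    num_steps=4)

observations = tf.constant([0.1, 2.9, 3.1, 0.2])
mask = tf.constant([False, True, False, False])  # step 1 is unobserved
hmm.posterior_marginals(observations, mask=mask)
```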