Example #1
 def _forward(self, x):
     with tf.control_dependencies(self._maybe_assert_valid_x(x)):
         if self.power == 0.:
             return tf.exp(x)
         # If large x accuracy is an issue, consider using:
         # (1. + x * self.power)**(1. / self.power) when x >> 1.
         return tf.exp(tf.math.log1p(x * self.power) / self.power)
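
The comment above points at two algebraically equivalent forms; a quick, hedged sanity check (standalone, with an arbitrary power `c` rather than a bijector instance) could look like:

import tensorflow as tf

# Hedged sketch: exp(log1p(x * c) / c) and (1 + x * c)**(1 / c) are the same
# quantity; the log1p form is the one used above.
x = tf.constant([0.1, 1.0, 10.0, 100.0], dtype=tf.float64)
c = tf.constant(0.5, dtype=tf.float64)
via_log1p = tf.exp(tf.math.log1p(x * c) / c)
direct = (1. + x * c) ** (1. / c)
print(tf.reduce_max(tf.abs(via_log1p - direct)).numpy())  # tiny for these x
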
    def _marginal_hidden_probs(self):
        """Compute marginal pdf for each individual observable."""

        initial_log_probs = tf.broadcast_to(
            self._log_init,
            tf.concat([self.batch_shape_tensor(), [self._num_states]], axis=0))

        # initial_log_probs :: batch_shape num_states

        def _scan_multiple_steps():
            """Perform `scan` operation when `num_steps` > 1."""

            transition_log_probs = self._log_trans

            def forward_step(log_probs, _):
                return _log_vector_matrix(log_probs, transition_log_probs)

            dummy_index = tf.zeros(self._num_steps - 1, dtype=tf.float32)

            forward_log_probs = tf.scan(forward_step,
                                        dummy_index,
                                        initializer=initial_log_probs,
                                        name="forward_log_probs")

            return tf.concat([[initial_log_probs], forward_log_probs], axis=0)

        forward_log_probs = prefer_static.cond(
            self._num_steps > 1, _scan_multiple_steps,
            lambda: initial_log_probs[tf.newaxis, ...])

        return tf.exp(forward_log_probs)
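
The scan step above relies on a `_log_vector_matrix` helper that is not shown here. A minimal sketch of such a log-space vector-matrix product (an assumption about its shape conventions, not necessarily the library's exact code) is:

import tensorflow as tf

def _log_vector_matrix(vs, ms):
  """Batched log-space product: log(exp(vs) @ exp(ms)), computed stably."""
  # Sum over the shared state index with logsumexp instead of matmul.
  return tf.reduce_logsumexp(vs[..., tf.newaxis] + ms, axis=-2)
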
Example #3
    def loop_body(should_continue, k):
      """Resample the non-accepted points."""
      # The range of U is chosen so that the resulting sample K lies in
      # [0, tf.int64.max). The final sample, if accepted, is K + 1.
      u = tf.random.uniform(
          shape,
          minval=minval_u,
          maxval=maxval_u,
          dtype=power.dtype,
          seed=seed())

      # Sample the point X from the continuous density h(x) \propto x^(-power).
      x = self._hat_integral_inverse(u, power=power)

      # Rejection-inversion requires a `hat` function, h(x) such that
      # \int_{k - .5}^{k + .5} h(x) dx >= pmf(k + 1) for points k in the
      # support. A natural hat function for us is h(x) = x^(-power).
      #
      # After sampling X from h(x), suppose it lies in the interval
      # (K - .5, K + .5) for integer K. Then the corresponding K is accepted if
      # it lies to the left of x_K, where x_K is defined by:
      #   \int_{x_k}^{K + .5} h(x) dx = H(x_K) - H(K + .5) = pmf(K + 1),
      # where H(x) = \int_x^inf h(x) dx.

      # Solving for x_K, we find that x_K = H_inverse(H(K + .5) + pmf(K + 1)).
      # Or, the acceptance condition is X <= H_inverse(H(K + .5) + pmf(K + 1)).
      # Since X = H_inverse(U), this simplifies to U <= H(K + .5) + pmf(K + 1).

      # Update the non-accepted points.
      # Since X \in (K - .5, K + .5), the sample K is chosen as floor(X + 0.5).
      k = tf.where(should_continue, tf.floor(x + 0.5), k)
      accept = (u <= self._hat_integral(k + .5, power=power) + tf.exp(
          self._log_prob(k + 1, power=power)))

      return [should_continue & (~accept), k]
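
In context, a body like this is typically driven by `tf.while_loop` until every point has been accepted. The sketch below is illustrative only: the initial `should_continue`, `k`, `shape`, and `power` come from the surrounding sampler and are assumptions here.

# Assumed driver, for illustration: iterate loop_body until all points accept.
should_continue = tf.ones(shape, dtype=tf.bool)
k = tf.zeros(shape, dtype=power.dtype)
should_continue, k = tf.while_loop(
    cond=lambda should_continue, k: tf.reduce_any(should_continue),
    body=loop_body,
    loop_vars=[should_continue, k],
    maximum_iterations=100)
samples = k + 1.  # the accepted integer sample is K + 1
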
 def _inverse(self, y):
     x = tf.identity(y)
     if self.shift is not None:
         x = x - self.shift
     if self.scale is not None:
         x = x / self.scale
     if self.log_scale is not None:
         x = x * tf.exp(-self.log_scale)
     return x
 def _forward(self, x):
     y = tf.identity(x)
     if self.scale is not None:
         y = y * self.scale
     if self.log_scale is not None:
         y = y * tf.exp(self.log_scale)
     if self.shift is not None:
         y = y + self.shift
     return y
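
Together these implement y = x * scale + shift (with an optional log_scale). A short, hedged round trip using the public shift/scale bijectors rather than these internal methods:

import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors

# Hedged example: Chain applies right-to-left, so this is y = 2 * x + 3,
# matching the scale-then-shift order in _forward above.
bij = tfb.Chain([tfb.Shift(3.), tfb.Scale(2.)])
x = tf.constant([0., 1., 2.])
y = bij.forward(x)
print(bij.inverse(y).numpy())  # recovers x up to float error
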
Example #6
 def _sample_n(self, n, seed=None):
   concentration = tf.convert_to_tensor(self.concentration)
   scale = tf.convert_to_tensor(self.scale)
   shape = tf.concat(
       [[n],
        self._batch_shape_tensor(concentration=concentration, scale=scale)],
       axis=0)
   sampled = tf.random.uniform(shape, maxval=1., seed=seed, dtype=self.dtype)
   log_sample = tf.math.log(scale) - tf.math.log1p(-sampled) / concentration
   return tf.exp(log_sample)
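
This is inverse-CDF sampling for a Pareto distribution; a hedged NumPy sketch of the same transform, checked against the population mean (valid for concentration > 1):

import numpy as np

# Same transform as above: scale * (1 - U)**(-1 / concentration), in log space.
rng = np.random.default_rng(0)
u = rng.uniform(size=200000)
concentration, scale = 3.0, 2.0
samples = np.exp(np.log(scale) - np.log1p(-u) / concentration)
print(samples.mean(), concentration * scale / (concentration - 1.))  # ~ equal
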
Example #7
            def grad(dy):
                """Computes a derivative for the min and max parameters.

        This function implements the derivative wrt the truncation bounds, which
        get blocked by the sampler. We use a custom expression for numerical
        stability instead of automatic differentiation on CDF for implicit
        gradients.

        Args:
          dy: output gradients

        Returns:
           The standard normal samples and the gradients wrt the upper
           bound and lower bound.
        """
                # std_samples has an extra dimension (the sample dimension), expand
                # lower and upper so they broadcast along this dimension.
                # See note above regarding parameterized_truncated_normal, the sample
                # dimension is the final dimension.
                lower_broadcast = lower[..., tf.newaxis]
                upper_broadcast = upper[..., tf.newaxis]

                cdf_samples = ((special_math.ndtr(std_samples) -
                                special_math.ndtr(lower_broadcast)) /
                               (special_math.ndtr(upper_broadcast) -
                                special_math.ndtr(lower_broadcast)))

                # tiny, eps are tolerance parameters to ensure we stay away from giving
                # a zero arg to the log CDF expression.

                tiny = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny
                eps = np.finfo(dtype_util.as_numpy_dtype(self.dtype)).eps
                cdf_samples = tf.clip_by_value(cdf_samples, tiny, 1 - eps)

                du = tf.exp(0.5 * (std_samples**2 - upper_broadcast**2) +
                            tf.math.log(cdf_samples))
                dl = tf.exp(0.5 * (std_samples**2 - lower_broadcast**2) +
                            tf.math.log1p(-cdf_samples))

                # Reduce the gradient across the samples
                grad_u = tf.reduce_sum(dy * du, axis=-1)
                grad_l = tf.reduce_sum(dy * dl, axis=-1)
                return [grad_l, grad_u]
 def _cdf(self, x):
   with tf.control_dependencies(self._maybe_assert_valid_sample(x)):
     concentration = tf.convert_to_tensor(self.concentration)
     loc = tf.convert_to_tensor(self.loc)
     return (
         special_math.ndtr(
             ((concentration / x) ** 0.5 * (x / loc - 1.))) +
         tf.exp(2. * concentration / loc) *
         special_math.ndtr(
             -(concentration / x) ** 0.5 * (x / loc + 1)))
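
This matches the standard inverse Gaussian CDF. A hedged numerical check against scipy, assuming the usual correspondence IG(loc, concentration) = scipy.stats.invgauss(mu=loc / concentration, scale=concentration):

import numpy as np
from scipy import special, stats

x, loc, conc = 1.7, 1.0, 2.5
cdf = (special.ndtr(np.sqrt(conc / x) * (x / loc - 1.)) +
       np.exp(2. * conc / loc) *
       special.ndtr(-np.sqrt(conc / x) * (x / loc + 1.)))
print(cdf, stats.invgauss.cdf(x, mu=loc / conc, scale=conc))  # should agree
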
Example #9
 def _log_cdf(self, x):
     scale = tf.convert_to_tensor(self.scale)
     concentration = tf.convert_to_tensor(self.concentration)
     z = self._z(x, scale, concentration)
     eq_zero = tf.equal(concentration,
                        0)  # Concentration = 0 ==> Exponential.
     nonzero_conc = tf.where(eq_zero, tf.constant(1, self.dtype),
                             concentration)
     where_nonzero = tf.math.log1p(-(1 + nonzero_conc * z)**(-1 /
                                                             nonzero_conc))
     where_zero = tf.math.log1p(-tf.exp(-z))
     return tf.where(eq_zero, where_zero, where_nonzero)
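
For nonzero concentration the branch above is log(1 - (1 + c*z)**(-1/c)), i.e. the standardized generalized Pareto log-CDF; a hedged check against scipy:

import numpy as np
from scipy import stats

# z plays the role of (x - loc) / scale; c is the concentration.
z, c = 1.3, 0.5
print(np.log1p(-(1. + c * z) ** (-1. / c)))
print(stats.genpareto.logcdf(z, c=c))  # should agree
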
def _bessel_ive(v, z, cache=None):
  """Computes I_v(z)*exp(-abs(z)) using a recurrence relation, where z > 0."""
  # TODO(b/67497980): Switch to a more numerically faithful implementation.
  z = tf.convert_to_tensor(z)

  wrap = lambda result: tf.debugging.check_numerics(
      result, 'besseli{}'.format(v))

  if float(v) >= 2:
    raise ValueError(
        'Evaluating bessel_i by recurrence becomes imprecise for large v')

  cache = cache or {}
  safe_z = tf.where(z > 0, z, tf.ones_like(z))
  if v in cache:
    return wrap(cache[v])
  if v == 0:
    cache[v] = tf.math.bessel_i0e(z)
  elif v == 1:
    cache[v] = tf.math.bessel_i1e(z)
  elif v == 0.5:
    # sinh(x)*exp(-abs(x)), sinh(x) = (e^x - e^{-x}) / 2
    sinhe = lambda x: (tf.exp(x - tf.abs(x)) - tf.exp(-x - tf.abs(x))) / 2
    cache[v] = (
        np.sqrt(2 / np.pi) * sinhe(z) *
        tf.where(z > 0, tf.math.rsqrt(safe_z), tf.ones_like(safe_z)))
  elif v == -0.5:
    # cosh(x)*exp(-abs(x)), cosh(x) = (e^x + e^{-x}) / 2
    coshe = lambda x: (tf.exp(x - tf.abs(x)) + tf.exp(-x - tf.abs(x))) / 2
    cache[v] = (
        np.sqrt(2 / np.pi) * coshe(z) *
        tf.where(z > 0, tf.math.rsqrt(safe_z), tf.ones_like(safe_z)))
  if v <= 1:
    return wrap(cache[v])
  # Recurrence relation:
  cache[v] = (_bessel_ive(v - 2, z, cache) -
              (2 * (v - 1)) * _bessel_ive(v - 1, z, cache) / z)
  return wrap(cache[v])
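
The half-integer branches follow from the closed forms I_{1/2}(z) = sqrt(2/(pi*z)) * sinh(z) and I_{-1/2}(z) = sqrt(2/(pi*z)) * cosh(z); a hedged check against scipy's exponentially scaled Bessel function:

import numpy as np
from scipy import special

z = np.array([0.1, 1.0, 5.0])
by_formula = np.sqrt(2. / (np.pi * z)) * np.sinh(z) * np.exp(-z)
print(by_formula)
print(special.ive(0.5, z))  # should agree closely
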
Example #11
 def _log_prob(self, x):
     scale = tf.convert_to_tensor(self.scale)
     # The exact HalfCauchy-Normal marginal log-density is analytically
     # intractable; we compute a (relatively accurate) numerical
     # approximation. This is a log-space version of ref [2] in the class docstring.
     xx = (x / scale)**2 / 2
     g = 0.5614594835668851  # tf.exp(-0.5772156649015328606)
     b = 1.0420764938351215  # tf.sqrt(2 * (1-g) / (g * (2-g)))
     h_inf = 1.0801359952503342  #  (1-g)*(g*g-6*g+12) / (3*g * (2-g)**2 * b)
     q = 20. / 47. * xx**1.0919284281983377
     h = 1. / (1 + xx**(1.5)) + h_inf * q / (1 + q)
     c = -.5 * np.log(2 * np.pi**3) - tf.math.log(g * scale)
     return -tf.math.log1p(
         (1 - g) / g * tf.exp(-xx / (1 - g))) + tf.math.log(
             tf.math.log1p(g / xx - (1 - g) / (h + b * xx)**2)) + c
def _logsum_expbig_minus_expsmall(big, small):
    """Stable evaluation of `Log[exp{big} - exp{small}]`.

  To work correctly, we should have the pointwise relation:  `small <= big`.

  Args:
    big: Floating-point `Tensor`
    small: Floating-point `Tensor` with same `dtype` as `big` and broadcastable
      shape.

  Returns:
    `Tensor` of same `dtype` of `big` and broadcast shape.
  """
    with tf.name_scope("logsum_expbig_minus_expsmall"):
        return tf.math.log1p(-tf.exp(small - big)) + big
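
A short usage example: both arguments can be far outside the range where exp would overflow, and the result still matches the hand-computed value.

import numpy as np
import tensorflow as tf

big = tf.constant(1000., dtype=tf.float64)
small = tf.constant(999., dtype=tf.float64)
print(_logsum_expbig_minus_expsmall(big, small).numpy())
print(1000. + np.log1p(-np.exp(-1.)))  # same value: 1000 + log(1 - 1/e)
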
Example #13
 def _sample_n(self, n, seed=None):
     # Here we use the fact that if:
     # lam ~ Gamma(concentration=total_count, rate=(1-probs)/probs)
     # then X ~ Poisson(lam) is Negative Binomially distributed.
     logits = self._logits_parameter_no_checks()
     stream = SeedStream(seed, salt='NegativeBinomial')
     rate = tf.random.gamma(shape=[n],
                            alpha=self.total_count,
                            beta=tf.exp(-logits),
                            dtype=self.dtype,
                            seed=stream())
     return tf.random.poisson(lam=rate,
                              shape=[],
                              dtype=self.dtype,
                              seed=stream())
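
A hedged NumPy sketch of the same gamma-Poisson construction, checked against the negative binomial mean total_count * probs / (1 - probs):

import numpy as np

rng = np.random.default_rng(0)
total_count, probs = 5.0, 0.3
# rate ~ Gamma(concentration=total_count, rate=(1 - probs) / probs)
rate = rng.gamma(shape=total_count, scale=probs / (1. - probs), size=200000)
samples = rng.poisson(rate)
print(samples.mean(), total_count * probs / (1. - probs))  # ~ equal
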
 def _finish_prob_for_one_fiber(self, y, x, ildj, event_ndims,
                                **distribution_kwargs):
     """Finish computation of prob on one element of the inverse image."""
     x = self._maybe_rotate_dims(x, rotate_right=True)
     prob = self.distribution.prob(x, **distribution_kwargs)
     if self._is_maybe_event_override:
         prob = tf.reduce_prod(prob, axis=self._reduce_event_indices)
     prob = prob * tf.exp(tf.cast(ildj, prob.dtype))
     if self._is_maybe_event_override and isinstance(event_ndims, int):
         tensorshape_util.set_shape(
             prob,
             tf.broadcast_static_shape(
                 tensorshape_util.with_rank_at_least(y.shape,
                                                     1)[:-event_ndims],
                 self.batch_shape))
     return prob
Example #15
def log_cdf_laplace(x, name="log_cdf_laplace"):
    """Log Laplace distribution function.

  This function calculates `Log[L(x)]`, where `L(x)` is the cumulative
  distribution function of the Laplace distribution, i.e.

  ```L(x) := 0.5 * int_{-infty}^x e^{-|t|} dt```

  For numerical accuracy, `L(x)` is computed in different ways depending on `x`,

  ```
  x <= 0:
    Log[L(x)] = Log[0.5] + x, which is exact

  0 < x:
    Log[L(x)] = Log[1 - 0.5 * e^{-x}], which is exact
  ```

  Args:
    x: `Tensor` of type `float32`, `float64`.
    name: Python string. A name for the operation (default="log_cdf_laplace").

  Returns:
    `Tensor` with `dtype=x.dtype`.

  Raises:
    TypeError: if `x.dtype` is not handled.
  """

    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, name="x")

        # For x < 0, L(x) = 0.5 * exp{x} exactly, so Log[L(x)] = log(0.5) + x.
        lower_solution = -np.log(2.) + x

        # safe_exp_neg_x = exp{-x} for x > 0, but is
        # bounded above by 1, which avoids
        #   log[1 - 1] = -inf for x = log(1/2), AND
        #   exp{-x} --> inf, for x << -1
        safe_exp_neg_x = tf.exp(-tf.abs(x))

        # log1p(z) = log(1 + z) approx z for |z| << 1. This approximation is used
        # internally by log1p, rather than being done explicitly here.
        upper_solution = tf.math.log1p(-0.5 * safe_exp_neg_x)

        return tf.where(x < 0., lower_solution, upper_solution)
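
Usage sketch: for very negative x the naive log of the CDF underflows to -inf, while the lower branch above stays exact.

import tensorflow as tf

x = tf.constant([-1000., -20., -1., 0.], dtype=tf.float64)
print(log_cdf_laplace(x).numpy())
print(tf.math.log(0.5 * tf.exp(x)).numpy())  # -inf at x = -1000
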
Example #16
  def _hat_integral(self, x, power):
    """Integral of the `hat` function, used for sampling.

    We choose a `hat` function, h(x) = x^(-power), which is a continuous
    (unnormalized) density touching each positive integer at the (unnormalized)
    pmf. This function implements the `hat` integral H(x) = int_x^inf h(t) dt,
    which is needed for sampling purposes.

    Arguments:
      x: A Tensor of points x at which to evaluate H(x).
      power: Power that parameterizes the hat function.

    Returns:
      A Tensor containing the evaluation of H(x) at the points x.
    """
    x = tf.cast(x, power.dtype)
    t = power - 1.
    return tf.exp((-t) * tf.math.log1p(x) - tf.math.log(t))
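
As written, the closed form is exp(-(power - 1) * log1p(x) - log(power - 1)) = (1 + x)**(1 - power) / (power - 1); a hedged check against direct quadrature of (1 + t)**(-power):

import numpy as np
from scipy import integrate

x, power = 3.0, 2.5
closed_form = np.exp(-(power - 1.) * np.log1p(x) - np.log(power - 1.))
numeric, _ = integrate.quad(lambda t: (1. + t) ** (-power), x, np.inf)
print(closed_form, numeric)  # should agree
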
def _kl_bernoulli_bernoulli(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b ProbitBernoulli.

  Args:
    a: instance of a ProbitBernoulli distribution object.
    b: instance of a ProbitBernoulli distribution object.
    name: Python `str` name to use for created operations.
      Default value: `None` (i.e., `'kl_probit_bernoulli_probit_bernoulli'`).

  Returns:
    Batchwise KL(a || b)
  """
    with tf.name_scope(name or 'kl_probit_bernoulli_probit_bernoulli'):
        a_log_probs0, a_log_probs1 = a._outcome_log_probs()  # pylint: disable=protected-access
        b_log_probs0, b_log_probs1 = b._outcome_log_probs()  # pylint: disable=protected-access
        a_prob1 = tf.exp(a_log_probs1)

        return (1. - a_prob1) * (a_log_probs0 - b_log_probs0) + a_prob1 * (
            a_log_probs1 - b_log_probs1)
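
A hedged check of the closed form against the direct two-term KL sum for Bernoulli probabilities p (distribution a) and q (distribution b):

import numpy as np

p, q = 0.3, 0.7
closed_form = ((1. - p) * (np.log(1. - p) - np.log(1. - q)) +
               p * (np.log(p) - np.log(q)))
direct = sum(pa * np.log(pa / pb) for pa, pb in [(1. - p, 1. - q), (p, q)])
print(closed_form, direct)  # identical up to float error
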
    def _prob(self, y, **kwargs):
        if not hasattr(self.distribution, "_prob"):
            return tf.exp(self.log_prob(y, **kwargs))
        distribution_kwargs, bijector_kwargs = self._kwargs_split_fn(kwargs)

        x = self.bijector.inverse(y, **bijector_kwargs)
        event_ndims = self._maybe_get_static_event_ndims()
        ildj = self.bijector.inverse_log_det_jacobian(y,
                                                      event_ndims=event_ndims,
                                                      **bijector_kwargs)
        if self.bijector._is_injective:  # pylint: disable=protected-access
            return self._finish_prob_for_one_fiber(y, x, ildj, event_ndims,
                                                   **distribution_kwargs)

        prob_on_fibers = [
            self._finish_prob_for_one_fiber(y, x_i, ildj_i, event_ndims,
                                            **distribution_kwargs)
            for x_i, ildj_i in zip(x, ildj)
        ]
        return sum(prob_on_fibers)
Example #19
def _kl_laplace_laplace(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Laplace.

  Args:
    a: instance of a Laplace distribution object.
    b: instance of a Laplace distribution object.
    name: Python `str` name to use for created operations.
      Default value: `None` (i.e., `'kl_laplace_laplace'`).

  Returns:
    kl_div: Batchwise KL(a || b)
  """
  with tf.name_scope(name or 'kl_laplace_laplace'):
    # Consistent with
    # http://www.mast.queensu.ca/~communications/Papers/gil-msc11.pdf, page 38
    distance = tf.abs(a.loc - b.loc)
    a_scale = tf.convert_to_tensor(a.scale)
    b_scale = tf.convert_to_tensor(b.scale)
    delta_log_scale = tf.math.log(a_scale) - tf.math.log(b_scale)
    return (-delta_log_scale +
            distance / b_scale - 1. +
            tf.exp(-distance / a_scale + delta_log_scale))
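
A hedged Monte Carlo check of the closed form: KL(a || b) estimated as the average of log p_a(x) - log p_b(x) under samples x ~ a.

import numpy as np

rng = np.random.default_rng(0)
loc_a, scale_a, loc_b, scale_b = 0.0, 1.0, 1.0, 2.0
x = rng.laplace(loc_a, scale_a, size=200000)
log_pdf = lambda x, loc, scale: -np.abs(x - loc) / scale - np.log(2. * scale)
mc = np.mean(log_pdf(x, loc_a, scale_a) - log_pdf(x, loc_b, scale_b))
delta_log_scale = np.log(scale_a) - np.log(scale_b)
closed = (-delta_log_scale + np.abs(loc_a - loc_b) / scale_b - 1.
          + np.exp(-np.abs(loc_a - loc_b) / scale_a + delta_log_scale))
print(mc, closed)  # should agree to roughly two decimal places
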
 def _inverse(self, y):
     y = self._maybe_assert_valid(y)
     return tf.exp(
         tf.math.log1p(-(1 - y**self.concentration1)**self.concentration0))
 def _forward(self, x):
     x = self._maybe_assert_valid(x)
     return tf.exp(
         tf.math.log1p(-tf.exp(tf.math.log1p(-x) / self.concentration0)) /
         self.concentration1)
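
The two expressions are exact inverses of each other; a hedged round-trip check with arbitrary concentrations c1, c0:

import tensorflow as tf

c1, c0 = 2.0, 3.0
x = tf.constant([0.1, 0.5, 0.9], dtype=tf.float64)
y = tf.exp(tf.math.log1p(-tf.exp(tf.math.log1p(-x) / c0)) / c1)  # forward
x_back = tf.exp(tf.math.log1p(-(1 - y**c1)**c0))                 # inverse
print(tf.reduce_max(tf.abs(x - x_back)).numpy())  # ~ 0
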
 def _stddev(self):
     return tf.exp(0.5 * self._log_variance())
 def _variance(self):
     return tf.exp(self._log_variance())
 def _mean(self, distributions=None):
     if distributions is None:
         distributions = self.poisson_and_mixture_distributions()
     dist, mixture_dist = distributions
     return tf.exp(
         tf.reduce_logsumexp(mixture_dist.logits + dist.log_rate, axis=-1))
 def _variance(self):
     logits, probs = self._logits_and_probs_no_checks()
     return tf.exp(-logits) / probs
 def _mean(self):
     return tf.exp(-self._logits_parameter_no_checks())
Example #27
 def _mean(self, df=None):
     df = tf.convert_to_tensor(self.df if df is None else df)
     return np.sqrt(2.) * tf.exp(
         tf.math.lgamma(0.5 * (df + 1.)) - tf.math.lgamma(0.5 * df))
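
The expression is sqrt(2) * Gamma((df + 1) / 2) / Gamma(df / 2), which is the mean of a Chi distribution with df degrees of freedom; a hedged check against scipy:

import numpy as np
from scipy import special, stats

df = 4.0
mean = np.sqrt(2.) * np.exp(special.gammaln(0.5 * (df + 1.)) -
                            special.gammaln(0.5 * df))
print(mean, stats.chi.mean(df))  # should agree
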
Example #28
 def _rate_parameter_no_checks(self):
     if self._rate is None:
         return tf.exp(self._log_rate)
     return tf.identity(self._rate)
Example #29
 def _log_normalization(self, log_rate):
     return tf.exp(log_rate)
 def _entropy(self):
     log_probs0, log_probs1 = self._outcome_log_probs()
     probs1 = tf.exp(log_probs1)
     return -(1. - probs1) * log_probs0 - probs1 * log_probs1
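
With p = exp(log_probs1), this is the usual Bernoulli entropy -(1 - p) log(1 - p) - p log(p); a quick check in nats:

import numpy as np
from scipy import stats

p = 0.2
print(-(1. - p) * np.log(1. - p) - p * np.log(p))
print(stats.bernoulli(p).entropy())  # same value, in nats
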